From 9acf16a37b6101b5f75adb26d9bc57e055cfc2ec Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Tue, 17 Oct 2023 13:25:07 +0000 Subject: [PATCH 01/65] Add sceleton object NVMeofGwMon/Map, added it to the array of monitors --- src/mon/CMakeLists.txt | 1 + src/mon/Monitor.cc | 2 + src/mon/Monitor.h | 5 +++ src/mon/NVMeofGwMap.h | 65 ++++++++++++++++++++++++++++++ src/mon/NVMeofGwMon.cc | 82 ++++++++++++++++++++++++++++++++++++++ src/mon/NVMeofGwMon.h | 90 ++++++++++++++++++++++++++++++++++++++++++ src/mon/mon_types.h | 1 + 7 files changed, 246 insertions(+) create mode 100755 src/mon/NVMeofGwMap.h create mode 100644 src/mon/NVMeofGwMon.cc create mode 100755 src/mon/NVMeofGwMon.h diff --git a/src/mon/CMakeLists.txt b/src/mon/CMakeLists.txt index 784b4c3ee0b3..2754b4b50d9a 100644 --- a/src/mon/CMakeLists.txt +++ b/src/mon/CMakeLists.txt @@ -21,6 +21,7 @@ set(lib_mon_srcs ConnectionTracker.cc HealthMonitor.cc KVMonitor.cc + NVMeofGwMon.cc ../mds/MDSAuthCaps.cc ../mgr/mgr_commands.cc ../osd/OSDCap.cc diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 349ca30a8981..0b0c4122f67f 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -84,6 +84,7 @@ #include "MgrStatMonitor.h" #include "ConfigMonitor.h" #include "KVMonitor.h" +#include "NVMeofGwMon.h" #include "mon/HealthMonitor.h" #include "common/config.h" #include "common/cmdparse.h" @@ -247,6 +248,7 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorDBStore *s, paxos_service[PAXOS_HEALTH].reset(new HealthMonitor(*this, *paxos, "health")); paxos_service[PAXOS_CONFIG].reset(new ConfigMonitor(*this, *paxos, "config")); paxos_service[PAXOS_KV].reset(new KVMonitor(*this, *paxos, "kv")); + paxos_service[PAXOS_NVMEGW].reset(new NVMeofGwMon(*this, *paxos, "nvmeofgw")); bool r = mon_caps.parse("allow *", NULL); ceph_assert(r); diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 7f9a16a9a36c..2c4dd6d37b0f 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -712,6 +712,11 @@ class Monitor : public Dispatcher, return (class KVMonitor*) paxos_service[PAXOS_KV].get(); } + class NVMeofGwMon *nvmegwmon() { + return (class NVMeofGwMon*) paxos_service[PAXOS_NVMEGW].get(); + } + + friend class Paxos; friend class OSDMonitor; friend class MDSMonitor; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h new file mode 100755 index 000000000000..bb96868754d7 --- /dev/null +++ b/src/mon/NVMeofGwMap.h @@ -0,0 +1,65 @@ +/* + * NVMeofGwMap.h + * + * Created on: Oct 17, 2023 + * Author: 227870756 + */ + +#ifndef MON_NVMEOFGWMAP_H_ +#define MON_NVMEOFGWMAP_H_ +#include "string" +#include +#include "map" +#include +#include + +#include "msg/Message.h" + +typedef enum { + GW_IDLE_STATE = 0, //invalid state + GW_STANDBY_STATE, + GW_ACTIVE_STATE, + GW_BLOCKED_AGROUP_OWNER, + GW_WAIT_FAILBACK_PREPARED +}GW_STATES_PER_AGROUP_E; + + enum class GW_AVAILABILITY_E { + GW_CREATED = 0, + GW_AVAILABLE, + GW_UNAVAILABLE +}; + +#define MAX_SUPPORTED_ANA_GROUPS 5 +#define REDUNDANT_GW_ANA_GROUP_ID 0xFF +typedef struct GW_STATE_T { + //bool ana_state[MAX_SUPPORTED_ANA_GROUPS]; // real ana states per ANA group for this GW :1- optimized, 0- inaccessible + GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group + uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF + GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable + uint16_t gw_id; + uint64_t epoch; // epoch per GW +}GW_STATE_T; + +typedef struct GW_METADATA_T { + uint32_t anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state +}GW_METADATA_T; + +using GWMAP = std::map >; +using GWMETADATA = std::map >; + +class NVMeofGwMap +{ + public: + GWMAP Gmap; + GWMETADATA Gmetadata; + std::map subsyst_epoch; + bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode + uint32_t listen_mode_start_tick{0}; + + NVMeofGwMap() = default; + +}; + + + +#endif /* SRC_MON_NVMEOFGWMAP_H_ */ diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc new file mode 100644 index 000000000000..716e0520ebfc --- /dev/null +++ b/src/mon/NVMeofGwMon.cc @@ -0,0 +1,82 @@ +/* + * NVMeGWMonitor.cc + * + * Created on: Oct 17, 2023 + * Author: + */ + + + +#include "common/TextTable.h" +#include "include/stringify.h" +#include "NVMeofGwMon.h" +using std::map; +using std::make_pair; +using std::ostream; +using std::ostringstream; + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, this) +using namespace TOPNSPC::common; + +static ostream& _prefix(std::ostream *_dout, const Monitor &mon, + const NVMeofGwMon *hmon) { + return *_dout << "mon." << mon.name << "@" << mon.rank; +} + + + + void NVMeofGwMon::init(){ + dout(4) << __func__ << "called " << dendl; + } + + void NVMeofGwMon::on_shutdown() { + + } + + void NVMeofGwMon::tick(){ + + if (!is_active() || !mon.is_leader()) + return; + + const auto now = ceph::coarse_mono_clock::now(); + dout(4) << __func__ << "NVMeofGwMon leader got a tick " << dendl; + last_tick = now; + } + + void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){ + dout(4) << __func__ << dendl; + } + + + bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ + dout(4) << __func__ << dendl; + return true; + } + + bool NVMeofGwMon::prepare_update(MonOpRequestRef op){ + dout(4) << __func__ << dendl; + return true; + } + + bool NVMeofGwMon::preprocess_command(MonOpRequestRef op){ + dout(4) << __func__ << dendl; + return true; + } + + bool NVMeofGwMon::prepare_command(MonOpRequestRef op){ + dout(4) << __func__ << dendl; + return true; + } + + + bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ + dout(4) << __func__ << dendl; + return true; + } + + bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ + dout(4) << __func__ << dendl; + return true; + } diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h new file mode 100755 index 000000000000..1f3c2360c673 --- /dev/null +++ b/src/mon/NVMeofGwMon.h @@ -0,0 +1,90 @@ +/* + * NVMeGWMonitor.h + * + * Created on: Oct 17, 2023 + * Author: 227870756 + */ + +#ifndef MON_NVMEGWMONITOR_H_ +#define MON_NVMEGWMONITOR_H_ +#include +#include + +#include "include/Context.h" +//#include "MgrMap.h" +#include "PaxosService.h" +#include "MonCommand.h" +#include "NVMeofGwMap.h" + +class NVMeofGwMon: public PaxosService +{ + NVMeofGwMap map; //NVMeGWMap + //MgrMap pending_map; + //utime_t first_seen_inactive; + + std::map last_beacon; + + + // when the mon was not updating us for some period (e.g. during slow + // election) to reset last_beacon timeouts + ceph::coarse_mono_clock::time_point last_tick; + + std::vector command_descs; + std::vector pending_command_descs; + +public: + NVMeofGwMon(Monitor &mn, Paxos &p, const std::string& service_name) + : PaxosService(mn, p, service_name) + {} + ~NVMeofGwMon() override {} + + + //const MgrMap &get_map() const { return map; } + + // bool in_use() const { return map.epoch > 0; } + + //void prime_mgr_client(); + + + // void get_store_prefixes(std::set& s) const override; + + // 3 pure virtual methods of the paxosService + void create_initial()override{}; + void create_pending()override{}; + void encode_pending(MonitorDBStore::TransactionRef t)override{}; + + + void init() override; + void on_shutdown() override; + void update_from_paxos(bool *need_bootstrap) override; + + + bool preprocess_query(MonOpRequestRef op) override; + bool prepare_update(MonOpRequestRef op) override; + + bool preprocess_command(MonOpRequestRef op); + bool prepare_command(MonOpRequestRef op); + + void encode_full(MonitorDBStore::TransactionRef t) override { } + + bool preprocess_beacon(MonOpRequestRef op); + bool prepare_beacon(MonOpRequestRef op); + + //void check_sub(Subscription *sub); + //void check_subs() + + void tick() override; + + void print_summary(ceph::Formatter *f, std::ostream *ss) const; + + //const std::vector &get_command_descs() const; + + + //void get_versions(std::map> &versions); + + +}; + + + +#endif /* SRC_MON_NVMEGWMONITOR_H_ */ diff --git a/src/mon/mon_types.h b/src/mon/mon_types.h index cce9976f3c35..7f5ed911dacb 100644 --- a/src/mon/mon_types.h +++ b/src/mon/mon_types.h @@ -36,6 +36,7 @@ enum { PAXOS_HEALTH, PAXOS_CONFIG, PAXOS_KV, + PAXOS_NVMEGW, PAXOS_NUM }; From 696debbec7f1e2fb05a687deb056d0e008a8fabe Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Wed, 18 Oct 2023 12:12:06 +0000 Subject: [PATCH 02/65] Commit GWMap --- src/mon/CMakeLists.txt | 1 + src/mon/NVMeofGwMap.cc | 37 +++++++++++++ src/mon/NVMeofGwMap.h | 123 ++++++++++++++++++++++++++++++++++++++++- src/mon/NVMeofGwMon.cc | 29 ++++++++-- src/mon/NVMeofGwMon.h | 7 +-- 5 files changed, 185 insertions(+), 12 deletions(-) create mode 100755 src/mon/NVMeofGwMap.cc diff --git a/src/mon/CMakeLists.txt b/src/mon/CMakeLists.txt index 2754b4b50d9a..35e27d35e85b 100644 --- a/src/mon/CMakeLists.txt +++ b/src/mon/CMakeLists.txt @@ -22,6 +22,7 @@ set(lib_mon_srcs HealthMonitor.cc KVMonitor.cc NVMeofGwMon.cc + NVMeofGwMap.cc ../mds/MDSAuthCaps.cc ../mgr/mgr_commands.cc ../osd/OSDCap.cc diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc new file mode 100755 index 000000000000..a7cbc003782d --- /dev/null +++ b/src/mon/NVMeofGwMap.cc @@ -0,0 +1,37 @@ + +#include +#include "include/stringify.h" +#include "NVMeofGwMon.h" + +using std::map; +using std::make_pair; +using std::ostream; +using std::ostringstream; + +/* +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, this) +using namespace TOPNSPC::common; + +static ostream& _prefix(std::ostream *_dout, const Monitor &mon, + const NVMeofGwMap *hmon) { + return *_dout << "mon." << mon.name << "@" << mon.rank; +} +*/ + + +int NVMeofGwMap::_dump_gws( GWMAP & Gmap)const { + + for (auto& itr : Gmap) { + for (auto& ptr : itr.second) { + std::cout << "NQN " << itr.first + << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id << " available " << (int)ptr.second.availability << " States: "; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + std::cout << ptr.second.sm_state[i] << " "; + } + std::cout << endl; + } + } + return 0; + } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index bb96868754d7..8ca4c7a12b10 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -12,8 +12,33 @@ #include "map" #include #include - +#include +#include "include/encoding.h" +#include "include/utime.h" +#include "common/Formatter.h" +#include "common/ceph_releases.h" +#include "common/version.h" +#include "common/options.h" +#include "common/Clock.h" +#include "PaxosService.h" #include "msg/Message.h" +/*#include "NVMeofGwMon.h" + +using std::ostream; + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, this) +using namespace TOPNSPC::common; + +class NVMeofGwMap; + +inline ostream& _prefix(std::ostream *_dout, const Monitor &mon, + const NVMeofGwMap *map) { + return *_dout << "mon." << mon.name << "@" << mon.rank; +} +*/ + typedef enum { GW_IDLE_STATE = 0, //invalid state @@ -47,19 +72,113 @@ typedef struct GW_METADATA_T { using GWMAP = std::map >; using GWMETADATA = std::map >; + + +inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { + for(int i = 0; i subsyst_epoch; bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode uint32_t listen_mode_start_tick{0}; + + //std::map module_options; + void encode(ceph::buffer::list &bl) const { + ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); + encode ((int)Gmap.size(),bl); // number nqn + for (auto& itr : Gmap) { + encode((const std::string &)itr.first, bl);// nqn + encode( itr.second, bl);// encode the full map of this nqn : map + } + ENCODE_FINISH(bl); + } + + void decode(ceph::buffer::list::const_iterator &bl) { + DECODE_START(1, bl); + // decode(name, bl);// decode(can_run, bl);// decode(error_string, bl);// decode(module_options, bl); + int num_subsystems; + std::string nqn; + decode(num_subsystems, bl); + std::map gw_map; + Gmap.clear(); + for(int i = 0; i < num_subsystems; i++){ + decode(nqn, bl); + Gmap.insert(make_pair(nqn, std::map())); + //decode the map + gw_map.clear(); + decode(gw_map, bl); + //insert the qw_map to Gmap + for(auto &itr: gw_map ){ + Gmap[nqn].insert({itr.first, itr.second}); + } + } + DECODE_FINISH(bl); + } + NVMeofGwMap() = default; -}; + GW_STATE_T * find_gw_map(uint16_t gw_id, const std::string& nqn ) + { + auto it = Gmap.find(nqn); + if (it != Gmap.end() /* && it->first == nqn*/) { + auto it2 = it->second.find(gw_id); + if (it2 != it->second.end() /* && it2->first == gw_id*/ ){ // cout << "AAAA " << gw_id << " " << it2->first << endl; + return &it2->second; + } + } + return NULL; + } + + int cfg_add_gw (uint16_t gw_id, const std::string & nqn, uint16_t ana_grpid) { + GW_STATE_T state{ {GW_IDLE_STATE,} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, gw_id, 0 }; + if (find_gw_map(gw_id, nqn)) { + //dout(4) << __func__ << " ERROR :GW already exists in map " << gw_id << dendl; + return 1; + } + if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) + { + //dout(4) << __func__ << " ERROR :GW " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; + return 1; + } + //TODO check that all MAX_SUPPORTED_ANA_GROUPS are occupied in the subsystem - assert + + if(Gmap[nqn].size() ==0 ) + Gmap.insert(make_pair(nqn, std::map())); + Gmap[nqn].insert({gw_id, state}); + return 0; + } + + int _dump_gws( GWMAP & Gmap)const ; + +}; #endif /* SRC_MON_NVMEOFGWMAP_H_ */ diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 716e0520ebfc..04db66022136 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -6,15 +6,16 @@ */ - -#include "common/TextTable.h" -#include "include/stringify.h" +#include + #include "include/stringify.h" #include "NVMeofGwMon.h" + using std::map; using std::make_pair; using std::ostream; using std::ostringstream; + #define dout_subsys ceph_subsys_mon #undef dout_prefix #define dout_prefix _prefix(_dout, mon, this) @@ -34,14 +35,24 @@ static ostream& _prefix(std::ostream *_dout, const Monitor &mon, void NVMeofGwMon::on_shutdown() { } - +static int cnt ; void NVMeofGwMon::tick(){ - if (!is_active() || !mon.is_leader()) + if (!is_active() || !mon.is_leader()){ + dout(4) << __func__ << " NVMeofGwMon leader : " << mon.is_leader() << "active : " << is_active() << dendl; + if(mon.is_leader() && ++cnt == 4){ + Gmap.cfg_add_gw(1, "nqn2008.node1", 1); + Gmap.cfg_add_gw(2, "nqn2008.node1", 2); + Gmap.cfg_add_gw(3, "nqn2008.node1", 3); + Gmap.cfg_add_gw(1, "nqn2008.node2", 2); + // map._dump_gws(map.Gmap); + } + return; + } const auto now = ceph::coarse_mono_clock::now(); - dout(4) << __func__ << "NVMeofGwMon leader got a tick " << dendl; + dout(4) << __func__ << "NVMeofGwMon leader got a real tick " << dendl; last_tick = now; } @@ -50,6 +61,12 @@ static ostream& _prefix(std::ostream *_dout, const Monitor &mon, } + void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t){ + dout(4) << __func__ << dendl; + } + + + bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ dout(4) << __func__ << dendl; return true; diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index 1f3c2360c673..0f5d02d47f13 100755 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -18,7 +18,7 @@ class NVMeofGwMon: public PaxosService { - NVMeofGwMap map; //NVMeGWMap + NVMeofGwMap Gmap; //NVMeGWMap //MgrMap pending_map; //utime_t first_seen_inactive; @@ -34,8 +34,7 @@ class NVMeofGwMon: public PaxosService public: NVMeofGwMon(Monitor &mn, Paxos &p, const std::string& service_name) - : PaxosService(mn, p, service_name) - {} + : PaxosService(mn, p, service_name) { } ~NVMeofGwMon() override {} @@ -51,7 +50,7 @@ class NVMeofGwMon: public PaxosService // 3 pure virtual methods of the paxosService void create_initial()override{}; void create_pending()override{}; - void encode_pending(MonitorDBStore::TransactionRef t)override{}; + void encode_pending(MonitorDBStore::TransactionRef t)override ; void init() override; From c8abde1c93932da0c40a514370a58fc1bf5d3294 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 22 Oct 2023 11:56:25 +0000 Subject: [PATCH 03/65] Add GWMap --- src/mon/NVMeofGwMap.cc | 264 ++++++++++++++++++++++++++++++++++++++--- src/mon/NVMeofGwMap.h | 244 ++++++++++++++++++++----------------- src/mon/NVMeofGwMon.cc | 247 +++++++++++++++++++++++++++++--------- src/mon/NVMeofGwMon.h | 79 ++++++------ 4 files changed, 615 insertions(+), 219 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index a7cbc003782d..57e84bf6e686 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -8,30 +8,256 @@ using std::make_pair; using std::ostream; using std::ostringstream; -/* + #define dout_subsys ceph_subsys_mon #undef dout_prefix -#define dout_prefix _prefix(_dout, mon, this) +#define dout_prefix _prefix(_dout, this, this) using namespace TOPNSPC::common; -static ostream& _prefix(std::ostream *_dout, const Monitor &mon, - const NVMeofGwMap *hmon) { - return *_dout << "mon." << mon.name << "@" << mon.rank; +static ostream& _prefix(std::ostream *_dout, const NVMeofGwMap *h,//const Monitor &mon, + const NVMeofGwMap *map) { + return *_dout << "gw-mon." << map->mon->name << "@" << map->mon->rank; +} + +int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid) { + GW_STATE_T state{ {GW_IDLE_STATE,} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, 0 }; + + if (find_gw_map(gw_id, nqn)) { + dout(4) << __func__ << " ERROR :GW already exists in map " << gw_id << dendl; + return 1; + } + if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) + { + dout(4) << __func__ << " ERROR :GW " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; + return 1; + } + + //TODO check that all MAX_SUPPORTED_ANA_GROUPS are occupied in the subsystem - assert + + if(Gmap[nqn].size() ==0 ) + Gmap.insert(make_pair(nqn, SUBSYST_GWMAP())); + Gmap[nqn].insert({gw_id, state}); + + create_metadata(gw_id, nqn); + //epoch++; + return 0; +} + + +GW_METADATA_T* NVMeofGwMap::find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn) +{ + auto it = Gmetadata.find(nqn); + if (it != Gmetadata.end() ) { + auto it2 = it->second.find(gw_id); + if (it2 != it->second.end() ) { + return &it2->second; + } + else{ + dout(4) << __func__ << " not found by gw id " << gw_id << dendl; + } + } + else{ + dout(4) << __func__ << " not found by nqn " << nqn << dendl; + } + return NULL; +} + + +int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { + + dout(4) << __func__ << " called " << mon << dendl; + std::ostringstream ss; + ss << std::endl; + for (auto& itr : Gmap) { + for (auto& ptr : itr.second) { + + ss << " NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id << " available " << (int)ptr.second.availability << " States: "; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + ss << (int)ptr.second.sm_state[i] << " " ; + } + ss << std::endl; + } + } + dout(10) << ss.str() <availability << dendl; + propose_pending = false; + if (gw_state->availability == GW_AVAILABILITY_E::GW_CREATED) { + // first time appears - allow IO traffic for this GW + gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) gw_state->sm_state[i] = GW_STANDBY_STATE; + if (gw_state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { // not a redundand GW + //gw_state->ana_state[gw_state->optimized_ana_group_id] = true; + gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; + } + propose_pending = true; + } + + + else if (gw_state->availability == GW_AVAILABILITY_E::GW_UNAVAILABLE) { + gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; + if (gw_state->optimized_ana_group_id == REDUNDANT_GW_ANA_GROUP_ID) { + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) gw_state->sm_state[i] = GW_STANDBY_STATE; + propose_pending = true; + //TODO try to find the 1st GW overloaded by ANA groups and start failback for ANA group that it is not an owner of + } + else {// prepare to Failback to this GW + // find the GW that took over on the group gw_state->optimized_ana_group_id + bool found = false; + for (auto& itr : *subsyst_it) { + //cout << "Found GW " << itr.second.gw_id << endl; + if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { + dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_id << dendl; + itr.second.sm_state[gw_state->optimized_ana_group_id] = GW_WAIT_FAILBACK_PREPARED; + add_timestamp_to_metadata(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation to metadata of gw + gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_BLOCKED_AGROUP_OWNER; + propose_pending = true; + found = true; + break; + } + } + if (!found) { + dout(4) << "Warning - not found the GW responsible for" << gw_state->optimized_ana_group_id << "that took over the GW" << gw_id << "when it was fallen" << dendl; + gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; + propose_pending = true; + } + } + } + + + else if (gw_state->availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + const auto now = ceph::coarse_mono_clock::now(); // const auto mgr_beacon_grace = g_conf().get_val("mon_mgr_beacon_grace");// todo change to something related to NVMeGW KATO + std::chrono::seconds sc(FAILBACK_PERSISTENCY_INT_SEC); + + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + if (gw_state->sm_state[i] == GW_WAIT_FAILBACK_PREPARED) { + GW_METADATA_T* metadata = find_gw_metadata(gw_id, nqn); + + ceph_assert(metadata !=0); + + // inspect (now - metadata->anagrp_sm_tstamps[i]) + if(now - metadata->anagrp_sm_tstamps[i] > sc){ //mgr_beacon_grace){ + // interval = 2*KATO pased T so find the state of the candidate to failback - whether it is still available + for (auto& itr : *subsyst_it) { + if (itr.second.sm_state[i] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + gw_state->sm_state[i] = GW_STANDBY_STATE; + itr.second.sm_state[i] = GW_ACTIVE_STATE; + dout(4) << "Failback from GW " << gw_id << " to " << itr.first << dendl; + propose_pending = true; + break; + } + } + } + // maybe there are other ANA groups that this GW is in state GW_WAIT_FAILBACK_PREPARED so continue pass over all ANA groups + } + } + } + else{ + dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; + rc = 1; + } + return rc; +} + + + +int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid) +{ + GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + gw_state->sm_state[ANA_groupid] = GW_ACTIVE_STATE; + //publish_map_to_gws(nqn); + dout(4) << "Set failower GW " << gw_id << " for ANA group " << (int)ANA_groupid << dendl; + return 0; +} + + +int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending) +{ +#define ILLEGAL_GW_ID " " +#define MIN_NUM_ANA_GROUPS 0xFFF + int rc = 0; + // bool found = 0; + int i; + int min_num_ana_groups_in_gw = 0; + int current_ana_groups_in_gw = 0; + GW_ID_T min_loaded_gw_id = ILLEGAL_GW_ID; + GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + if (gw_state) { + dout(4) << "GW down " << gw_id << dendl; + auto subsyst_it = find_subsystem_map(nqn); + gw_state->availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; + for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { // this GW may handle several ANA groups and for each of them need to found the candidate GW + if (gw_state->sm_state[i] == GW_ACTIVE_STATE) { + // Find a GW that takes over the ANA group(s) + + min_num_ana_groups_in_gw = MIN_NUM_ANA_GROUPS; + min_loaded_gw_id = ILLEGAL_GW_ID; + for (auto& itr : *subsyst_it) { // for all the gateways of the subsystem + if (itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + + current_ana_groups_in_gw = 0; + for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { + if (itr.second.sm_state[j] == GW_BLOCKED_AGROUP_OWNER) { + current_ana_groups_in_gw = 0xFFFF; + break; // dont take into account these GWs in the transitive state + } + else if (itr.second.sm_state[j] == GW_ACTIVE_STATE) + current_ana_groups_in_gw++; // how many ANA groups are handled by this GW + } + + if (min_num_ana_groups_in_gw > current_ana_groups_in_gw) { + min_num_ana_groups_in_gw = current_ana_groups_in_gw; + min_loaded_gw_id = itr.first; + dout(4) << "choose: gw-id min_ana_groups " << itr.first << current_ana_groups_in_gw << " min " << min_num_ana_groups_in_gw << dendl; + } + } + } + if (min_loaded_gw_id != ILLEGAL_GW_ID) { + propose_pending = true; + set_failover_gw_for_ANA_group(min_loaded_gw_id, nqn, i); + } + else + propose_pending = false; + gw_state->sm_state[i] = GW_STANDBY_STATE; + } + } + } + else { + dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; + rc = 1; + } + return rc; +} + - for (auto& itr : Gmap) { - for (auto& ptr : itr.second) { - std::cout << "NQN " << itr.first - << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id << " available " << (int)ptr.second.availability << " States: "; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - std::cout << ptr.second.sm_state[i] << " "; - } - std::cout << endl; - } - } - return 0; - } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 8ca4c7a12b10..7d345f22209f 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -37,148 +37,180 @@ inline ostream& _prefix(std::ostream *_dout, const Monitor &mon, const NVMeofGwMap *map) { return *_dout << "mon." << mon.name << "@" << mon.rank; } -*/ + */ +using GW_ID_T = std::string; typedef enum { - GW_IDLE_STATE = 0, //invalid state - GW_STANDBY_STATE, - GW_ACTIVE_STATE, - GW_BLOCKED_AGROUP_OWNER, - GW_WAIT_FAILBACK_PREPARED + GW_IDLE_STATE = 0, //invalid state + GW_STANDBY_STATE, + GW_ACTIVE_STATE, + GW_BLOCKED_AGROUP_OWNER, + GW_WAIT_FAILBACK_PREPARED }GW_STATES_PER_AGROUP_E; - enum class GW_AVAILABILITY_E { - GW_CREATED = 0, - GW_AVAILABLE, - GW_UNAVAILABLE +enum class GW_AVAILABILITY_E { + GW_CREATED = 0, + GW_AVAILABLE, + GW_UNAVAILABLE }; #define MAX_SUPPORTED_ANA_GROUPS 5 #define REDUNDANT_GW_ANA_GROUP_ID 0xFF typedef struct GW_STATE_T { - //bool ana_state[MAX_SUPPORTED_ANA_GROUPS]; // real ana states per ANA group for this GW :1- optimized, 0- inaccessible - GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group - uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF - GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable - uint16_t gw_id; - uint64_t epoch; // epoch per GW + //bool ana_state[MAX_SUPPORTED_ANA_GROUPS]; // real ana states per ANA group for this GW :1- optimized, 0- inaccessible + GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group + uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF + GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable + uint64_t version; // version per all GWs of the same subsystem. subsystem version }GW_STATE_T; typedef struct GW_METADATA_T { - uint32_t anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state + ceph::coarse_mono_clock::time_point anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state }GW_METADATA_T; -using GWMAP = std::map >; -using GWMETADATA = std::map >; +using GWMAP = std::map >; +using GWMETADATA = std::map >; +using SUBSYST_GWMAP = std::map; inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { - for(int i = 0; i subsyst_epoch; - bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode - uint32_t listen_mode_start_tick{0}; - - - //std::map module_options; - void encode(ceph::buffer::list &bl) const { - ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); - encode ((int)Gmap.size(),bl); // number nqn - for (auto& itr : Gmap) { - encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : map - } - ENCODE_FINISH(bl); - } - - void decode(ceph::buffer::list::const_iterator &bl) { - DECODE_START(1, bl); - // decode(name, bl);// decode(can_run, bl);// decode(error_string, bl);// decode(module_options, bl); +public: + Monitor *mon= NULL;// just for logs in the mon module file + GWMAP Gmap; + GWMETADATA Gmetadata;//TODO !!! this map is used in the processing of Gmap - so it should be add to the encode/decode + epoch_t epoch = 0; // epoch is for Paxos synchronization mechanizm + //std::map subsyst_epoch;// dont think we need this since epoch per subsystem stored in each GW in GW_STATE_T + bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode + uint32_t listen_mode_start_tick{0}; + + + //std::map module_options; + void encode(ceph::buffer::list &bl) const { + ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); + encode((int) epoch, bl);// global map epoch + encode ((int)Gmap.size(),bl); // number nqn + for (auto& itr : Gmap) { + encode((const std::string &)itr.first, bl);// nqn + encode( itr.second, bl);// encode the full map of this nqn : map + } + ENCODE_FINISH(bl); + } + + void decode(ceph::buffer::list::const_iterator &bl) { + DECODE_START(1, bl); + // decode(name, bl);// decode(can_run, bl);// decode(error_string, bl);// decode(module_options, bl); int num_subsystems; std::string nqn; + decode(epoch, bl); decode(num_subsystems, bl); - std::map gw_map; + SUBSYST_GWMAP gw_map; Gmap.clear(); + _dump_gwmap(Gmap); for(int i = 0; i < num_subsystems; i++){ - decode(nqn, bl); - Gmap.insert(make_pair(nqn, std::map())); - //decode the map - gw_map.clear(); - decode(gw_map, bl); - //insert the qw_map to Gmap - for(auto &itr: gw_map ){ - Gmap[nqn].insert({itr.first, itr.second}); - } + decode(nqn, bl); + Gmap.insert(make_pair(nqn, std::map())); + //decode the map + gw_map.clear(); + decode(gw_map, bl); + //insert the qw_map to Gmap + for(auto &itr: gw_map ){ + Gmap[nqn].insert({itr.first, itr.second}); + } } - DECODE_FINISH(bl); - } + DECODE_FINISH(bl); + } - NVMeofGwMap() = default; + //NVMeofGwMap( ) {} - GW_STATE_T * find_gw_map(uint16_t gw_id, const std::string& nqn ) + GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) { - auto it = Gmap.find(nqn); - if (it != Gmap.end() /* && it->first == nqn*/) { - auto it2 = it->second.find(gw_id); - if (it2 != it->second.end() /* && it2->first == gw_id*/ ){ // cout << "AAAA " << gw_id << " " << it2->first << endl; - return &it2->second; - } - } - return NULL; - } - - int cfg_add_gw (uint16_t gw_id, const std::string & nqn, uint16_t ana_grpid) { - GW_STATE_T state{ {GW_IDLE_STATE,} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, gw_id, 0 }; - - if (find_gw_map(gw_id, nqn)) { - //dout(4) << __func__ << " ERROR :GW already exists in map " << gw_id << dendl; - return 1; - } - if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) - { - //dout(4) << __func__ << " ERROR :GW " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; - return 1; - } - - //TODO check that all MAX_SUPPORTED_ANA_GROUPS are occupied in the subsystem - assert - - if(Gmap[nqn].size() ==0 ) - Gmap.insert(make_pair(nqn, std::map())); - Gmap[nqn].insert({gw_id, state}); - return 0; - } - - int _dump_gws( GWMAP & Gmap)const ; + auto it = Gmap.find(nqn); + if (it != Gmap.end() /* && it->first == nqn*/) { + auto it2 = it->second.find(gw_id); + if (it2 != it->second.end() /* && it2->first == gw_id*/ ){ // cout << "AAAA " << gw_id << " " << it2->first << endl; + return &it2->second; + } + } + return NULL; + } + + int _dump_gwmap(GWMAP & Gmap)const; + int _dump_metadata_map( )const ; + int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); + int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); + int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); + + void debug_encode_decode(){ + ceph::buffer::list bl; + encode(bl); + auto p = bl.cbegin(); + decode(p); + } +private: + int set_failover_gw_for_ANA_group (const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid); + void publish_map_to_gws(const std::string& nqn){ + } + + SUBSYST_GWMAP * find_subsystem_map(const std::string& nqn) + { + auto it = Gmap.find(nqn); + if (it != Gmap.end() ){ + return &it->second; + } + return NULL; + } + + int create_metadata(const GW_ID_T& gw_id, const std::string & nqn) + { + GW_METADATA_T new_metadata = {ceph::coarse_mono_clock::now(),}; + if(Gmetadata[nqn].size() == 0) + Gmetadata.insert(make_pair(nqn, std::map())); + Gmetadata[nqn].insert({ gw_id, new_metadata }); + return 0; + } + + + int add_timestamp_to_metadata(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) + { + GW_METADATA_T* metadata; + const auto now = ceph::coarse_mono_clock::now(); + if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { + metadata->anagrp_sm_tstamps[anagrpid] = now; + } + else { + ceph_assert(false); + } + return 0; + } + GW_METADATA_T* find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn); }; #endif /* SRC_MON_NVMEOFGWMAP_H_ */ diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 04db66022136..f08ac0bf4127 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -7,7 +7,7 @@ #include - #include "include/stringify.h" +#include "include/stringify.h" #include "NVMeofGwMon.h" using std::map; @@ -18,82 +18,215 @@ using std::ostringstream; #define dout_subsys ceph_subsys_mon #undef dout_prefix -#define dout_prefix _prefix(_dout, mon, this) +#define dout_prefix _prefix(_dout, this, this) using namespace TOPNSPC::common; -static ostream& _prefix(std::ostream *_dout, const Monitor &mon, - const NVMeofGwMon *hmon) { - return *_dout << "mon." << mon.name << "@" << mon.rank; +static ostream& _prefix(std::ostream *_dout, const NVMeofGwMon *h,//const Monitor &mon, + const NVMeofGwMon *hmon) { + return *_dout << "gw-mon." << hmon->mon.name << "@" << hmon->mon.rank; } +#define MY_MON_PREFFIX " NVMeGW " +void NVMeofGwMon::init(){ + dout(4) << MY_MON_PREFFIX << __func__ << "called " << dendl; +} - void NVMeofGwMon::init(){ - dout(4) << __func__ << "called " << dendl; - } +void NVMeofGwMon::on_restart(){ + dout(4) << MY_MON_PREFFIX << __func__ << "called " << dendl; + last_beacon.clear(); + last_tick = ceph::coarse_mono_clock::now(); +} - void NVMeofGwMon::on_shutdown() { - } +void NVMeofGwMon::on_shutdown() { + +} + static int cnt ; - void NVMeofGwMon::tick(){ - - if (!is_active() || !mon.is_leader()){ - dout(4) << __func__ << " NVMeofGwMon leader : " << mon.is_leader() << "active : " << is_active() << dendl; - if(mon.is_leader() && ++cnt == 4){ - Gmap.cfg_add_gw(1, "nqn2008.node1", 1); - Gmap.cfg_add_gw(2, "nqn2008.node1", 2); - Gmap.cfg_add_gw(3, "nqn2008.node1", 3); - Gmap.cfg_add_gw(1, "nqn2008.node2", 2); - // map._dump_gws(map.Gmap); - } - - return; - } - - const auto now = ceph::coarse_mono_clock::now(); - dout(4) << __func__ << "NVMeofGwMon leader got a real tick " << dendl; - last_tick = now; - } +#define start_cnt 6 +void NVMeofGwMon::inject1(){ + bool propose = false; + if( ++cnt == 4 ){// simulation that new configuration was added + pending_map.cfg_add_gw("gw1", "nqn2008.node1", 1); + pending_map.cfg_add_gw("gw2", "nqn2008.node1", 2); + pending_map.cfg_add_gw("gw3", "nqn2008.node1", 3); + pending_map.cfg_add_gw("gw1", "nqn2008.node2", 2); + pending_map._dump_gwmap(pending_map.Gmap); + pending_map._dump_metadata_map(); + pending_map.debug_encode_decode(); + dout(4) << "Dump map after decode encode:" <type == "mgrmap") { + if (sub->next <= map.get_epoch()) { + dout(20) << "Sending map to subscriber " << sub->session->con + << " " << sub->session->con->get_peer_addr() << dendl; + sub->session->con->send_message2(make_message(map)); + if (sub->onetime) { + mon.session_map.remove_sub(sub); + } else { + sub->next = map.get_epoch() + 1; + } + } } + */ +} - bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ - dout(4) << __func__ << dendl; - return true; + +void NVMeofGwMon::check_subs() +{ + const std::string type = "nvmegwmap";//"mgrmap"; + dout(4) << MY_MON_PREFFIX << __func__ << " count " << mon.session_map.subs.count(type) << dendl; + + if (mon.session_map.subs.count(type) == 0) + return; + for (auto sub : *(mon.session_map.subs[type])) { + check_sub(sub); } +} + + + +bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + return true; +} + +bool NVMeofGwMon::prepare_update(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + return true; +} + +bool NVMeofGwMon::preprocess_command(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + return true; +} + +bool NVMeofGwMon::prepare_command(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + return true; +} + + +bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + return false; // allways return false to call leader's prepare beacon +} + +bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + //auto m = op->get_req(); + //last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now(); + return true; +} diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index 0f5d02d47f13..8a631fa85735 100755 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -18,69 +18,74 @@ class NVMeofGwMon: public PaxosService { - NVMeofGwMap Gmap; //NVMeGWMap - //MgrMap pending_map; - //utime_t first_seen_inactive; + NVMeofGwMap map; //NVMeGWMap + NVMeofGwMap pending_map; + //utime_t first_seen_inactive; - std::map last_beacon; + //TODO the key of the beacon is a unique gw-id; for example string consisting from gw_num + subsystem_nqn + std::map< std::string, ceph::coarse_mono_clock::time_point> last_beacon; - // when the mon was not updating us for some period (e.g. during slow - // election) to reset last_beacon timeouts - ceph::coarse_mono_clock::time_point last_tick; + // when the mon was not updating us for some period (e.g. during slow + // election) to reset last_beacon timeouts + ceph::coarse_mono_clock::time_point last_tick; - std::vector command_descs; - std::vector pending_command_descs; + std::vector command_descs; + std::vector pending_command_descs; public: - NVMeofGwMon(Monitor &mn, Paxos &p, const std::string& service_name) - : PaxosService(mn, p, service_name) { } - ~NVMeofGwMon() override {} + NVMeofGwMon(Monitor &mn, Paxos &p, const std::string& service_name) + : PaxosService(mn, p, service_name) {map.mon = &mn; } + ~NVMeofGwMon() override {} - //const MgrMap &get_map() const { return map; } + //const MgrMap &get_map() const { return map; } - // bool in_use() const { return map.epoch > 0; } + // bool in_use() const { return map.epoch > 0; } - //void prime_mgr_client(); + //void prime_mgr_client(); - // void get_store_prefixes(std::set& s) const override; + // void get_store_prefixes(std::set& s) const override; - // 3 pure virtual methods of the paxosService - void create_initial()override{}; - void create_pending()override{}; - void encode_pending(MonitorDBStore::TransactionRef t)override ; + // 3 pure virtual methods of the paxosService + void create_initial()override{}; + void create_pending()override ; + void encode_pending(MonitorDBStore::TransactionRef t)override ; - void init() override; - void on_shutdown() override; - void update_from_paxos(bool *need_bootstrap) override; + void init() override; + void on_shutdown() override; + void on_restart() override; + void update_from_paxos(bool *need_bootstrap) override; - bool preprocess_query(MonOpRequestRef op) override; - bool prepare_update(MonOpRequestRef op) override; + bool preprocess_query(MonOpRequestRef op) override; + bool prepare_update(MonOpRequestRef op) override; - bool preprocess_command(MonOpRequestRef op); - bool prepare_command(MonOpRequestRef op); + bool preprocess_command(MonOpRequestRef op); + bool prepare_command(MonOpRequestRef op); - void encode_full(MonitorDBStore::TransactionRef t) override { } + void encode_full(MonitorDBStore::TransactionRef t) override { } - bool preprocess_beacon(MonOpRequestRef op); - bool prepare_beacon(MonOpRequestRef op); + bool preprocess_beacon(MonOpRequestRef op); + bool prepare_beacon(MonOpRequestRef op); - //void check_sub(Subscription *sub); - //void check_subs() + //void check_sub(Subscription *sub); + //void check_subs() - void tick() override; + void tick() override; - void print_summary(ceph::Formatter *f, std::ostream *ss) const; + void print_summary(ceph::Formatter *f, std::ostream *ss) const; - //const std::vector &get_command_descs() const; + //const std::vector &get_command_descs() const; - //void get_versions(std::map> &versions); - + //void get_versions(std::map> &versions); +private: + void check_subs(); + void check_sub(Subscription *sub); + void inject1(); }; From 1b4a9bb632c901156bfe8e0eca9c5ad24d19e764 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Fri, 20 Oct 2023 00:29:38 +0300 Subject: [PATCH 04/65] initial skeletone NvmeOf GW Map message Signed-off-by: Alexander Indenbaum --- src/CMakeLists.txt | 9 ++ src/ceph_nvmeof.cc | 75 +++++++++++ src/messages/MNVMeofGwBeacon.h | 142 +++++++++++++++++++++ src/messages/MNVMeofGwMap.h | 62 +++++++++ src/msg/Message.h | 10 +- src/nvmeof/NVMeofGw.cc | 223 +++++++++++++++++++++++++++++++++ src/nvmeof/NVMeofGw.h | 71 +++++++++++ 7 files changed, 590 insertions(+), 2 deletions(-) create mode 100644 src/ceph_nvmeof.cc create mode 100644 src/messages/MNVMeofGwBeacon.h create mode 100644 src/messages/MNVMeofGwMap.h create mode 100644 src/nvmeof/NVMeofGw.cc create mode 100644 src/nvmeof/NVMeofGw.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8aa271a2b5b2..04dc028ae583 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -865,6 +865,15 @@ if(WITH_FUSE) install(PROGRAMS mount.fuse.ceph DESTINATION ${CMAKE_INSTALL_SBINDIR}) endif(WITH_FUSE) +set(ceph_nvmeof_srcs + ceph_nvmeof.cc + nvmeof/NVMeofGw.cc) +add_executable(ceph-nvmeof ${ceph_nvmeof_srcs}) +add_dependencies(ceph-nvmeof ceph-common) +target_link_libraries(ceph-nvmeof client mon os global-static ceph-common) +install(TARGETS ceph-nvmeof DESTINATION bin) + + if(WITH_DOKAN) add_subdirectory(dokan) endif(WITH_DOKAN) diff --git a/src/ceph_nvmeof.cc b/src/ceph_nvmeof.cc new file mode 100644 index 000000000000..b510b9fe115e --- /dev/null +++ b/src/ceph_nvmeof.cc @@ -0,0 +1,75 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 Red Hat Inc + * + * Author: Alexander Indenbaum + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include + +#include "include/types.h" +#include "include/compat.h" +#include "common/config.h" +#include "common/ceph_argparse.h" +#include "common/errno.h" +#include "common/pick_address.h" +#include "global/global_init.h" + +#include "nvmeof/NVMeofGw.h" + +static void usage() +{ + std::cout << "usage: ceph-nvmeof -i [flags]\n" + << std::endl; + generic_server_usage(); +} + +/** + * A short main() which just instantiates a Nvme and + * hands over control to that. + */ +int main(int argc, const char **argv) +{ + ceph_pthread_setname(pthread_self(), "ceph-nvmeof"); + + auto args = argv_to_vec(argc, argv); + if (args.empty()) { + std::cerr << argv[0] << ": -h or --help for usage" << std::endl; + exit(1); + } + if (ceph_argparse_need_usage(args)) { + usage(); + exit(0); + } + + std::map defaults = {}; + auto cct = global_init(&defaults, args, + CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + + pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); + + global_init_daemonize(g_ceph_context); + global_init_chdir(g_ceph_context); + common_init_finish(g_ceph_context); + + NVMeofGw gw(argc, argv); + int rc = gw.init(); + if (rc != 0) { + std::cerr << "Error in initialization: " << cpp_strerror(rc) << std::endl; + return rc; + } + + return gw.main(args); +} + diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h new file mode 100644 index 000000000000..7a44b46aa4bc --- /dev/null +++ b/src/messages/MNVMeofGwBeacon.h @@ -0,0 +1,142 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_NVMEOFGWBEACON_H +#define CEPH_NVMEOFGWBEACON_H + +#include +#include "messages/PaxosServiceMessage.h" +#include "mon/MonCommand.h" +#include "mon/NVMeofGwMap.h" + +#include "include/types.h" + +typedef GW_STATES_PER_AGROUP_E SM_STATE[MAX_SUPPORTED_ANA_GROUPS]; + +std::ostream& operator<<(std::ostream& os, const SM_STATE value) { + os << "SM_STATE [ "; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + switch (value[i]) { + case GW_STATES_PER_AGROUP_E::GW_IDLE_STATE: os << "IDLE "; break; + case GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE: os << "STANDBY "; break; + case GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE: os << "ACTIVE "; break; + case GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER: os << "BLOCKED_AGROUP_OWNER "; break; + case GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED: os << "WAIT_FAILBACK_PREPARED "; break; + default: os << "Invalid"; + } + } + os << "]"; + return os; +} + +std::ostream& operator<<(std::ostream& os, const GW_AVAILABILITY_E value) { + switch (value) { + + case GW_AVAILABILITY_E::GW_CREATED: os << "CREATED"; break; + case GW_AVAILABILITY_E::GW_AVAILABLE: os << "AVAILABLE"; break; + case GW_AVAILABILITY_E::GW_UNAVAILABLE: os << "UNAVAILABLE"; break; + + default: os << "Invalid"; + } + return os; +} + +class MNVMeofGwBeacon final : public PaxosServiceMessage { +private: + static constexpr int HEAD_VERSION = 1; + static constexpr int COMPAT_VERSION = 1; + +protected: + //bool ana_state[MAX_SUPPORTED_ANA_GROUPS]; // real ana states per ANA group for this GW :1- optimized, 0- inaccessible + std::string gw_id; + SM_STATE sm_state; // state machine states per ANA group + uint16_t opt_ana_gid; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF + GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable + + uint64_t version; + +public: + MNVMeofGwBeacon() + : PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION} + {} + + MNVMeofGwBeacon(const std::string &gw_id_, + const GW_STATES_PER_AGROUP_E (&sm_state_)[MAX_SUPPORTED_ANA_GROUPS], + const uint16_t& opt_ana_gid_, + const GW_AVAILABILITY_E availability_, + const uint64_t& version_ + ) + : PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION}, + gw_id(gw_id_), opt_ana_gid(opt_ana_gid_), + availability(availability_), version(version_) + { + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + sm_state[i] = sm_state_[i]; + } + + const std::string& get_gw_id() const { return gw_id; } + const uint16_t& get_opt_ana_gid() const { return opt_ana_gid; } + const GW_AVAILABILITY_E& get_availability() const { return availability; } + const uint64_t& get_version() const { return version; } + const SM_STATE& get_sm_state() const { return sm_state; }; + +private: + ~MNVMeofGwBeacon() final {} + +public: + + std::string_view get_type_name() const override { return "nvmeofgwbeacon"; } + + void print(std::ostream& out) const override { + out << get_type_name() << " nvmeofgw" << "(" + << gw_id << ", " << sm_state << "," << opt_ana_gid << "," << availability << "," << version + << ")"; + } + + void encode_payload(uint64_t features) override { + header.version = HEAD_VERSION; + header.compat_version = COMPAT_VERSION; + using ceph::encode; + paxos_encode(); + encode(gw_id, payload); + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + encode((int)sm_state[i], payload); + encode(opt_ana_gid, payload); + encode((int)availability, payload); + encode(version, payload); + } + + void decode_payload() override { + using ceph::decode; + auto p = payload.cbegin(); + + paxos_decode(p); + decode(gw_id, payload); + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + int e; decode(e, payload); + sm_state[i] = static_cast(e); + } + decode(opt_ana_gid, p); + int a; decode(a, p); + availability = static_cast(a); + decode(version, p); + } + +private: + template + friend boost::intrusive_ptr ceph::make_message(Args&&... args); +}; + + +#endif diff --git a/src/messages/MNVMeofGwMap.h b/src/messages/MNVMeofGwMap.h new file mode 100644 index 000000000000..49f29d050e7d --- /dev/null +++ b/src/messages/MNVMeofGwMap.h @@ -0,0 +1,62 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_MNVMEOFGWMAP_H +#define CEPH_MNVMEOFGWMAP_H + +#include "msg/Message.h" +#include "mon/NVMeofGwMap.h" + +class MNVMeofGwMap final : public Message { +protected: + NVMeofGwMap map; + +public: + const NVMeofGwMap& get_map() {return map;} + +private: + MNVMeofGwMap() : + Message{MSG_MNVMEOF_GW_MAP} {} + MNVMeofGwMap(const NVMeofGwMap &map_) : + Message{MSG_MNVMEOF_GW_MAP}, map(map_) + {} + ~MNVMeofGwMap() final {} + +public: + std::string_view get_type_name() const override { return "nvmeofgwmap"; } + void print(std::ostream& out) const override { + // ../src/messages/MNVMeofGwMap.h:40:39: error: no match for ‘operator<<’ (operand types are ‘std::basic_ostream’ and ‘const NVMeofGwMap’) + out << get_type_name() << "(map " << "should be map instance here" << ")"; + } + + void decode_payload() override { + // ../src/messages/MNVMeofGwMap.h:46:11: error: no matching function for call to ‘decode(NVMeofGwMap&, ceph::buffer::v15_2_0::list::iterator_impl&)’ + //auto p = payload.cbegin(); + //decode(map, p); + } + void encode_payload(uint64_t features) override { + //../src/messages/MNVMeofGwMap.h:51:11: error: no matching function for call to ‘encode(NVMeofGwMap&, ceph::buffer::v15_2_0::list&, uint64_t&)’ + //using ceph::encode; + //encode(map, payload, features); + } +private: + using RefCountedObject::put; + using RefCountedObject::get; + template + friend boost::intrusive_ptr ceph::make_message(Args&&... args); + template + friend MURef crimson::make_message(Args&&... args); +}; + +#endif diff --git a/src/msg/Message.h b/src/msg/Message.h index 40833744b67d..47f83c3e1074 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -227,7 +227,7 @@ #define MSG_MGR_MAP 0x704 // *** ceph-mon(MgrMonitor) -> ceph-mgr -#define MSG_MGR_DIGEST 0x705 +#define MSG_MGR_DIGEST 0x705 // *** cephmgr -> ceph-mon #define MSG_MON_MGR_REPORT 0x706 #define MSG_SERVICE_MAP 0x707 @@ -237,7 +237,13 @@ #define MSG_MGR_COMMAND_REPLY 0x70a // *** ceph-mgr <-> MON daemons *** -#define MSG_MGR_UPDATE 0x70b +#define MSG_MGR_UPDATE 0x70b + +// *** nvmeof mon -> gw daemons *** +#define MSG_MNVMEOF_GW_MAP 0x70c + +// *** gw daemons -> nvmeof mon *** +#define MSG_MNVMEOF_GW_BEACON 0x70d // ====================================================== diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc new file mode 100644 index 000000000000..e1aa13cead81 --- /dev/null +++ b/src/nvmeof/NVMeofGw.cc @@ -0,0 +1,223 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 John Spray + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#include + +#include "common/errno.h" +#include "common/signal.h" +#include "include/compat.h" + +#include "include/stringify.h" +#include "global/global_context.h" +#include "global/signal_handler.h" + + +#include "messages/MNVMeofGwBeacon.h" +#include "messages/MNVMeofGwMap.h" +#include "NVMeofGw.h" + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_mgr +#undef dout_prefix +#define dout_prefix *_dout << "nvmeofgw " << __func__ << " " + +using std::map; +using std::string; +using std::vector; + +NVMeofGw::NVMeofGw(int argc, const char **argv) : + Dispatcher(g_ceph_context), + monc{g_ceph_context, poolctx}, + client_messenger(Messenger::create(g_ceph_context, "async", entity_name_t::CLIENT(-1), "client", getpid())), + objecter{g_ceph_context, client_messenger.get(), &monc, poolctx}, + client{client_messenger.get(), &monc, &objecter}, + finisher(g_ceph_context, "Nvmeof", "nvme-fin"), + timer(g_ceph_context, lock), + orig_argc(argc), + orig_argv(argv) +{ +} + +NVMeofGw::~NVMeofGw() = default; + +const char** NVMeofGw::get_tracked_conf_keys() const +{ + static const char* KEYS[] = { + NULL + }; + return KEYS; +} + +int NVMeofGw::init() +{ + init_async_signal_handler(); + register_async_signal_handler(SIGHUP, sighup_handler); + + std::lock_guard l(lock); + + // Start finisher + finisher.start(); + + // Initialize Messenger + client_messenger->add_dispatcher_tail(this); + client_messenger->add_dispatcher_head(&objecter); + client_messenger->add_dispatcher_tail(&client); + client_messenger->start(); + + poolctx.start(2); + + // Initialize MonClient + if (monc.build_initial_monmap() < 0) { + client_messenger->shutdown(); + client_messenger->wait(); + return -1; + } + + monc.sub_want("NVMeofGw", 0, 0); + + monc.set_want_keys(CEPH_ENTITY_TYPE_MON|CEPH_ENTITY_TYPE_OSD + |CEPH_ENTITY_TYPE_MDS|CEPH_ENTITY_TYPE_MGR); + monc.set_messenger(client_messenger.get()); + + // We must register our config callback before calling init(), so + // that we see the initial configuration message + monc.register_config_callback([this](const std::string &k, const std::string &v){ + // removing value to hide sensitive data going into mgr logs + // leaving this for debugging purposes + dout(10) << "nvmeof config_callback: " << k << " : " << v << dendl; + + return false; + }); + monc.register_config_notify_callback([this]() { + dout(4) << "nvmeof monc config notify callback" << dendl; + }); + dout(4) << "nvmeof Registered monc callback" << dendl; + + int r = monc.init(); + if (r < 0) { + monc.shutdown(); + client_messenger->shutdown(); + client_messenger->wait(); + return r; + } + + r = monc.authenticate(); + if (r < 0) { + derr << "Authentication failed, did you specify an ID with a valid keyring?" << dendl; + monc.shutdown(); + client_messenger->shutdown(); + client_messenger->wait(); + return r; + } + // only forward monmap updates after authentication finishes, otherwise + // monc.authenticate() will be waiting for MgrStandy::ms_dispatch() + // to acquire the lock forever, as it is already locked in the beginning of + // this method. + monc.set_passthrough_monmap(); + + client_t whoami = monc.get_global_id(); + client_messenger->set_myname(entity_name_t::MGR(whoami.v)); + objecter.set_client_incarnation(0); + objecter.init(); + objecter.start(); + client.init(); + timer.init(); + + tick(); + + dout(4) << "nvmeof Complete." << dendl; + return 0; +} + +void NVMeofGw::send_beacon() +{ + ceph_assert(ceph_mutex_is_locked_by_me(lock)); + dout(10) << "sending beacon as gid " << monc.get_global_id() << dendl; + + auto m = ceph::make_message(); + + monc.send_mon_message(std::move(m)); +} + +void NVMeofGw::tick() +{ + dout(10) << __func__ << dendl; + send_beacon(); + + timer.add_event_after( + g_conf().get_val("mgr_tick_period").count(), + new LambdaContext([this](int r){ + tick(); + } + )); +} + +void NVMeofGw::shutdown() +{ + finisher.queue(new LambdaContext([&](int) { + std::lock_guard l(lock); + + dout(4) << "nvmeof Shutting down" << dendl; + + + // stop sending beacon first, I use monc to talk with monitors + timer.shutdown(); + // client uses monc and objecter + client.shutdown(); + // Stop asio threads, so leftover events won't call into shut down + // monclient/objecter. + poolctx.finish(); + // stop monc, so mon won't be able to instruct me to shutdown/activate after + // the active_mgr is stopped + monc.shutdown(); + + // objecter is used by monc and active_mgr + objecter.shutdown(); + // client_messenger is used by all of them, so stop it in the end + client_messenger->shutdown(); + })); + + // Then stop the finisher to ensure its enqueued contexts aren't going + // to touch references to the things we're about to tear down + finisher.wait_for_empty(); + finisher.stop(); +} + +void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t m) +{ + dout(10) << "handle nvmeof gw map" << dendl; +} + +bool NVMeofGw::ms_dispatch2(const ref_t& m) +{ + std::lock_guard l(lock); + dout(10) << "got map type" << m->get_type() << dendl; + + + if (m->get_type() == MSG_MNVMEOF_GW_MAP) { + handle_nvmeof_gw_map(ref_cast(m)); + } + bool handled = false; + return handled; +} + +int NVMeofGw::main(vector args) +{ + client_messenger->wait(); + + // Disable signal handlers + unregister_async_signal_handler(SIGHUP, sighup_handler); + shutdown_async_signal_handler(); + + return 0; +} diff --git a/src/nvmeof/NVMeofGw.h b/src/nvmeof/NVMeofGw.h new file mode 100644 index 000000000000..e27b49037904 --- /dev/null +++ b/src/nvmeof/NVMeofGw.h @@ -0,0 +1,71 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2016 John Spray + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + + +#ifndef NVMEOFGW_H_ +#define NVMEOFGW_H_ + +#include "auth/Auth.h" +#include "common/async/context_pool.h" +#include "common/Finisher.h" +#include "common/Timer.h" +#include "common/LogClient.h" + +#include "client/Client.h" +#include "mon/MonClient.h" +#include "osdc/Objecter.h" +#include "messages/MNVMeofGwMap.h" + +class NVMeofGw : public Dispatcher, + public md_config_obs_t { +protected: + ceph::async::io_context_pool poolctx; + MonClient monc; + std::unique_ptr client_messenger; + Objecter objecter; + Client client; + + ceph::mutex lock = ceph::make_mutex("NVMeofGw::lock"); + Finisher finisher; + SafeTimer timer; + + int orig_argc; + const char **orig_argv; + + void send_beacon(); + +public: + NVMeofGw(int argc, const char **argv); + ~NVMeofGw() override; + + // Dispatcher interface + bool ms_dispatch2(const ceph::ref_t& m) override; + bool ms_handle_reset(Connection *con) override { return false; } + void ms_handle_remote_reset(Connection *con) override {} + bool ms_handle_refused(Connection *con) override { return false; }; + + // config observer bits + const char** get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, + const std::set &changed) override {}; + + int init(); + void shutdown(); + int main(std::vector args); + void tick(); + + void handle_nvmeof_gw_map(ceph::ref_t m); +}; + +#endif + From 1285fc623b51645d08857182d5cc25036f28b518 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Wed, 25 Oct 2023 17:14:07 +0000 Subject: [PATCH 05/65] Initial integration - fix global_init, run as CODE_ENVIRONMENT_UTILITY - increase llog verbosity for now Test: - ceph cluster ../src/vstart.sh --new --without-dashboard --memstore - nvmeof gw monitor client ./bin/ceph-nvmeof -i a -c /home/baum/ceph-ci/build/ceph.conf -n client.admin \ -k /home/baum/ceph-ci/build/keyring 2>&1 | tee ceph-nvmeof.log Output: - init ``` 2023-10-25T17:10:25.786+0000 7fae15611580 0 nvmeofgw int NVMeofGw::init() 2023-10-25T17:10:25.787+0000 7fae15611580 0 nvmeofgw int NVMeofGw::init() nvmeof Registered monc callback 2023-10-25T17:10:25.791+0000 7fae15611580 0 nvmeofgw int NVMeofGw::init() monc.authentication done 2023-10-25T17:10:25.792+0000 7fae15611580 0 nvmeofgw void NVMeofGw::tick() 2023-10-25T17:10:25.792+0000 7fae15611580 0 nvmeofgw void NVMeofGw::send_beacon() sending beacon as gid 4258 2023-10-25T17:10:25.792+0000 7fae15611580 0 nvmeofgw int NVMeofGw::init() Complete. ``` - map dispatch ``` 2023-10-25T17:10:25.792+0000 7fae10017640 0 nvmeofgw virtual bool NVMeofGw::ms_dispatch2(ceph::ref_t&) got map type 4 2023-10-25T17:10:25.792+0000 7fae10017640 0 ms_deliver_dispatch: unhandled message 0x56170d357600 mon_map magic: 0 v1 from mon.1 v2:10.243.64.4:40910/0 202 ``` - beacon send ``` 2023-10-25T17:10:29.794+0000 7fae0c00f640 0 nvmeofgw void NVMeofGw::tick() 2023-10-25T17:10:29.794+0000 7fae0c00f640 0 nvmeofgw void NVMeofGw::send_beacon() sending beacon as gid 4258 ``` Signed-off-by: Alexander Indenbaum --- src/ceph_nvmeof.cc | 8 +++----- src/nvmeof/NVMeofGw.cc | 15 +++++++++------ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/ceph_nvmeof.cc b/src/ceph_nvmeof.cc index b510b9fe115e..0e1259164842 100644 --- a/src/ceph_nvmeof.cc +++ b/src/ceph_nvmeof.cc @@ -51,11 +51,9 @@ int main(int argc, const char **argv) exit(0); } - std::map defaults = {}; - auto cct = global_init(&defaults, args, - CEPH_ENTITY_TYPE_CLIENT, - CODE_ENVIRONMENT_DAEMON, - CINIT_FLAG_UNPRIVILEGED_DAEMON_DEFAULTS); + auto cct = global_init(nullptr, args, CEPH_ENTITY_TYPE_CLIENT, + CODE_ENVIRONMENT_UTILITY, // maybe later use CODE_ENVIRONMENT_DAEMON, + CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); pick_addresses(g_ceph_context, CEPH_PICK_ADDRESS_PUBLIC); diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index e1aa13cead81..45adf31015bc 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -29,7 +29,7 @@ #define dout_context g_ceph_context #define dout_subsys ceph_subsys_mgr #undef dout_prefix -#define dout_prefix *_dout << "nvmeofgw " << __func__ << " " +#define dout_prefix *_dout << "nvmeofgw " << __PRETTY_FUNCTION__ << " " using std::map; using std::string; @@ -60,6 +60,7 @@ const char** NVMeofGw::get_tracked_conf_keys() const int NVMeofGw::init() { + dout(0) << dendl; init_async_signal_handler(); register_async_signal_handler(SIGHUP, sighup_handler); @@ -110,6 +111,7 @@ int NVMeofGw::init() client_messenger->wait(); return r; } + dout(0) << "nvmeof Registered monc callback" << dendl; r = monc.authenticate(); if (r < 0) { @@ -119,6 +121,7 @@ int NVMeofGw::init() client_messenger->wait(); return r; } + dout(0) << "monc.authentication done" << dendl; // only forward monmap updates after authentication finishes, otherwise // monc.authenticate() will be waiting for MgrStandy::ms_dispatch() // to acquire the lock forever, as it is already locked in the beginning of @@ -135,14 +138,14 @@ int NVMeofGw::init() tick(); - dout(4) << "nvmeof Complete." << dendl; + dout(0) << "Complete." << dendl; return 0; } void NVMeofGw::send_beacon() { ceph_assert(ceph_mutex_is_locked_by_me(lock)); - dout(10) << "sending beacon as gid " << monc.get_global_id() << dendl; + dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; auto m = ceph::make_message(); @@ -151,7 +154,7 @@ void NVMeofGw::send_beacon() void NVMeofGw::tick() { - dout(10) << __func__ << dendl; + dout(0) << dendl; send_beacon(); timer.add_event_after( @@ -195,13 +198,13 @@ void NVMeofGw::shutdown() void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t m) { - dout(10) << "handle nvmeof gw map" << dendl; + dout(0) << "handle nvmeof gw map" << dendl; } bool NVMeofGw::ms_dispatch2(const ref_t& m) { std::lock_guard l(lock); - dout(10) << "got map type" << m->get_type() << dendl; + dout(0) << "got map type " << m->get_type() << dendl; if (m->get_type() == MSG_MNVMEOF_GW_MAP) { From 90a311b5b7105bdf7553377493f6d022de56c3c8 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Wed, 25 Oct 2023 19:19:33 +0000 Subject: [PATCH 06/65] debug MAP api --- src/mon/NVMeofGwMap.cc | 25 ++++++++++++++----- src/mon/NVMeofGwMap.h | 56 ++++++++++++++++++++++++++++++++++++++++-- src/mon/NVMeofGwMon.cc | 9 ++++--- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 57e84bf6e686..18f6ed5d6726 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -19,6 +19,8 @@ static ostream& _prefix(std::ostream *_dout, const NVMeofGwMap *h,//const Monito return *_dout << "gw-mon." << map->mon->name << "@" << map->mon->rank; } + + int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid) { GW_STATE_T state{ {GW_IDLE_STATE,} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, 0 }; @@ -98,8 +100,13 @@ int NVMeofGwMap::_dump_metadata_map( )const { } +void NVMeofGwMap::dump_timestamp(ceph::coarse_mono_clock::time_point &tp){ - + auto now_s = std::chrono::time_point_cast( tp); + auto value = now_s.time_since_epoch(); + long duration = value.count(); + dout(4) << "NVM ts : " << duration << dendl; +} int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending) @@ -157,16 +164,15 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn else if (gw_state->availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - const auto now = ceph::coarse_mono_clock::now(); // const auto mgr_beacon_grace = g_conf().get_val("mon_mgr_beacon_grace");// todo change to something related to NVMeGW KATO + auto now = ceph::coarse_mono_clock::now(); // const auto mgr_beacon_grace = g_conf().get_val("mon_mgr_beacon_grace");// todo change to something related to NVMeGW KATO std::chrono::seconds sc(FAILBACK_PERSISTENCY_INT_SEC); for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) if (gw_state->sm_state[i] == GW_WAIT_FAILBACK_PREPARED) { GW_METADATA_T* metadata = find_gw_metadata(gw_id, nqn); - ceph_assert(metadata !=0); - - // inspect (now - metadata->anagrp_sm_tstamps[i]) + dump_timestamp(now); + dump_timestamp(metadata->anagrp_sm_tstamps[i]); if(now - metadata->anagrp_sm_tstamps[i] > sc){ //mgr_beacon_grace){ // interval = 2*KATO pased T so find the state of the candidate to failback - whether it is still available for (auto& itr : *subsyst_it) { @@ -177,6 +183,13 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn propose_pending = true; break; } + else if (itr.second.optimized_ana_group_id == i && itr.second.availability == GW_AVAILABILITY_E::GW_UNAVAILABLE){ + //This GW is failed again - persistency interval is broken so this gw standby for the group + gw_state->sm_state[i] = GW_STANDBY_STATE; + dout(4) << "Failback unsuccessfull " << gw_id << "becomes standby for the ana group " << i << dendl; + propose_pending = true; + break; + } } } // maybe there are other ANA groups that this GW is in state GW_WAIT_FAILBACK_PREPARED so continue pass over all ANA groups @@ -184,7 +197,7 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn } } else{ - dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; + dout(4) << __func__ << "ERROR GW-id was not found in the map " << gw_id << dendl; rc = 1; } return rc; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 7d345f22209f..7b2821afbf33 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -22,6 +22,7 @@ #include "common/Clock.h" #include "PaxosService.h" #include "msg/Message.h" +#include "common/ceph_time.h" /*#include "NVMeofGwMon.h" using std::ostream; @@ -39,6 +40,9 @@ inline ostream& _prefix(std::ostream *_dout, const Monitor &mon, } */ +using ceph::coarse_mono_clock; + + using GW_ID_T = std::string; typedef enum { @@ -73,7 +77,7 @@ typedef struct GW_METADATA_T { using GWMAP = std::map >; using GWMETADATA = std::map >; using SUBSYST_GWMAP = std::map; - +using SUBSYST_GWMETA = std::map; inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { for(int i = 0; i ( state.anagrp_sm_tstamps[i]); + auto value = now_ms.time_since_epoch(); + long duration = value.count(); + encode( duration, bl); + } +} + +inline void decode(GW_METADATA_T& state, ceph::bufferlist::const_iterator& bl) { + for(int i = 0; i } + // Encode Gmetadata + encode ((int)Gmetadata.size(),bl); + for (auto& itr : Gmetadata) { + encode((const std::string &)itr.first, bl);// nqn + encode( itr.second, bl);// encode the full map of this nqn : map + } + ENCODE_FINISH(bl); } @@ -144,6 +179,22 @@ class NVMeofGwMap Gmap[nqn].insert({itr.first, itr.second}); } } + // decode Gmetadata + decode(num_subsystems, bl); + SUBSYST_GWMETA gw_meta; + Gmetadata.clear(); + //_dump_gwmap(Gmap); + for(int i = 0; i < num_subsystems; i++){ + decode(nqn, bl); + Gmetadata.insert(make_pair(nqn, std::map())); + //decode the map + gw_meta.clear(); + decode(gw_meta, bl); + //insert the qw_map to Gmap + for(auto &itr: gw_meta ){ + Gmetadata[nqn].insert({itr.first, itr.second}); + } + } DECODE_FINISH(bl); } @@ -166,6 +217,7 @@ class NVMeofGwMap int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); + void dump_timestamp(ceph::coarse_mono_clock::time_point &tp); void debug_encode_decode(){ ceph::buffer::list bl; diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index f08ac0bf4127..bdc011c0daed 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -79,7 +79,7 @@ void NVMeofGwMon::inject1(){ else if( cnt == start_cnt+3 ){ // simulate - gw1 down pending_map.process_gw_map_gw_down( "gw1", "nqn2008.node1", propose); if(propose) - propose_pending(); + propose_pending(); pending_map._dump_metadata_map(); } @@ -89,15 +89,18 @@ void NVMeofGwMon::inject1(){ pending_map.process_gw_map_ka( "gw1", "nqn2008.node1", propose); if(propose) propose_pending(); - } - else if( cnt == start_cnt+6 ){ // simulate - gw2 still OK pending_map.process_gw_map_ka( "gw2", "nqn2008.node1", propose); if(propose) propose_pending(); } + else if( cnt == start_cnt+8 ){ // simulate - gw2 still OK - checks the persistency timer in the state + pending_map.process_gw_map_ka( "gw2", "nqn2008.node1", propose); + if(propose) + propose_pending(); + } } From 00231a291cbe8b4df3e42097336f5f5df63783e5 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 26 Oct 2023 08:43:08 +0000 Subject: [PATCH 07/65] new SM timer concept --- src/mon/NVMeofGwMap.cc | 41 ++++++++++++++++++++++++------ src/mon/NVMeofGwMap.h | 57 ++++++++++++++++++++++++++++++------------ src/mon/NVMeofGwMon.cc | 2 +- 3 files changed, 75 insertions(+), 25 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 18f6ed5d6726..82eb8f174243 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -85,6 +85,26 @@ int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { } +int NVMeofGwMap::update_gw_timers( ) { + + dout(4) << __func__ << " called " << mon << dendl; + + for (auto& itr : Gmetadata) { + for (auto& ptr : itr.second) { + + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + if (ptr.second.anagrp_sm_tstamps[i] != INVALID_GW_TIMER){ + ptr.second.anagrp_sm_tstamps[i] ++; + dout(4) << "timer for GW " << itr.first << " ANA GRP " << i << ptr.second.anagrp_sm_tstamps[i] <availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - auto now = ceph::coarse_mono_clock::now(); // const auto mgr_beacon_grace = g_conf().get_val("mon_mgr_beacon_grace");// todo change to something related to NVMeGW KATO - std::chrono::seconds sc(FAILBACK_PERSISTENCY_INT_SEC); - - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) if (gw_state->sm_state[i] == GW_WAIT_FAILBACK_PREPARED) { GW_METADATA_T* metadata = find_gw_metadata(gw_id, nqn); ceph_assert(metadata !=0); - dump_timestamp(now); - dump_timestamp(metadata->anagrp_sm_tstamps[i]); - if(now - metadata->anagrp_sm_tstamps[i] > sc){ //mgr_beacon_grace){ + ceph_assert(metadata->anagrp_sm_tstamps[i] != INVALID_GW_TIMER); + //dump_timestamp(now); + dout(4) << "Check timer for Failback from GW " << gw_id << " ticks : " << metadata->anagrp_sm_tstamps[i] << dendl; + if(metadata->anagrp_sm_tstamps[i] >= 2){//TODO //mgr_beacon_grace){ // interval = 2*KATO pased T so find the state of the candidate to failback - whether it is still available + remove_timestamp_from_metadata(gw_id,nqn, i); for (auto& itr : *subsyst_it) { if (itr.second.sm_state[i] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { gw_state->sm_state[i] = GW_STANDBY_STATE; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 7b2821afbf33..c7f9ddf29b92 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -60,6 +60,7 @@ enum class GW_AVAILABILITY_E { }; #define MAX_SUPPORTED_ANA_GROUPS 5 +#define INVALID_GW_TIMER 0xffff #define REDUNDANT_GW_ANA_GROUP_ID 0xFF typedef struct GW_STATE_T { //bool ana_state[MAX_SUPPORTED_ANA_GROUPS]; // real ana states per ANA group for this GW :1- optimized, 0- inaccessible @@ -70,7 +71,7 @@ typedef struct GW_STATE_T { }GW_STATE_T; typedef struct GW_METADATA_T { - ceph::coarse_mono_clock::time_point anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state + int anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state }GW_METADATA_T; @@ -108,21 +109,20 @@ inline void encode(const GW_METADATA_T& state, ceph::bufferlist &bl) { for(int i = 0; i ( state.anagrp_sm_tstamps[i]); - auto value = now_ms.time_since_epoch(); - long duration = value.count(); - encode( duration, bl); + //auto now_ms = std::chrono::time_point_cast( state.anagrp_sm_tstamps[i]); + //auto value = now_ms.time_since_epoch(); + //long duration = value.count(); + int tick = state.anagrp_sm_tstamps[i]; + encode( tick, bl); } } inline void decode(GW_METADATA_T& state, ceph::bufferlist::const_iterator& bl) { for(int i = 0; i ())); - Gmetadata[nqn].insert({ gw_id, new_metadata }); + //Gmetadata[nqn].insert({ gw_id, new_metadata }); return 0; } @@ -252,16 +253,40 @@ class NVMeofGwMap int add_timestamp_to_metadata(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) { GW_METADATA_T* metadata; - const auto now = ceph::coarse_mono_clock::now(); + //const auto now = ceph::coarse_mono_clock::now(); if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { - metadata->anagrp_sm_tstamps[anagrpid] = now; + metadata->anagrp_sm_tstamps[anagrpid] = 0;// set timer } else { - ceph_assert(false); + GW_METADATA_T new_metadata = {INVALID_GW_TIMER,}; + for (int i=0; ianagrp_sm_tstamps[anagrpid] = INVALID_GW_TIMER; + for(i=0; ianagrp_sm_tstamps[i] != INVALID_GW_TIMER) + break; + if(i==MAX_SUPPORTED_ANA_GROUPS){ + Gmetadata[nqn].clear(); // remove all gw_id timers from the map + } + } + else { + ceph_assert(false); + } + return 0; + } + GW_METADATA_T* find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn); }; diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index bdc011c0daed..2e3d0c7810f8 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -117,7 +117,7 @@ void NVMeofGwMon::tick(){ dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; last_tick = now; - + pending_map.update_gw_timers( ); //TODO pass over the last_beacon map to detect the overdue beacons indicating the GW died //if found the one - convert the last_beacon key to gw_id and nqn and call the function pending_map_process_gw_map_gw_down From f4e223480d282c20dd507aa004b1802b04879c25 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 26 Oct 2023 10:03:31 +0000 Subject: [PATCH 08/65] Add dispatch for MSG_MNVMEOF_GW_BEACON + traces Signed-off-by: Leonid Chernin --- src/mon/Monitor.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 0b0c4122f67f..1eda9848150f 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -4527,6 +4527,9 @@ void Monitor::_ms_dispatch(Message *m) void Monitor::dispatch_op(MonOpRequestRef op) { op->mark_event("mon:dispatch_op"); + + dout(10) << "Received message: " << op->get_req()->get_type() << dendl; + MonSession *s = op->get_session(); ceph_assert(s); if (s->closed) { @@ -4640,6 +4643,11 @@ void Monitor::dispatch_op(MonOpRequestRef op) paxos_service[PAXOS_MGR]->dispatch(op); return; + case MSG_MNVMEOF_GW_BEACON: + paxos_service[PAXOS_NVMEGW]->dispatch(op); + return; + + // MgrStat case MSG_MON_MGR_REPORT: case CEPH_MSG_STATFS: From f600f9794223ee11ffb3064b9379c124e9f9187a Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Thu, 26 Oct 2023 13:11:12 +0300 Subject: [PATCH 09/65] add MSG_MNVMEOF_GW_BEACON decode Signed-off-by: Alexander Indenbaum --- src/msg/Message.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/msg/Message.cc b/src/msg/Message.cc index 70ac4ad13389..c80ea60b0673 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -217,6 +217,8 @@ #include "messages/MOSDPGUpdateLogMissing.h" #include "messages/MOSDPGUpdateLogMissingReply.h" +#include "messages/MNVMeofGwBeacon.h" + #ifdef WITH_BLKIN #include "Messenger.h" #endif @@ -875,6 +877,10 @@ Message *decode_message(CephContext *cct, m = make_message(); break; + case MSG_MNVMEOF_GW_BEACON: + m = make_message(); + break; + case MSG_MON_MGR_REPORT: m = make_message(); break; From 1902ef042fcda19c36e45273d5af7ccab3dad26e Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Sun, 29 Oct 2023 11:15:08 +0200 Subject: [PATCH 10/65] MNVMeofGwBeacon:decode_payload() fix Signed-off-by: Alexander Indenbaum --- src/messages/MNVMeofGwBeacon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index 7a44b46aa4bc..3dca70144ebc 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -122,9 +122,9 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { auto p = payload.cbegin(); paxos_decode(p); - decode(gw_id, payload); + decode(gw_id, p); for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - int e; decode(e, payload); + int e; decode(e, p); sm_state[i] = static_cast(e); } decode(opt_ana_gid, p); From 96bac5450a99d2a1d0e5b34ec33eaa3363d9db52 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 29 Oct 2023 09:46:52 +0000 Subject: [PATCH 11/65] NVME monitor map logic Signed-off-by: Leonid Chernin --- src/mon/Monitor.cc | 1 + src/mon/NVMeofGwMap.cc | 168 +++++++++++++++++++++++++++-------------- src/mon/NVMeofGwMap.h | 37 +++++---- src/mon/NVMeofGwMon.cc | 33 +++++--- 4 files changed, 156 insertions(+), 83 deletions(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 1eda9848150f..8e6cc935d7ba 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -4422,6 +4422,7 @@ void Monitor::_ms_dispatch(Message *m) } MonOpRequestRef op = op_tracker.create_request(m); + dout(10) << "Received message: " << op->get_req()->get_type() << dendl; bool src_is_mon = op->is_src_mon(); op->mark_event("mon:_ms_dispatch"); MonSession *s = op->get_session(); diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 82eb8f174243..04af6ae66022 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -85,7 +85,7 @@ int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { } -int NVMeofGwMap::update_gw_timers( ) { +int NVMeofGwMap::update_active_timers( ){ dout(4) << __func__ << " called " << mon << dendl; @@ -95,7 +95,7 @@ int NVMeofGwMap::update_gw_timers( ) { for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { if (ptr.second.anagrp_sm_tstamps[i] != INVALID_GW_TIMER){ ptr.second.anagrp_sm_tstamps[i] ++; - dout(4) << "timer for GW " << itr.first << " ANA GRP " << i << ptr.second.anagrp_sm_tstamps[i] <( tp); - auto value = now_s.time_since_epoch(); - long duration = value.count(); - dout(4) << "NVM ts : " << duration << dendl; -} - int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending) { @@ -173,7 +164,7 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_id << dendl; itr.second.sm_state[gw_state->optimized_ana_group_id] = GW_WAIT_FAILBACK_PREPARED; - add_timestamp_to_metadata(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation to metadata of gw + start_timer(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation to metadata of gw gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_BLOCKED_AGROUP_OWNER; propose_pending = true; found = true; @@ -192,14 +183,11 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn else if (gw_state->availability == GW_AVAILABILITY_E::GW_AVAILABLE) { for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) if (gw_state->sm_state[i] == GW_WAIT_FAILBACK_PREPARED) { - GW_METADATA_T* metadata = find_gw_metadata(gw_id, nqn); - ceph_assert(metadata !=0); - ceph_assert(metadata->anagrp_sm_tstamps[i] != INVALID_GW_TIMER); - //dump_timestamp(now); - dout(4) << "Check timer for Failback from GW " << gw_id << " ticks : " << metadata->anagrp_sm_tstamps[i] << dendl; - if(metadata->anagrp_sm_tstamps[i] >= 2){//TODO //mgr_beacon_grace){ + auto timer = get_timer(gw_id, nqn, i); + dout(4) << "Check timer for Failback from GW " << gw_id << " ANA groupId "<< i << " ticks : " << timer << dendl; + if(timer >= 2){//TODO //mgr_beacon_grace){ // interval = 2*KATO pased T so find the state of the candidate to failback - whether it is still available - remove_timestamp_from_metadata(gw_id,nqn, i); + cancel_timer(gw_id, nqn, i); for (auto& itr : *subsyst_it) { if (itr.second.sm_state[i] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { gw_state->sm_state[i] = GW_STANDBY_STATE; @@ -228,7 +216,33 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn return rc; } +int NVMeofGwMap::handle_homeless_ana_groups(bool & propose) +{ + for (auto& nqn_itr : Gmap) { + dout(4) << "NQN " << nqn_itr.first << dendl; + for (auto& ptr : nqn_itr.second) { // loop for GWs inside nqn group + auto gw_id = ptr.first; + GW_STATE_T* state = &ptr.second; // is there GW in unavailable state? if yes, is its ANA group handled? + if (state->availability == GW_AVAILABILITY_E::GW_UNAVAILABLE && state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { + auto found_gw_for_ana_group = false; + for (auto& ptr2 : nqn_itr.second) { + if (ptr2.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE && ptr2.second.sm_state[state->optimized_ana_group_id] == GW_ACTIVE_STATE) { + found_gw_for_ana_group = true; + dout(4) << "Found GW " << ptr2.first << " that handles ANA grp " << (int)state->optimized_ana_group_id << dendl; + break; + } + } + if (found_gw_for_ana_group == false) { //choose the GW for handle ana group + dout(4)<< "Was not found the GW " << " that handles ANA grp " << (int)state->optimized_ana_group_id << " find candidate "<< dendl; + GW_STATE_T* gw_state = find_gw_map(gw_id, nqn_itr.first); + find_failover_candidate( gw_id, nqn_itr.first , gw_state, propose ); + } + } + } + } + return 0; +} int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid) { @@ -240,56 +254,96 @@ int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &gw_id, const std: } -int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending) +// TODO When decision to change ANA state of group is prepared, need to consider that last seen FSM state is "approved" - means it was returned in beacon alone with map version +int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &propose_pending) { + dout(4) <<__func__<< " process GW down " << gw_id << dendl; #define ILLEGAL_GW_ID " " #define MIN_NUM_ANA_GROUPS 0xFFF + int i; + int min_num_ana_groups_in_gw = 0; + int current_ana_groups_in_gw = 0; + GW_ID_T min_loaded_gw_id = ILLEGAL_GW_ID; + auto subsyst_it = find_subsystem_map(nqn); + + for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { // this GW may handle several ANA groups and for each of them need to found the candidate GW + if (gw_state->sm_state[i] == GW_ACTIVE_STATE || gw_state->optimized_ana_group_id == i) { + // Find a GW that takes over the ANA group(s) + + min_num_ana_groups_in_gw = MIN_NUM_ANA_GROUPS; + min_loaded_gw_id = ILLEGAL_GW_ID; + for (auto& itr : *subsyst_it) { // for all the gateways of the subsystem + if (itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + + current_ana_groups_in_gw = 0; + for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { + if (itr.second.sm_state[j] == GW_BLOCKED_AGROUP_OWNER) { + current_ana_groups_in_gw = 0xFFFF; + break; // dont take into account these GWs in the transitive state + } + else if (itr.second.sm_state[j] == GW_ACTIVE_STATE) + //dout(4) << " process GW down " << current_ana_groups_in_gw << dendl; + current_ana_groups_in_gw++; // how many ANA groups are handled by this GW + } + + if (min_num_ana_groups_in_gw > current_ana_groups_in_gw) { + min_num_ana_groups_in_gw = current_ana_groups_in_gw; + min_loaded_gw_id = itr.first; + dout(4) << "choose: gw-id min_ana_groups " << itr.first << current_ana_groups_in_gw << " min " << min_num_ana_groups_in_gw << dendl; + } + } + } + if (min_loaded_gw_id != ILLEGAL_GW_ID) { + propose_pending = true; + set_failover_gw_for_ANA_group(min_loaded_gw_id, nqn, i); + } + else + propose_pending = false; + gw_state->sm_state[i] = GW_STANDBY_STATE; + } + } + return 0; +} + + +int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending) +{ + int rc = 0; - // bool found = 0; + bool found = 0; int i; - int min_num_ana_groups_in_gw = 0; - int current_ana_groups_in_gw = 0; - GW_ID_T min_loaded_gw_id = ILLEGAL_GW_ID; GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); if (gw_state) { dout(4) << "GW down " << gw_id << dendl; auto subsyst_it = find_subsystem_map(nqn); gw_state->availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; - for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { // this GW may handle several ANA groups and for each of them need to found the candidate GW - if (gw_state->sm_state[i] == GW_ACTIVE_STATE) { - // Find a GW that takes over the ANA group(s) - - min_num_ana_groups_in_gw = MIN_NUM_ANA_GROUPS; - min_loaded_gw_id = ILLEGAL_GW_ID; - for (auto& itr : *subsyst_it) { // for all the gateways of the subsystem - if (itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - - current_ana_groups_in_gw = 0; - for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { - if (itr.second.sm_state[j] == GW_BLOCKED_AGROUP_OWNER) { - current_ana_groups_in_gw = 0xFFFF; - break; // dont take into account these GWs in the transitive state - } - else if (itr.second.sm_state[j] == GW_ACTIVE_STATE) - current_ana_groups_in_gw++; // how many ANA groups are handled by this GW - } - if (min_num_ana_groups_in_gw > current_ana_groups_in_gw) { - min_num_ana_groups_in_gw = current_ana_groups_in_gw; - min_loaded_gw_id = itr.first; - dout(4) << "choose: gw-id min_ana_groups " << itr.first << current_ana_groups_in_gw << " min " << min_num_ana_groups_in_gw << dendl; - } - } - } - if (min_loaded_gw_id != ILLEGAL_GW_ID) { - propose_pending = true; - set_failover_gw_for_ANA_group(min_loaded_gw_id, nqn, i); + for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { //search for outgoing Failback to ANA group of this GW + //gw_state->sm_state[i] = GW_STANDBY_STATE; + for (auto& itr : *subsyst_it){ + if (gw_state->optimized_ana_group_id == i && itr.second.sm_state[i] == GW_WAIT_FAILBACK_PREPARED){ + dout(4) << "Warning: Outgoing Failback when GW is down back - to rollback it" << nqn <<" GW " <sm_state[i] = GW_STANDBY_STATE; } + return rc; } + + find_failover_candidate( gw_id, nqn, gw_state, propose_pending); + } else { dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index c7f9ddf29b92..0f59baf80721 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -63,7 +63,6 @@ enum class GW_AVAILABILITY_E { #define INVALID_GW_TIMER 0xffff #define REDUNDANT_GW_ANA_GROUP_ID 0xFF typedef struct GW_STATE_T { - //bool ana_state[MAX_SUPPORTED_ANA_GROUPS]; // real ana states per ANA group for this GW :1- optimized, 0- inaccessible GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable @@ -107,11 +106,6 @@ inline void decode(GW_STATE_T& state, ceph::bufferlist::const_iterator& bl) { inline void encode(const GW_METADATA_T& state, ceph::bufferlist &bl) { for(int i = 0; i ( state.anagrp_sm_tstamps[i]); - //auto value = now_ms.time_since_epoch(); - //long duration = value.count(); int tick = state.anagrp_sm_tstamps[i]; encode( tick, bl); } @@ -119,7 +113,6 @@ inline void encode(const GW_METADATA_T& state, ceph::bufferlist &bl) { inline void decode(GW_METADATA_T& state, ceph::bufferlist::const_iterator& bl) { for(int i = 0; i subsyst_epoch;// dont think we need this since epoch per subsystem stored in each GW in GW_STATE_T + bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode uint32_t listen_mode_start_tick{0}; @@ -152,7 +145,7 @@ class NVMeofGwMap encode ((int)Gmetadata.size(),bl); for (auto& itr : Gmetadata) { encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : map + encode( itr.second, bl);// encode the full map of this nqn : } ENCODE_FINISH(bl); @@ -160,7 +153,6 @@ class NVMeofGwMap void decode(ceph::buffer::list::const_iterator &bl) { DECODE_START(1, bl); - // decode(name, bl);// decode(can_run, bl);// decode(error_string, bl);// decode(module_options, bl); int num_subsystems; std::string nqn; decode(epoch, bl); @@ -190,7 +182,7 @@ class NVMeofGwMap //decode the map gw_meta.clear(); decode(gw_meta, bl); - //insert the qw_map to Gmap + //insert the gw_meta to Gmap for(auto &itr: gw_meta ){ Gmetadata[nqn].insert({itr.first, itr.second}); } @@ -211,14 +203,14 @@ class NVMeofGwMap } return NULL; } - int update_gw_timers(); + int update_active_timers(); int _dump_gwmap(GWMAP & Gmap)const; - int _dump_metadata_map( )const ; + int _dump_active_timers( )const ; int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); - void dump_timestamp(ceph::coarse_mono_clock::time_point &tp); + int handle_homeless_ana_groups(bool &propose_pending); void debug_encode_decode(){ ceph::buffer::list bl; @@ -227,6 +219,7 @@ class NVMeofGwMap decode(p); } private: + int find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &propose_pending); int set_failover_gw_for_ANA_group (const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid); void publish_map_to_gws(const std::string& nqn){ } @@ -250,7 +243,7 @@ class NVMeofGwMap } - int add_timestamp_to_metadata(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) + int start_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) { GW_METADATA_T* metadata; //const auto now = ceph::coarse_mono_clock::now(); @@ -268,7 +261,19 @@ class NVMeofGwMap return 0; } - int remove_timestamp_from_metadata(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) + int get_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) + { + GW_METADATA_T* metadata; + if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { + ceph_assert(metadata->anagrp_sm_tstamps[anagrpid] != INVALID_GW_TIMER); + return metadata->anagrp_sm_tstamps[anagrpid]; + } + else{ + ceph_assert(false); + } + } + + int cancel_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) { GW_METADATA_T* metadata; int i; diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 2e3d0c7810f8..2c4b0dfb70eb 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -53,7 +53,7 @@ void NVMeofGwMon::inject1(){ pending_map.cfg_add_gw("gw3", "nqn2008.node1", 3); pending_map.cfg_add_gw("gw1", "nqn2008.node2", 2); pending_map._dump_gwmap(pending_map.Gmap); - pending_map._dump_metadata_map(); + pending_map._dump_active_timers(); pending_map.debug_encode_decode(); dout(4) << "Dump map after decode encode:" < Date: Sun, 29 Oct 2023 13:55:00 +0000 Subject: [PATCH 12/65] NVME monitor map logic, simulated exceptions --- src/mon/NVMeofGwMap.cc | 65 ++++++++++++++++++++++++++---------------- src/mon/NVMeofGwMap.h | 2 +- src/mon/NVMeofGwMon.cc | 20 ++++++------- 3 files changed, 52 insertions(+), 35 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 04af6ae66022..71fac5ef846a 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -95,7 +95,7 @@ int NVMeofGwMap::update_active_timers( ){ for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { if (ptr.second.anagrp_sm_tstamps[i] != INVALID_GW_TIMER){ ptr.second.anagrp_sm_tstamps[i] ++; - dout(4) << "timer for GW " << ptr.first << " ANA GRP " << i << ptr.second.anagrp_sm_tstamps[i] <availability == GW_AVAILABILITY_E::GW_UNAVAILABLE) { gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; if (gw_state->optimized_ana_group_id == REDUNDANT_GW_ANA_GROUP_ID) { @@ -156,9 +155,11 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn propose_pending = true; //TODO try to find the 1st GW overloaded by ANA groups and start failback for ANA group that it is not an owner of } - else {// prepare to Failback to this GW + else { + //========= prepare to Failback to this GW ========= // find the GW that took over on the group gw_state->optimized_ana_group_id bool found = false; + bool found_some_gw = false; for (auto& itr : *subsyst_it) { //cout << "Found GW " << itr.second.gw_id << endl; if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { @@ -170,16 +171,16 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn found = true; break; } + else found_some_gw = true; } - if (!found) { - dout(4) << "Warning - not found the GW responsible for" << gw_state->optimized_ana_group_id << "that took over the GW" << gw_id << "when it was fallen" << dendl; + if (!found && !found_some_gw) { // There is start of single GW so immediately turn its group to GW_ACTIVE_STATE + dout(4) << "Warning - not found the GW responsible for" << gw_state->optimized_ana_group_id << " that took over the GW " << gw_id << "when it was fallen" << dendl; gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; propose_pending = true; } } } - else if (gw_state->availability == GW_AVAILABILITY_E::GW_AVAILABLE) { for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) if (gw_state->sm_state[i] == GW_WAIT_FAILBACK_PREPARED) { @@ -196,10 +197,13 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn propose_pending = true; break; } - else if (itr.second.optimized_ana_group_id == i && itr.second.availability == GW_AVAILABILITY_E::GW_UNAVAILABLE){ - //This GW is failed again - persistency interval is broken so this gw standby for the group + else if (itr.second.optimized_ana_group_id == i ){ + if(itr.second.sm_state[i] == GW_STANDBY_STATE && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + itr.second.sm_state[i] = GW_ACTIVE_STATE; // GW failed and started during the persistency interval + dout(4) << "Failback unsuccessfull. GW: " << itr.first << "becomes Active for the ana group " << i << dendl; + } gw_state->sm_state[i] = GW_STANDBY_STATE; - dout(4) << "Failback unsuccessfull " << gw_id << "becomes standby for the ana group " << i << dendl; + dout(4) << "Failback unsuccessfull GW: " << gw_id << "becomes standby for the ana group " << i << dendl; propose_pending = true; break; } @@ -222,13 +226,15 @@ int NVMeofGwMap::handle_homeless_ana_groups(bool & propose) dout(4) << "NQN " << nqn_itr.first << dendl; for (auto& ptr : nqn_itr.second) { // loop for GWs inside nqn group auto gw_id = ptr.first; - GW_STATE_T* state = &ptr.second; // is there GW in unavailable state? if yes, is its ANA group handled? + GW_STATE_T* state = &ptr.second; + + //1. is there is a GW in unavailable state? if yes, is its ANA group handled by some other GW? if (state->availability == GW_AVAILABILITY_E::GW_UNAVAILABLE && state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { auto found_gw_for_ana_group = false; for (auto& ptr2 : nqn_itr.second) { if (ptr2.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE && ptr2.second.sm_state[state->optimized_ana_group_id] == GW_ACTIVE_STATE) { found_gw_for_ana_group = true; - dout(4) << "Found GW " << ptr2.first << " that handles ANA grp " << (int)state->optimized_ana_group_id << dendl; + // dout(4) << "Found GW " << ptr2.first << " that handles ANA grp " << (int)state->optimized_ana_group_id << dendl; break; } } @@ -239,6 +245,25 @@ int NVMeofGwMap::handle_homeless_ana_groups(bool & propose) find_failover_candidate( gw_id, nqn_itr.first , gw_state, propose ); } } + + //2. Check this GW is Available and Standby and no other GW is doing Failback to it + else if (state->availability == GW_AVAILABILITY_E::GW_AVAILABLE && state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID && + state->sm_state[state->optimized_ana_group_id] == GW_STANDBY_STATE + ) + { + bool found = false; + for (auto& ptr2 : nqn_itr.second) { + if ( ptr2.second.sm_state[state->optimized_ana_group_id] == GW_WAIT_FAILBACK_PREPARED){ + found = true; + break; + } + } + if(!found){ + dout(4) << __func__ << " GW " <optimized_ana_group_id << dendl; + state->sm_state[state->optimized_ana_group_id] = GW_ACTIVE_STATE; + propose = true; + } + } } } return 0; @@ -277,9 +302,9 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin current_ana_groups_in_gw = 0; for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { - if (itr.second.sm_state[j] == GW_BLOCKED_AGROUP_OWNER) { + if (itr.second.sm_state[j] == GW_BLOCKED_AGROUP_OWNER || itr.second.sm_state[j] == GW_WAIT_FAILBACK_PREPARED) { current_ana_groups_in_gw = 0xFFFF; - break; // dont take into account these GWs in the transitive state + break; // dont take into account GWs in the transitive state } else if (itr.second.sm_state[j] == GW_ACTIVE_STATE) //dout(4) << " process GW down " << current_ana_groups_in_gw << dendl; @@ -319,14 +344,10 @@ int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& gw_state->availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { //search for outgoing Failback to ANA group of this GW - //gw_state->sm_state[i] = GW_STANDBY_STATE; + for (auto& itr : *subsyst_it){ if (gw_state->optimized_ana_group_id == i && itr.second.sm_state[i] == GW_WAIT_FAILBACK_PREPARED){ dout(4) << "Warning: Outgoing Failback when GW is down back - to rollback it" << nqn <<" GW " <sm_state[i] = GW_STANDBY_STATE; - } + propose_pending = false; + for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { gw_state->sm_state[i] = GW_STANDBY_STATE;} return rc; } - find_failover_candidate( gw_id, nqn, gw_state, propose_pending); - } else { dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 0f59baf80721..697f0ee1c0a1 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -159,7 +159,7 @@ class NVMeofGwMap decode(num_subsystems, bl); SUBSYST_GWMAP gw_map; Gmap.clear(); - _dump_gwmap(Gmap); + //_dump_gwmap(Gmap); for(int i = 0; i < num_subsystems; i++){ decode(nqn, bl); Gmap.insert(make_pair(nqn, std::map())); diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 2c4b0dfb70eb..00f9b315c1ed 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -91,29 +91,28 @@ void NVMeofGwMon::inject1(){ propose_pending(); } - else if( cnt == start_cnt+6 ){ // simulate - gw1 is down + else if( cnt == start_cnt+6 ){ // simulate - gw1 is down Simulate gw election by polling function handle_homeless_ana_groups pending_map._dump_active_timers(); pending_map.process_gw_map_gw_down( "gw1", "nqn2008.node1", propose); if(propose) propose_pending(); } - /* else if( cnt == start_cnt+7 ){ // simulate - gw2 still OK - pending_map.process_gw_map_ka( "gw2", "nqn2008.node1", propose); + else if( cnt == start_cnt+7 ){ // simulate - gw1 is UP + pending_map.process_gw_map_ka( "gw1", "nqn2008.node1", propose); if(propose) propose_pending(); - }*/ - else if( cnt == start_cnt+8 ){ // simulate - gw2 still OK - checks the persistency timer in the state + } + else if( cnt == start_cnt+9 ){ // simulate - gw2 still OK - checks the persistency timer in the state pending_map.process_gw_map_ka( "gw2", "nqn2008.node1", propose); if(propose) propose_pending(); } - - } + void NVMeofGwMon::tick(){ - static int cnt=0; + // static int cnt=0; if (!is_active() || !mon.is_leader()){ dout(4) << __func__ << " NVMeofGwMon leader : " << mon.is_leader() << "active : " << is_active() << dendl; return; @@ -126,7 +125,8 @@ void NVMeofGwMon::tick(){ pending_map.update_active_timers( ); bool propose = false; - if((cnt++ %2) == 0) { + //if((cnt++ %2) == 0) + { pending_map.handle_homeless_ana_groups(propose); if(propose){ propose_pending(); @@ -175,7 +175,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){ auto p = bl.cbegin(); map.decode(p); - map._dump_gwmap(map.Gmap); + if(!mon.is_leader()) map._dump_gwmap(map.Gmap); check_subs(); } } From 5bf4cc4610f82daee984c66c3ae33682ea9d97c7 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 29 Oct 2023 19:47:12 +0000 Subject: [PATCH 13/65] Changes for receive beacon and sending the map --- src/mon/NVMeofGwMap.h | 2 +- src/mon/NVMeofGwMon.cc | 77 +++++++++++++++++++++++++++++++++++------- 2 files changed, 66 insertions(+), 13 deletions(-) diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 697f0ee1c0a1..1a49a02e2f00 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -204,7 +204,7 @@ class NVMeofGwMap return NULL; } int update_active_timers(); - + epoch_t get_epoch() const { return epoch; } int _dump_gwmap(GWMAP & Gmap)const; int _dump_active_timers( )const ; int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 00f9b315c1ed..1217e907c6fb 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -9,6 +9,8 @@ #include #include "include/stringify.h" #include "NVMeofGwMon.h" +#include "messages/MNVMeofGwBeacon.h" +#include "messages/MNVMeofGwMap.h" using std::map; using std::make_pair; @@ -183,31 +185,34 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){ void NVMeofGwMon::check_sub(Subscription *sub) { - /* MgrMonitor::check_sub - if (sub->type == "mgrmap") { - if (sub->next <= map.get_epoch()) { - dout(20) << "Sending map to subscriber " << sub->session->con - << " " << sub->session->con->get_peer_addr() << dendl; - sub->session->con->send_message2(make_message(map)); + /* MgrMonitor::check_sub*/ + //if (sub->type == "NVMeofGw") { + dout(4) << "sub->next , map-epoch " << sub->next << map.get_epoch() << dendl; + if (sub->next <= map.get_epoch()) + { + dout(4) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl; + sub->session->con->send_message2(make_message(map)); + + if (sub->onetime) { mon.session_map.remove_sub(sub); } else { sub->next = map.get_epoch() + 1; } } - } - */ + //} + } void NVMeofGwMon::check_subs() { - const std::string type = "nvmegwmap";//"mgrmap"; + const std::string type = "NVMeofGw"; dout(4) << MY_MON_PREFFIX << __func__ << " count " << mon.session_map.subs.count(type) << dendl; - - if (mon.session_map.subs.count(type) == 0) - return; + //for (auto &sub : *mon.session_map.subs) { dout(20) << sub.first << ", " << dendl;} + if (mon.session_map.subs.count(type) == 0) return; for (auto sub : *(mon.session_map.subs[type])) { + dout(4) << "sub-type "<< sub->type << dendl; check_sub(sub); } } @@ -216,16 +221,57 @@ void NVMeofGwMon::check_subs() bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + + auto m = op->get_req(); + switch (m->get_type()) { + case MSG_MNVMEOF_GW_BEACON: + return preprocess_beacon(op); + /* case MSG_MON_COMMAND: + try { + return preprocess_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return true; + } +*/ + default: + mon.no_reply(op); + derr << "Unhandled message type " << m->get_type() << dendl; + return true; + } return false; } bool NVMeofGwMon::prepare_update(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + auto m = op->get_req(); + switch (m->get_type()) { + case MSG_MNVMEOF_GW_BEACON: + return prepare_beacon(op); + + case MSG_MON_COMMAND: + try { + return prepare_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return false; /* nothing to propose! */ + } + + default: + mon.no_reply(op); + derr << "Unhandled message type " << m->get_type() << dendl; + return false; /* nothing to propose! */ + } return true; } bool NVMeofGwMon::preprocess_command(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + auto m = op->get_req(); + mon.no_reply(op); // we never reply to beacons + dout(4) << "beacon from " << m->get_type() << dendl; return false; } @@ -237,12 +283,19 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op){ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + auto m = op->get_req(); + mon.no_reply(op); // we never reply to beacons + dout(4) << "beacon from " << m->get_type() << dendl; return false; // allways return false to call leader's prepare beacon } bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; //auto m = op->get_req(); + auto m = op->get_req(); + + dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() << " optimized ANA grp. " << m->get_opt_ana_gid() << dendl; + //last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now(); return true; } From f97c6547ce4723e5e3a8c8d9230dc3f66961ae48 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Sun, 29 Oct 2023 21:41:59 +0200 Subject: [PATCH 14/65] MSG_MNVMEOF_GW_MAP decode Signed-off-by: Alexander Indenbaum --- src/msg/Message.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/msg/Message.cc b/src/msg/Message.cc index c80ea60b0673..e8be64f63139 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -218,6 +218,7 @@ #include "messages/MOSDPGUpdateLogMissingReply.h" #include "messages/MNVMeofGwBeacon.h" +#include "messages/MNVMeofGwMap.h" #ifdef WITH_BLKIN #include "Messenger.h" @@ -940,6 +941,9 @@ Message *decode_message(CephContext *cct, m = make_message(); break; + case MSG_MNVMEOF_GW_MAP: + m = make_message(); + break; // -- simple messages without payload -- case CEPH_MSG_SHUTDOWN: From a1245a3e815ece1a6950a17db687a0bf267d4e0d Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 30 Oct 2023 15:08:13 +0000 Subject: [PATCH 15/65] encode decode to Map message --- src/messages/MNVMeofGwMap.h | 5 +++-- src/nvmeof/NVMeofGw.cc | 8 +++++++- src/nvmeof/NVMeofGw.h | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/messages/MNVMeofGwMap.h b/src/messages/MNVMeofGwMap.h index 49f29d050e7d..66ffbd396624 100644 --- a/src/messages/MNVMeofGwMap.h +++ b/src/messages/MNVMeofGwMap.h @@ -42,13 +42,14 @@ class MNVMeofGwMap final : public Message { void decode_payload() override { // ../src/messages/MNVMeofGwMap.h:46:11: error: no matching function for call to ‘decode(NVMeofGwMap&, ceph::buffer::v15_2_0::list::iterator_impl&)’ - //auto p = payload.cbegin(); - //decode(map, p); + auto p = payload.cbegin(); + map.decode( p); } void encode_payload(uint64_t features) override { //../src/messages/MNVMeofGwMap.h:51:11: error: no matching function for call to ‘encode(NVMeofGwMap&, ceph::buffer::v15_2_0::list&, uint64_t&)’ //using ceph::encode; //encode(map, payload, features); + map.encode(payload); } private: using RefCountedObject::put; diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 45adf31015bc..5e7e1e6e7cfc 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -196,9 +196,15 @@ void NVMeofGw::shutdown() finisher.stop(); } -void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t m) +void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) { dout(0) << "handle nvmeof gw map" << dendl; + // NVMeofGwMap + auto &map = mmap->get_map(); + dout(0) << "received map epoch " << map.get_epoch() << dendl; + // map._dump_gwmap(map.Gmap); + + } bool NVMeofGw::ms_dispatch2(const ref_t& m) diff --git a/src/nvmeof/NVMeofGw.h b/src/nvmeof/NVMeofGw.h index e27b49037904..cc4e9c9ec550 100644 --- a/src/nvmeof/NVMeofGw.h +++ b/src/nvmeof/NVMeofGw.h @@ -34,7 +34,7 @@ class NVMeofGw : public Dispatcher, std::unique_ptr client_messenger; Objecter objecter; Client client; - + NVMeofGwMap map; ceph::mutex lock = ceph::make_mutex("NVMeofGw::lock"); Finisher finisher; SafeTimer timer; From 095ed0f8854b01612abab077f1b0e5a309d8eabb Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 30 Oct 2023 15:39:19 +0000 Subject: [PATCH 16/65] fix prepare_beacon return --- src/mon/NVMeofGwMon.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 1217e907c6fb..5992eb41d288 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -297,5 +297,5 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() << " optimized ANA grp. " << m->get_opt_ana_gid() << dendl; //last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now(); - return true; + return false; // if no changes are need in the map } From db8841ce757a09a724d56a7b3f083f093646ea75 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 30 Oct 2023 20:20:38 +0000 Subject: [PATCH 17/65] Dump of GWMAp on the GW side --- src/mon/NVMeofGwMap.cc | 25 +++++++++++++++++++------ src/mon/NVMeofGwMap.h | 1 + src/mon/NVMeofGwMon.cc | 2 +- src/nvmeof/NVMeofGw.cc | 8 +++++--- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 71fac5ef846a..98fafbc7e798 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -67,7 +67,7 @@ GW_METADATA_T* NVMeofGwMap::find_gw_metadata(const GW_ID_T &gw_id, const std::st int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { - dout(4) << __func__ << " called " << mon << dendl; + dout(0) << __func__ << " called " << mon << dendl; std::ostringstream ss; ss << std::endl; for (auto& itr : Gmap) { @@ -80,7 +80,23 @@ int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { ss << std::endl; } } - dout(10) << ss.str() <type == "NVMeofGw") { - dout(4) << "sub->next , map-epoch " << sub->next << map.get_epoch() << dendl; + dout(4) << "sub->next , map-epoch " << sub->next << " " << map.get_epoch() << dendl; if (sub->next <= map.get_epoch()) { dout(4) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl; diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 5e7e1e6e7cfc..8dbc0d608a59 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -33,6 +33,7 @@ using std::map; using std::string; +using std::stringstream; using std::vector; NVMeofGw::NVMeofGw(int argc, const char **argv) : @@ -198,12 +199,13 @@ void NVMeofGw::shutdown() void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) { - dout(0) << "handle nvmeof gw map" << dendl; + dout(0) << "handle nvmeof gw map" << dendl; // NVMeofGwMap auto &map = mmap->get_map(); dout(0) << "received map epoch " << map.get_epoch() << dendl; - // map._dump_gwmap(map.Gmap); - + std::stringstream ss; + map._dump_gwmap(ss); + dout(0) << ss.str() << dendl; } From f9be8974cfb168fac76966ebdfb237b87e569f43 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Tue, 31 Oct 2023 11:48:37 +0200 Subject: [PATCH 18/65] rework beacon Signed-off-by: Alexander Indenbaum --- src/messages/MNVMeofGwBeacon.h | 72 ++++++++++++++++++++++------------ src/mon/NVMeofGwMon.cc | 3 +- 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index 3dca70144ebc..e04a785ebbff 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -16,6 +16,7 @@ #define CEPH_NVMEOFGWBEACON_H #include +#include #include "messages/PaxosServiceMessage.h" #include "mon/MonCommand.h" #include "mon/NVMeofGwMap.h" @@ -23,6 +24,13 @@ #include "include/types.h" typedef GW_STATES_PER_AGROUP_E SM_STATE[MAX_SUPPORTED_ANA_GROUPS]; +struct NqnState { + std::string nqn; // subsystem NQN + SM_STATE sm_state; // susbsystem's state machine state + uint16_t opt_ana_gid; // optimized ANA group index +}; + +typedef std::vector GwSubsystems; std::ostream& operator<<(std::ostream& os, const SM_STATE value) { os << "SM_STATE [ "; @@ -40,6 +48,11 @@ std::ostream& operator<<(std::ostream& os, const SM_STATE value) { return os; } +std::ostream& operator<<(std::ostream& os, const NqnState value) { + os << "Subsystem( nqn: " << value.nqn << ", " << value.opt_ana_gid << ", " << value.sm_state << " )"; + return os; +} + std::ostream& operator<<(std::ostream& os, const GW_AVAILABILITY_E value) { switch (value) { @@ -58,12 +71,9 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { static constexpr int COMPAT_VERSION = 1; protected: - //bool ana_state[MAX_SUPPORTED_ANA_GROUPS]; // real ana states per ANA group for this GW :1- optimized, 0- inaccessible std::string gw_id; - SM_STATE sm_state; // state machine states per ANA group - uint16_t opt_ana_gid; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF + GwSubsystems subsystems; // gateway susbsystem and their state machine states GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable - uint64_t version; public: @@ -72,24 +82,19 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { {} MNVMeofGwBeacon(const std::string &gw_id_, - const GW_STATES_PER_AGROUP_E (&sm_state_)[MAX_SUPPORTED_ANA_GROUPS], - const uint16_t& opt_ana_gid_, - const GW_AVAILABILITY_E availability_, + const GwSubsystems& subsystems_, + const GW_AVAILABILITY_E& availability_, const uint64_t& version_ ) : PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION}, - gw_id(gw_id_), opt_ana_gid(opt_ana_gid_), + gw_id(gw_id_), subsystems(subsystems_), availability(availability_), version(version_) - { - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) - sm_state[i] = sm_state_[i]; - } + {} const std::string& get_gw_id() const { return gw_id; } - const uint16_t& get_opt_ana_gid() const { return opt_ana_gid; } const GW_AVAILABILITY_E& get_availability() const { return availability; } const uint64_t& get_version() const { return version; } - const SM_STATE& get_sm_state() const { return sm_state; }; + const GwSubsystems& get_subsystems() const { return subsystems; }; private: ~MNVMeofGwBeacon() final {} @@ -99,9 +104,11 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { std::string_view get_type_name() const override { return "nvmeofgwbeacon"; } void print(std::ostream& out) const override { - out << get_type_name() << " nvmeofgw" << "(" - << gw_id << ", " << sm_state << "," << opt_ana_gid << "," << availability << "," << version - << ")"; + out << get_type_name() << " nvmeofgw" << "(" << gw_id << ", susbsystems: [ "; + for (const NqnState& st: subsystems) { + out << st << " "; + } + out << "], " << "availability: " << availability << ", version:" << version; } void encode_payload(uint64_t features) override { @@ -110,9 +117,13 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { using ceph::encode; paxos_encode(); encode(gw_id, payload); - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) - encode((int)sm_state[i], payload); - encode(opt_ana_gid, payload); + encode(subsystems.size(), payload); + for (const NqnState& st: subsystems) { + encode(st.nqn, payload); + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + encode((int)st.sm_state[i], payload); + encode(st.opt_ana_gid, payload); + } encode((int)availability, payload); encode(version, payload); } @@ -123,13 +134,22 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { paxos_decode(p); decode(gw_id, p); - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - int e; decode(e, p); - sm_state[i] = static_cast(e); + int n; + decode(n, p); + // Reserve memory for the vector to avoid reallocations + subsystems.reserve(n); + for (int i; i < n; i++) { + NqnState st; + decode(st.nqn, p); + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + decode(n, p); + st.sm_state[i] = static_cast(n); + } + decode(st.opt_ana_gid, p); + subsystems.push_back(st); } - decode(opt_ana_gid, p); - int a; decode(a, p); - availability = static_cast(a); + decode(n, p); + availability = static_cast(n); decode(version, p); } diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 5dbc60cf8410..61bad3f5b108 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -294,7 +294,8 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ //auto m = op->get_req(); auto m = op->get_req(); - dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() << " optimized ANA grp. " << m->get_opt_ana_gid() << dendl; + dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() + << " subsystems " << m->get_subsystems() << " epoch " << m->get_version() << dendl; //last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now(); return false; // if no changes are need in the map From 2320252613713327710878306d8826de7cbadeca Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 5 Nov 2023 07:05:30 +0000 Subject: [PATCH 19/65] GW FSM called when GW state is modified --- src/messages/MNVMeofGwBeacon.h | 2 +- src/mon/NVMeofGwMap.cc | 252 +++++++++++++++++++-------------- src/mon/NVMeofGwMap.h | 11 +- src/mon/NVMeofGwMon.cc | 66 +++++++-- 4 files changed, 205 insertions(+), 126 deletions(-) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index e04a785ebbff..c0ce7897f555 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -138,7 +138,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { decode(n, p); // Reserve memory for the vector to avoid reallocations subsystems.reserve(n); - for (int i; i < n; i++) { + for (int i = 0; i < n; i++) { NqnState st; decode(st.nqn, p); for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 98fafbc7e798..1461ab29dccf 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -41,7 +41,7 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uin Gmap[nqn].insert({gw_id, state}); create_metadata(gw_id, nqn); - //epoch++; + dout(4) << " Add GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << ana_grpid << dendl; return 0; } @@ -101,17 +101,19 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { } -int NVMeofGwMap::update_active_timers( ){ +int NVMeofGwMap:: update_active_timers( bool &propose_pending ){ dout(4) << __func__ << " called " << mon << dendl; - for (auto& itr : Gmetadata) { for (auto& ptr : itr.second) { - + GW_METADATA_T *metadata = &ptr.second; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - if (ptr.second.anagrp_sm_tstamps[i] != INVALID_GW_TIMER){ - ptr.second.anagrp_sm_tstamps[i] ++; - dout(4) << "timer for GW " << ptr.first << " ANA GRP " << i<<" :" << ptr.second.anagrp_sm_tstamps[i] <anagrp_sm_tstamps[i] != INVALID_GW_TIMER){ + metadata->anagrp_sm_tstamps[i] ++; + dout(4) << "timer for GW " << ptr.first << " ANA GRP " << i<<" :" << metadata->anagrp_sm_tstamps[i] <anagrp_sm_tstamps[i] >= 2){//TODO define + fsm_handle_to_expired (ptr.first, itr.first, i, propose_pending); + } } } } @@ -141,15 +143,35 @@ int NVMeofGwMap::_dump_active_timers( )const { } -int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending) +int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending) { int rc = 0; + int i; + GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + if (gw_state) { + dout(4) << "GW down " << gw_id << dendl; + gw_state->availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; + for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + bool map_modified; + fsm_handle_gw_down (gw_id, nqn, gw_state->sm_state[i], i, map_modified); + if(map_modified) propose_pending = true; + gw_state->sm_state[i] = GW_STANDBY_STATE; + } + } + else { + dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; + rc = 1; + } + return rc; +} + +int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending) +{ + int rc = 0; #define FAILBACK_PERSISTENCY_INT_SEC 8 GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); if (gw_state) { - auto subsyst_it = find_subsystem_map(nqn); - //assert(it) dout(4) << "KA beacon from the GW " << gw_id << " in state " << (int)gw_state->availability << dendl; propose_pending = false; if (gw_state->availability == GW_AVAILABILITY_E::GW_CREATED) { @@ -157,7 +179,6 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) gw_state->sm_state[i] = GW_STANDBY_STATE; if (gw_state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { // not a redundand GW - //gw_state->ana_state[gw_state->optimized_ana_group_id] = true; gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; } propose_pending = true; @@ -167,66 +188,21 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; if (gw_state->optimized_ana_group_id == REDUNDANT_GW_ANA_GROUP_ID) { for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) gw_state->sm_state[i] = GW_STANDBY_STATE; - propose_pending = true; - //TODO try to find the 1st GW overloaded by ANA groups and start failback for ANA group that it is not an owner of + propose_pending = true; //TODO try to find the 1st GW overloaded by ANA groups and start failback for ANA group that it is not an owner of } else { //========= prepare to Failback to this GW ========= // find the GW that took over on the group gw_state->optimized_ana_group_id - bool found = false; - bool found_some_gw = false; - for (auto& itr : *subsyst_it) { - //cout << "Found GW " << itr.second.gw_id << endl; - if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { - dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_id << dendl; - itr.second.sm_state[gw_state->optimized_ana_group_id] = GW_WAIT_FAILBACK_PREPARED; - start_timer(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation to metadata of gw - gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_BLOCKED_AGROUP_OWNER; - propose_pending = true; - found = true; - break; - } - else found_some_gw = true; - } - if (!found && !found_some_gw) { // There is start of single GW so immediately turn its group to GW_ACTIVE_STATE + bool some_found = false; + propose_pending = true; + find_failback_gw(gw_id, nqn, gw_state, some_found); + if (!some_found ) { // There is start of single GW so immediately turn its group to GW_ACTIVE_STATE dout(4) << "Warning - not found the GW responsible for" << gw_state->optimized_ana_group_id << " that took over the GW " << gw_id << "when it was fallen" << dendl; gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; - propose_pending = true; } } } - - else if (gw_state->availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) - if (gw_state->sm_state[i] == GW_WAIT_FAILBACK_PREPARED) { - auto timer = get_timer(gw_id, nqn, i); - dout(4) << "Check timer for Failback from GW " << gw_id << " ANA groupId "<< i << " ticks : " << timer << dendl; - if(timer >= 2){//TODO //mgr_beacon_grace){ - // interval = 2*KATO pased T so find the state of the candidate to failback - whether it is still available - cancel_timer(gw_id, nqn, i); - for (auto& itr : *subsyst_it) { - if (itr.second.sm_state[i] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - gw_state->sm_state[i] = GW_STANDBY_STATE; - itr.second.sm_state[i] = GW_ACTIVE_STATE; - dout(4) << "Failback from GW " << gw_id << " to " << itr.first << dendl; - propose_pending = true; - break; - } - else if (itr.second.optimized_ana_group_id == i ){ - if(itr.second.sm_state[i] == GW_STANDBY_STATE && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - itr.second.sm_state[i] = GW_ACTIVE_STATE; // GW failed and started during the persistency interval - dout(4) << "Failback unsuccessfull. GW: " << itr.first << "becomes Active for the ana group " << i << dendl; - } - gw_state->sm_state[i] = GW_STANDBY_STATE; - dout(4) << "Failback unsuccessfull GW: " << gw_id << "becomes standby for the ana group " << i << dendl; - propose_pending = true; - break; - } - } - } - // maybe there are other ANA groups that this GW is in state GW_WAIT_FAILBACK_PREPARED so continue pass over all ANA groups - } - } + // if GW remains AVAILABLE need to handle failback Timers , this is handled separately } else{ dout(4) << __func__ << "ERROR GW-id was not found in the map " << gw_id << dendl; @@ -235,21 +211,23 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn return rc; } -int NVMeofGwMap::handle_homeless_ana_groups(bool & propose) + +int NVMeofGwMap::handle_abandoned_ana_groups(bool & propose) { + propose = false; for (auto& nqn_itr : Gmap) { dout(4) << "NQN " << nqn_itr.first << dendl; + for (auto& ptr : nqn_itr.second) { // loop for GWs inside nqn group auto gw_id = ptr.first; GW_STATE_T* state = &ptr.second; - //1. is there is a GW in unavailable state? if yes, is its ANA group handled by some other GW? + //1. Failover missed : is there is a GW in unavailable state? if yes, is its ANA group handled by some other GW? if (state->availability == GW_AVAILABILITY_E::GW_UNAVAILABLE && state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { auto found_gw_for_ana_group = false; for (auto& ptr2 : nqn_itr.second) { if (ptr2.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE && ptr2.second.sm_state[state->optimized_ana_group_id] == GW_ACTIVE_STATE) { - found_gw_for_ana_group = true; - // dout(4) << "Found GW " << ptr2.first << " that handles ANA grp " << (int)state->optimized_ana_group_id << dendl; + found_gw_for_ana_group = true; // dout(4) << "Found GW " << ptr2.first << " that handles ANA grp " << (int)state->optimized_ana_group_id << dendl; break; } } @@ -257,11 +235,12 @@ int NVMeofGwMap::handle_homeless_ana_groups(bool & propose) dout(4)<< "Was not found the GW " << " that handles ANA grp " << (int)state->optimized_ana_group_id << " find candidate "<< dendl; GW_STATE_T* gw_state = find_gw_map(gw_id, nqn_itr.first); - find_failover_candidate( gw_id, nqn_itr.first , gw_state, propose ); + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + find_failover_candidate( gw_id, nqn_itr.first , gw_state, i, propose ); } } - //2. Check this GW is Available and Standby and no other GW is doing Failback to it + //2. Failback missed: Check this GW is Available and Standby and no other GW is doing Failback to it else if (state->availability == GW_AVAILABILITY_E::GW_AVAILABLE && state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID && state->sm_state[state->optimized_ana_group_id] == GW_STANDBY_STATE ) @@ -294,22 +273,42 @@ int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &gw_id, const std: } +int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &some_found) +{ + auto subsyst_it = find_subsystem_map(nqn); + bool found_some_gw = false; + bool found_candidate = false; + for (auto& itr : *subsyst_it) { + //cout << "Found GW " << itr.second.gw_id << endl; + if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { + dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_id << dendl; + itr.second.sm_state[gw_state->optimized_ana_group_id] = GW_WAIT_FAILBACK_PREPARED; + start_timer(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation + gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_BLOCKED_AGROUP_OWNER; + found_candidate = true; + break; + } + else found_some_gw = true; + } + some_found = found_candidate |found_some_gw; + return 0; +} + + // TODO When decision to change ANA state of group is prepared, need to consider that last seen FSM state is "approved" - means it was returned in beacon alone with map version -int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &propose_pending) +int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, int grpid, bool &propose_pending) { dout(4) <<__func__<< " process GW down " << gw_id << dendl; #define ILLEGAL_GW_ID " " #define MIN_NUM_ANA_GROUPS 0xFFF - int i; int min_num_ana_groups_in_gw = 0; int current_ana_groups_in_gw = 0; GW_ID_T min_loaded_gw_id = ILLEGAL_GW_ID; auto subsyst_it = find_subsystem_map(nqn); - for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { // this GW may handle several ANA groups and for each of them need to found the candidate GW - if (gw_state->sm_state[i] == GW_ACTIVE_STATE || gw_state->optimized_ana_group_id == i) { + // this GW may handle several ANA groups and for each of them need to found the candidate GW + if (gw_state->sm_state[grpid] == GW_ACTIVE_STATE || gw_state->optimized_ana_group_id == grpid) { // Find a GW that takes over the ANA group(s) - min_num_ana_groups_in_gw = MIN_NUM_ANA_GROUPS; min_loaded_gw_id = ILLEGAL_GW_ID; for (auto& itr : *subsyst_it) { // for all the gateways of the subsystem @@ -335,51 +334,88 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin } if (min_loaded_gw_id != ILLEGAL_GW_ID) { propose_pending = true; - set_failover_gw_for_ANA_group(min_loaded_gw_id, nqn, i); + set_failover_gw_for_ANA_group(min_loaded_gw_id, nqn, grpid); } - else - propose_pending = false; - gw_state->sm_state[i] = GW_STANDBY_STATE; + else propose_pending = false; + gw_state->sm_state[grpid] = GW_STANDBY_STATE; } - } return 0; } -int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending) -{ + int NVMeofGwMap::fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state , int grpid, bool &map_modified) + { + switch (state) + { + case GW_STANDBY_STATE: + case GW_IDLE_STATE: + // nothing to do + break; + + case GW_WAIT_FAILBACK_PREPARED: + { + cancel_timer(gw_id, nqn, grpid); + auto subsyst_it = find_subsystem_map(nqn); + for (auto& itr : *subsyst_it){ + if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER) // found GW that was intended for Failback for this ana grp + { + dout(4) << "Warning: Outgoing Failback when GW is down back - to rollback it" << nqn <<" GW " <availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; + case GW_BLOCKED_AGROUP_OWNER: + // nothing to do - let failback timer expire + break; - for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { //search for outgoing Failback to ANA group of this GW + case GW_ACTIVE_STATE: + { + GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + find_failover_candidate( gw_id, nqn, gw_state, grpid, map_modified); + } + break; + + default:{ + ceph_assert(false); + } - for (auto& itr : *subsyst_it){ - if (gw_state->optimized_ana_group_id == i && itr.second.sm_state[i] == GW_WAIT_FAILBACK_PREPARED){ - dout(4) << "Warning: Outgoing Failback when GW is down back - to rollback it" << nqn <<" GW " <sm_state[grpid] == GW_WAIT_FAILBACK_PREPARED) { + + dout(4) << "Expired Failback timer from GW " << gw_id << " ANA groupId "<< grpid << dendl; + + cancel_timer(gw_id, nqn, grpid); + for (auto& itr : *subsyst_it) { + if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + gw_state->sm_state[grpid] = GW_STANDBY_STATE; + itr.second.sm_state[grpid] = GW_ACTIVE_STATE; + dout(4) << "Failback from GW " << gw_id << " to " << itr.first << dendl; + map_modified = true; + break; + } + else if (itr.second.optimized_ana_group_id == grpid ){ + if(itr.second.sm_state[grpid] == GW_STANDBY_STATE && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + itr.second.sm_state[grpid] = GW_ACTIVE_STATE; // GW failed and started during the persistency interval + dout(4) << "Failback unsuccessfull. GW: " << itr.first << "becomes Active for the ana group " << grpid << dendl; } + gw_state->sm_state[grpid] = GW_STANDBY_STATE; + dout(4) << "Failback unsuccessfull GW: " << gw_id << "becomes standby for the ana group " << grpid << dendl; + map_modified = true; + break; } } - if(found){ - // This GW is reported as down but we werent able to resolve HA at this moment since handling of 2 bad-flows in time is risky - //HA - the GW that takes over would be resolved in polling function handle_homeless_ana_groups - propose_pending = false; - for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { gw_state->sm_state[i] = GW_STANDBY_STATE;} - return rc; - } - find_failover_candidate( gw_id, nqn, gw_state, propose_pending); } - else { - dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; - rc = 1; - } - return rc; -} \ No newline at end of file + return 0; +} diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 036dcbe717fd..2039f35e9c5a 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -203,7 +203,7 @@ class NVMeofGwMap } return NULL; } - int update_active_timers(); + int update_active_timers( bool &propose_pending); epoch_t get_epoch() const { return epoch; } int _dump_gwmap(GWMAP & Gmap)const; int _dump_gwmap(std::stringstream &ss)const ; @@ -211,7 +211,7 @@ class NVMeofGwMap int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); - int handle_homeless_ana_groups(bool &propose_pending); + int handle_abandoned_ana_groups(bool &propose_pending); void debug_encode_decode(){ ceph::buffer::list bl; @@ -220,7 +220,12 @@ class NVMeofGwMap decode(p); } private: - int find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &propose_pending); + int fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); + int fsm_handle_gw_up (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); + int fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& nqn, int grpid, bool &map_modified); + + int find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, int grpid, bool &propose_pending); + int find_failback_gw (const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &found); int set_failover_gw_for_ANA_group (const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid); void publish_map_to_gws(const std::string& nqn){ } diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 61bad3f5b108..4bc6a5269329 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -119,21 +119,21 @@ void NVMeofGwMon::tick(){ dout(4) << __func__ << " NVMeofGwMon leader : " << mon.is_leader() << "active : " << is_active() << dendl; return; } - + bool _propose_pending = false; inject1(); const auto now = ceph::coarse_mono_clock::now(); dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; last_tick = now; - - pending_map.update_active_timers( ); bool propose = false; - //if((cnt++ %2) == 0) - { - pending_map.handle_homeless_ana_groups(propose); - if(propose){ - propose_pending(); - } + pending_map.update_active_timers(propose); + _propose_pending |= propose; + + pending_map.handle_abandoned_ana_groups(propose); + _propose_pending |= propose; + if(_propose_pending){ + propose_pending(); } + //TODO pass over the last_beacon map to detect the overdue beacons indicating the GW died //if found the one - convert the last_beacon key to gw_id and nqn and call the function pending_map_process_gw_map_gw_down // if propose_pending returned true , call propose_pending method of the paxosService @@ -289,14 +289,52 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ return false; // allways return false to call leader's prepare beacon } +#define GW_DELIM "," bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - //auto m = op->get_req(); + auto m = op->get_req(); - dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() - << " subsystems " << m->get_subsystems() << " epoch " << m->get_version() << dendl; + // dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() << " subsystems " << m->get_subsystems() << " epoch " << m->get_version() << dendl; + std::stringstream out; + m->print(out); + dout(4) << out.str() <get_gw_id(); + GW_AVAILABILITY_E avail = m->get_availability(); + const GwSubsystems& subsystems = m->get_subsystems(); + bool propose = false; + + if(avail == GW_AVAILABILITY_E::GW_CREATED){ + // create gw call cfg_add_gw + for (const NqnState& st: subsystems) { + pending_map.cfg_add_gw( gw_id, st.nqn, st.opt_ana_gid ); + } + } + else if(avail == GW_AVAILABILITY_E::GW_AVAILABLE){ + + auto now = ceph::coarse_mono_clock::now(); + // check pending_map.epoch vs m->get_version() - if different - drop the beacon + + for (const NqnState& st: subsystems) { + last_beacon[(gw_id + GW_DELIM + st.nqn)] = now; + pending_map.process_gw_map_ka( gw_id, st.nqn, propose ); + } + } + else if(avail == GW_AVAILABILITY_E::GW_UNAVAILABLE){ // state set by GW client application + // TODO: remove from last_beacon if found . if gw was found in last_beacon call process_gw_map_gw_down + for (const NqnState& st: subsystems) { + + auto it = last_beacon.find(gw_id + GW_DELIM + st.nqn); + if (it != last_beacon.end()){ + last_beacon.erase(gw_id + GW_DELIM + st.nqn); + pending_map.process_gw_map_gw_down( gw_id, st.nqn, propose ); + } + } + } + if (propose) + return true; - //last_beacon[m->get_gid()] = ceph::coarse_mono_clock::now(); - return false; // if no changes are need in the map + else + return false; // if no changes are need in the map } From 03e59dca941fca19b6b433b397e9c29ee969ac6c Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Tue, 7 Nov 2023 07:36:35 +0000 Subject: [PATCH 20/65] Fixed beacon encode/decode ,added beacon processing in NVMeofGwMon Signed-off-by: Leonid Chernin --- src/messages/MNVMeofGwBeacon.h | 32 ++++++++++--------- src/mon/NVMeofGwMap.cc | 4 +-- src/mon/NVMeofGwMap.h | 7 +++-- src/mon/NVMeofGwMon.cc | 53 +++++++++++++++++++++++++++----- src/mon/NVMeofGwMon.h | 1 + src/nvmeof/NVMeofGw.cc | 56 +++++++++++++++++++++++++++++----- src/nvmeof/NVMeofGw.h | 3 +- 7 files changed, 120 insertions(+), 36 deletions(-) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index c0ce7897f555..9a0604853089 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -41,7 +41,7 @@ std::ostream& operator<<(std::ostream& os, const SM_STATE value) { case GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE: os << "ACTIVE "; break; case GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER: os << "BLOCKED_AGROUP_OWNER "; break; case GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED: os << "WAIT_FAILBACK_PREPARED "; break; - default: os << "Invalid"; + default: os << "Invalid " << (int)value[i] << " "; } } os << "]"; @@ -49,7 +49,7 @@ std::ostream& operator<<(std::ostream& os, const SM_STATE value) { } std::ostream& operator<<(std::ostream& os, const NqnState value) { - os << "Subsystem( nqn: " << value.nqn << ", " << value.opt_ana_gid << ", " << value.sm_state << " )"; + os << "Subsystem( nqn: " << value.nqn << ", ANAGrpId: " << value.opt_ana_gid << ", " << value.sm_state << " )"; return os; } @@ -60,7 +60,7 @@ std::ostream& operator<<(std::ostream& os, const GW_AVAILABILITY_E value) { case GW_AVAILABILITY_E::GW_AVAILABLE: os << "AVAILABLE"; break; case GW_AVAILABILITY_E::GW_UNAVAILABLE: os << "UNAVAILABLE"; break; - default: os << "Invalid"; + default: os << "Invalid " << (int)value << " "; } return os; } @@ -74,7 +74,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { std::string gw_id; GwSubsystems subsystems; // gateway susbsystem and their state machine states GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable - uint64_t version; + uint32_t version; public: MNVMeofGwBeacon() @@ -84,7 +84,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { MNVMeofGwBeacon(const std::string &gw_id_, const GwSubsystems& subsystems_, const GW_AVAILABILITY_E& availability_, - const uint64_t& version_ + const uint32_t& version_ ) : PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION}, gw_id(gw_id_), subsystems(subsystems_), @@ -93,7 +93,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { const std::string& get_gw_id() const { return gw_id; } const GW_AVAILABILITY_E& get_availability() const { return availability; } - const uint64_t& get_version() const { return version; } + const uint32_t& get_version() const { return version; } const GwSubsystems& get_subsystems() const { return subsystems; }; private: @@ -104,7 +104,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { std::string_view get_type_name() const override { return "nvmeofgwbeacon"; } void print(std::ostream& out) const override { - out << get_type_name() << " nvmeofgw" << "(" << gw_id << ", susbsystems: [ "; + out << get_type_name() << " nvmeofgw " << "(" << gw_id << ", susbsystems: [ "; for (const NqnState& st: subsystems) { out << st << " "; } @@ -117,7 +117,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { using ceph::encode; paxos_encode(); encode(gw_id, payload); - encode(subsystems.size(), payload); + encode((int)subsystems.size(), payload); for (const NqnState& st: subsystems) { encode(st.nqn, payload); for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) @@ -125,7 +125,7 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { encode(st.opt_ana_gid, payload); } encode((int)availability, payload); - encode(version, payload); + encode(version, payload); } void decode_payload() override { @@ -135,21 +135,23 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { paxos_decode(p); decode(gw_id, p); int n; + int tmp; decode(n, p); // Reserve memory for the vector to avoid reallocations - subsystems.reserve(n); + subsystems.clear(); + //subsystems.reserve(n); for (int i = 0; i < n; i++) { NqnState st; decode(st.nqn, p); - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - decode(n, p); - st.sm_state[i] = static_cast(n); + for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { + decode(tmp, p); + st.sm_state[j] = static_cast(tmp); } decode(st.opt_ana_gid, p); subsystems.push_back(st); } - decode(n, p); - availability = static_cast(n); + decode(tmp, p); + availability = static_cast(tmp); decode(version, p); } diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 1461ab29dccf..a73a4e8f3aa2 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -7,7 +7,7 @@ using std::map; using std::make_pair; using std::ostream; using std::ostringstream; - +using std::string; #define dout_subsys ceph_subsys_mon #undef dout_prefix @@ -149,7 +149,7 @@ int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& int i; GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); if (gw_state) { - dout(4) << "GW down " << gw_id << dendl; + dout(4) << "GW down " << gw_id << " nqn " <availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { bool map_modified; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 2039f35e9c5a..b0ca32e66447 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -119,7 +119,7 @@ inline void decode(GW_METADATA_T& state, ceph::bufferlist::const_iterator& bl) } } - +/*-------------------*/ class NVMeofGwMap { public: @@ -127,7 +127,7 @@ class NVMeofGwMap GWMAP Gmap; GWMETADATA Gmetadata;//TODO !!! this map is used in the processing of Gmap - so it should be add to the encode/decode epoch_t epoch = 0; // epoch is for Paxos synchronization mechanizm - + bool delay_propose = false; bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode uint32_t listen_mode_start_tick{0}; @@ -136,6 +136,7 @@ class NVMeofGwMap void encode(ceph::buffer::list &bl) const { ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); encode((int) epoch, bl);// global map epoch + encode(delay_propose,bl); encode ((int)Gmap.size(),bl); // number nqn for (auto& itr : Gmap) { encode((const std::string &)itr.first, bl);// nqn @@ -156,6 +157,7 @@ class NVMeofGwMap int num_subsystems; std::string nqn; decode(epoch, bl); + decode(delay_propose,bl); decode(num_subsystems, bl); SUBSYST_GWMAP gw_map; Gmap.clear(); @@ -219,6 +221,7 @@ class NVMeofGwMap auto p = bl.cbegin(); decode(p); } + private: int fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); int fsm_handle_gw_up (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 4bc6a5269329..985fac82ed86 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -120,22 +120,50 @@ void NVMeofGwMon::tick(){ return; } bool _propose_pending = false; - inject1(); + + // inject1(); const auto now = ceph::coarse_mono_clock::now(); + const auto nvmegw_beacon_grace = g_conf().get_val("mon_mgr_beacon_grace");//TODO + if(pending_map.delay_propose){ + check_subs(); // to send map to clients + pending_map.delay_propose = false; + } + dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; last_tick = now; bool propose = false; - pending_map.update_active_timers(propose); + + pending_map.update_active_timers(propose); // Periodic: check active FSM timers _propose_pending |= propose; - pending_map.handle_abandoned_ana_groups(propose); + + //TODO handle exception of tick overdued in oreder to avoid false detection of overdued beacons , see MgrMonitor::tick + + const auto cutoff = now - nvmegw_beacon_grace; + for(auto &itr : last_beacon){// Pass over all the stored beacons + auto last_beacon_time = itr.second; + GW_ID_T gw_id; + std::string nqn; + if(last_beacon_time < cutoff){ + get_gw_and_nqn_from_key(itr.first, gw_id, nqn); + dout(4) << "beacon timeout for GW " << gw_id << dendl; + pending_map.process_gw_map_gw_down( gw_id, nqn, propose); + _propose_pending |= propose; + last_beacon.erase(itr.first); + } + else{ + dout(4) << "beacon live for GW " << gw_id << dendl; + } + } + + pending_map.handle_abandoned_ana_groups(propose); // Periodic: take care of not handled ANA groups _propose_pending |= propose; + if(_propose_pending){ + pending_map.delay_propose = true; // not to send map to clients immediately in "update_from_paxos" propose_pending(); } - //TODO pass over the last_beacon map to detect the overdue beacons indicating the GW died - //if found the one - convert the last_beacon key to gw_id and nqn and call the function pending_map_process_gw_map_gw_down // if propose_pending returned true , call propose_pending method of the paxosService // todo understand the logic of paxos.plugged for sending several propose_pending see MgrMonitor::tick } @@ -178,7 +206,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){ auto p = bl.cbegin(); map.decode(p); if(!mon.is_leader()) map._dump_gwmap(map.Gmap); - check_subs(); + //check_subs(); } } @@ -218,7 +246,6 @@ void NVMeofGwMon::check_subs() } - bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; @@ -289,7 +316,17 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ return false; // allways return false to call leader's prepare beacon } -#define GW_DELIM "," + +#define GW_DELIM ',' + +void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std::string& nqn) +{ + std::stringstream s1(key); + + std::getline(s1, gw_id, GW_DELIM); + std::getline(s1, nqn, GW_DELIM); +} + bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index 8a631fa85735..3786865fea61 100755 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -86,6 +86,7 @@ class NVMeofGwMon: public PaxosService void check_subs(); void check_sub(Subscription *sub); void inject1(); + void get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std::string& nqn); }; diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 8dbc0d608a59..6eae4ae61737 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -143,20 +143,59 @@ int NVMeofGw::init() return 0; } -void NVMeofGw::send_beacon() + +static auto GW_NAME = "GW1"; +static auto NQN = "nqn.2004.subsystem1"; + +void NVMeofGw::send_config_beacon() { ceph_assert(ceph_mutex_is_locked_by_me(lock)); - dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; - - auto m = ceph::make_message(); + dout(0) << "sending config beacon as gid " << monc.get_global_id() << dendl; + + NqnState state = {NQN,{GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE},1}; + GwSubsystems subs;// {state}; + subs.push_back(state); + auto m = ceph::make_message(GW_NAME, subs, GW_AVAILABILITY_E::GW_CREATED, 0); monc.send_mon_message(std::move(m)); } +void NVMeofGw::send_beacon() +{ + ceph_assert(ceph_mutex_is_locked_by_me(lock)); + dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; + GwSubsystems subs; + NqnState state = {NQN,{GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE}, 1}; + subs.push_back(state); + + /*Debugging encode/decode : dont remove this code for now!! + MNVMeofGwBeacon mbeacon("GW1", subs, GW_AVAILABILITY_E::GW_AVAILABLE, 0); + std::stringstream out; + mbeacon.print(out); + dout(0) << out.str() <(GW_NAME, subs, GW_AVAILABILITY_E::GW_AVAILABLE, map.epoch); + monc.send_mon_message(std::move(m)); +} + void NVMeofGw::tick() { + int static cnt = 0; dout(0) << dendl; - send_beacon(); + if(cnt++ < 3) + send_config_beacon(); + else + send_beacon(); timer.add_event_after( g_conf().get_val("mgr_tick_period").count(), @@ -201,10 +240,11 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) { dout(0) << "handle nvmeof gw map" << dendl; // NVMeofGwMap - auto &map = mmap->get_map(); - dout(0) << "received map epoch " << map.get_epoch() << dendl; + auto &mp = mmap->get_map(); + dout(0) << "received map epoch " << mp.get_epoch() << dendl; std::stringstream ss; - map._dump_gwmap(ss); + mp._dump_gwmap(ss); + map = mp; dout(0) << ss.str() << dendl; } diff --git a/src/nvmeof/NVMeofGw.h b/src/nvmeof/NVMeofGw.h index cc4e9c9ec550..f629afec6d00 100644 --- a/src/nvmeof/NVMeofGw.h +++ b/src/nvmeof/NVMeofGw.h @@ -42,8 +42,9 @@ class NVMeofGw : public Dispatcher, int orig_argc; const char **orig_argv; + void send_config_beacon(); void send_beacon(); - + public: NVMeofGw(int argc, const char **argv); ~NVMeofGw() override; From 5f3d4f3ce471a4a0f6aa9b7ef2437d11e1f46ce9 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Fri, 3 Nov 2023 20:33:25 +0200 Subject: [PATCH 21/65] grpc integration + proto compiling new build dependecies yum install -y protobuf-devel grpc-devel Signed-off-by: Alexander Indenbaum --- .gitmodules | 3 +++ src/CMakeLists.txt | 65 ++++++++++++++++++++++++++++++++++++++++++++-- src/nvmeof/gateway | 1 + 3 files changed, 67 insertions(+), 2 deletions(-) create mode 160000 src/nvmeof/gateway diff --git a/.gitmodules b/.gitmodules index 088ae3b577ce..74d452227356 100644 --- a/.gitmodules +++ b/.gitmodules @@ -75,3 +75,6 @@ [submodule "src/jaegertracing/opentelemetry-cpp"] path = src/jaegertracing/opentelemetry-cpp url = https://github.com/open-telemetry/opentelemetry-cpp.git +[submodule "src/nvmeof/gateway"] + path = src/nvmeof/gateway + url = https://github.com/baum/ceph-nvmeof.git diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 04dc028ae583..29e4a53b8b6b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -865,14 +865,75 @@ if(WITH_FUSE) install(PROGRAMS mount.fuse.ceph DESTINATION ${CMAKE_INSTALL_SBINDIR}) endif(WITH_FUSE) +# NVMEOF GATEWAY MONITOR CLIENT + +# Find Protobuf installation +# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. +option(protobuf_MODULE_COMPATIBLE TRUE) +find_package(Protobuf REQUIRED) + +set(_REFLECTION grpc++_reflection) +if(CMAKE_CROSSCOMPILING) + find_program(_PROTOBUF_PROTOC protoc) +else() + set(_PROTOBUF_PROTOC $) +endif() + +# Find gRPC installation +# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. +find_package(gRPC CONFIG REQUIRED) +message(STATUS "Using gRPC ${gRPC_VERSION}") +set(_GRPC_GRPCPP gRPC::grpc++) +if(CMAKE_CROSSCOMPILING) + find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) +else() + set(_GRPC_CPP_PLUGIN_EXECUTABLE $) +endif() + +# Proto file +get_filename_component(nvmeof_gateway_proto "nvmeof/gateway/control/proto/gateway.proto" ABSOLUTE) +get_filename_component(nvmeof_gateway_proto_path "${nvmeof_gateway_proto}" PATH) + +# Generated sources +set(nvmeof_gateway_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.cc") +set(nvmeof_gateway_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.h") +set(nvmeof_gateway_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.cc") +set(nvmeof_gateway_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.h") + +add_custom_command( + OUTPUT "${nvmeof_gateway_proto_srcs}" "${nvmeof_gateway_proto_hdrs}" "${nvmeof_gateway_grpc_srcs}" "${nvmeof_gateway_grpc_hdrs}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${nvmeof_gateway_proto_path}" + --experimental_allow_proto3_optional + --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" + "${nvmeof_gateway_proto}" + DEPENDS "${nvmeof_gateway_proto}") + +# Include generated *.pb.h files +include_directories("${CMAKE_CURRENT_BINARY_DIR}") + set(ceph_nvmeof_srcs + ${nvmeof_gateway_proto_srcs} + ${nvmeof_gateway_proto_hdrs} + ${nvmeof_gateway_grpc_srcs} + ${nvmeof_gateway_grpc_hdrs} ceph_nvmeof.cc nvmeof/NVMeofGw.cc) add_executable(ceph-nvmeof ${ceph_nvmeof_srcs}) add_dependencies(ceph-nvmeof ceph-common) -target_link_libraries(ceph-nvmeof client mon os global-static ceph-common) +target_link_libraries(ceph-nvmeof + client + mon + os + global-static + ceph-common + ${_REFLECTION} + ${_GRPC_GRPCPP} + ) install(TARGETS ceph-nvmeof DESTINATION bin) - +# END OF NVMEOF GATEWAY MONITOR CLIENT if(WITH_DOKAN) add_subdirectory(dokan) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway new file mode 160000 index 000000000000..741e3c12f8a7 --- /dev/null +++ b/src/nvmeof/gateway @@ -0,0 +1 @@ +Subproject commit 741e3c12f8a79fce220451a52ebd11dedb44332b From edf3e2ac9f28b08f6fd184c4f711b4b66d9875f0 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Wed, 8 Nov 2023 15:53:45 +0000 Subject: [PATCH 22/65] Initial gateway GRPC client Signed-off-by: Alexander Indenbaum --- src/CMakeLists.txt | 2 +- src/nvmeof/NVMeofGwClient.cc | 23 +++++++++++++++++++++ src/nvmeof/NVMeofGwClient.h | 39 ++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 src/nvmeof/NVMeofGwClient.cc create mode 100644 src/nvmeof/NVMeofGwClient.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 29e4a53b8b6b..13b168c8a0d5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -920,13 +920,13 @@ set(ceph_nvmeof_srcs ${nvmeof_gateway_grpc_srcs} ${nvmeof_gateway_grpc_hdrs} ceph_nvmeof.cc + nvmeof/NVMeofGwClient.cc nvmeof/NVMeofGw.cc) add_executable(ceph-nvmeof ${ceph_nvmeof_srcs}) add_dependencies(ceph-nvmeof ceph-common) target_link_libraries(ceph-nvmeof client mon - os global-static ceph-common ${_REFLECTION} diff --git a/src/nvmeof/NVMeofGwClient.cc b/src/nvmeof/NVMeofGwClient.cc new file mode 100644 index 000000000000..def3c91b8bbc --- /dev/null +++ b/src/nvmeof/NVMeofGwClient.cc @@ -0,0 +1,23 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#include "NVMeofGwClient.h" + +bool NVMeofGwClient::get_subsystems(subsystems_info& reply) { + get_subsystems_req request; + ClientContext context; + + Status status = stub_->get_subsystems(&context, request, &reply); + + return status.ok(); +} diff --git a/src/nvmeof/NVMeofGwClient.h b/src/nvmeof/NVMeofGwClient.h new file mode 100644 index 000000000000..4e27b0697abc --- /dev/null +++ b/src/nvmeof/NVMeofGwClient.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + + +#ifndef __NVMEOFGWCLIENT_H__ +#define __NVMEOFGWCLIENT_H__ +#include +#include +#include + +#include + +#include "gateway.grpc.pb.h" + +using grpc::Channel; +using grpc::ClientContext; +using grpc::Status; + +class NVMeofGwClient { + public: + NVMeofGwClient(std::shared_ptr channel) + : stub_(Gateway::NewStub(channel)) {} + + bool get_subsystems(subsystems_info& reply); + + private: + std::unique_ptr stub_; +}; +#endif From 763c77ac29d16fd4f241c46a7d83833d68198f18 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 9 Nov 2023 08:20:37 +0000 Subject: [PATCH 23/65] fix map distribution to clients, fix in beacon payload --- src/mon/Monitor.cc | 4 ++++ src/mon/NVMeofGwMap.cc | 48 ++++++++++++++++++------------------------ src/mon/NVMeofGwMap.h | 5 ++--- src/mon/NVMeofGwMon.cc | 48 ++++++++++++++++++++++++++---------------- src/mon/NVMeofGwMon.h | 4 ++-- src/nvmeof/NVMeofGw.cc | 11 ++++++++-- 6 files changed, 67 insertions(+), 53 deletions(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 8e6cc935d7ba..e85438af6f74 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -5336,6 +5336,10 @@ void Monitor::handle_subscribe(MonOpRequestRef op) } else if (p->first.find("kv:") == 0) { kvmon()->check_sub(s->sub_map[p->first]); } + else if (p->first == "NVMeofGw") { + dout(10) << "NVMeofGw->check_sub " << dendl; + nvmegwmon()->check_sub(s->sub_map[p->first]); + } } if (reply) { diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index a73a4e8f3aa2..fdbbe58bbb20 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -19,6 +19,18 @@ static ostream& _prefix(std::ostream *_dout, const NVMeofGwMap *h,//const Monito return *_dout << "gw-mon." << map->mon->name << "@" << map->mon->rank; } +static std::string G_gw_avail[] = { + "GW_CREATED", + "GW_AVAILAB", + "GW_UNAVAIL"}; + +static std::string G_gw_ana_states[] = { + "IDLE_STATE ", + "STANDBY_STATE ", + "ACTIVE_STATE ", + "BLOCKED_OWNER ", + "WAIT_FLBACK_RDY" +}; int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid) { @@ -26,12 +38,12 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uin if (find_gw_map(gw_id, nqn)) { dout(4) << __func__ << " ERROR :GW already exists in map " << gw_id << dendl; - return 1; + return -EEXIST ; } if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) { dout(4) << __func__ << " ERROR :GW " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; - return 1; + return -EINVAL ; } //TODO check that all MAX_SUPPORTED_ANA_GROUPS are occupied in the subsystem - assert @@ -73,9 +85,10 @@ int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { for (auto& itr : Gmap) { for (auto& ptr : itr.second) { - ss << " NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id << " available " << (int)ptr.second.availability << " States: "; + ss << " NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id << + " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - ss << (int)ptr.second.sm_state[i] << " " ; + ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } ss << std::endl; } @@ -89,9 +102,9 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { for (auto& itr : Gmap) { for (auto& ptr : itr.second) { ss << " NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) - << (int)ptr.second.optimized_ana_group_id << " available " << (int)ptr.second.availability << " States: "; + << (int)ptr.second.optimized_ana_group_id << " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - ss << (int)ptr.second.sm_state[i] << " " ; + ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } ss << std::endl; } @@ -103,7 +116,7 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { int NVMeofGwMap:: update_active_timers( bool &propose_pending ){ - dout(4) << __func__ << " called " << mon << dendl; + dout(4) << __func__ << " called, p_monitor: " << mon << dendl; for (auto& itr : Gmetadata) { for (auto& ptr : itr.second) { GW_METADATA_T *metadata = &ptr.second; @@ -122,27 +135,6 @@ int NVMeofGwMap:: update_active_timers( bool &propose_pending ){ } -int NVMeofGwMap::_dump_active_timers( )const { - - dout(4) << __func__ << " called " << mon << dendl; - std::ostringstream ss; - ss << std::endl; - for (auto& itr : Gmetadata) { - for (auto& ptr : itr.second) { - ss << " NQN " << itr.first << " GW_ID " << ptr.first << std::endl; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - if (ptr.second.anagrp_sm_tstamps[i] != INVALID_GW_TIMER){ - ss << "timer for GW " << ptr.first << " ANA GRP " << i <<" :"<< ptr.second.anagrp_sm_tstamps[i]; - } - ss << std::endl; - } - } - } - dout(4) << ss.str() <("mon_mgr_beacon_grace");//TODO - if(pending_map.delay_propose){ - check_subs(); // to send map to clients - pending_map.delay_propose = false; - } + dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; + - dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; last_tick = now; bool propose = false; @@ -160,7 +161,8 @@ void NVMeofGwMon::tick(){ _propose_pending |= propose; if(_propose_pending){ - pending_map.delay_propose = true; // not to send map to clients immediately in "update_from_paxos" + //pending_map.delay_propose = true; // not to send map to clients immediately in "update_from_paxos" + dout(4) << "decision to delayed_map" <type << dendl; - check_sub(sub); + dout(4) << "sub-type "<< sub->type << " delay_propose until next tick" << t << dendl; + if (t) map.delay_propose = true; + else check_sub(sub); } } @@ -345,7 +351,10 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ if(avail == GW_AVAILABILITY_E::GW_CREATED){ // create gw call cfg_add_gw for (const NqnState& st: subsystems) { - pending_map.cfg_add_gw( gw_id, st.nqn, st.opt_ana_gid ); + int rc = pending_map.cfg_add_gw( gw_id, st.nqn, st.opt_ana_gid ); + if(rc == -EEXIST){ + propose = true; // for synchronization with GW that starts + } } } else if(avail == GW_AVAILABILITY_E::GW_AVAILABLE){ @@ -369,8 +378,11 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ } } } - if (propose) + if (propose){ + // pending_map.delay_propose = true; + dout(4) << "decision to delayed_map in prepare_beacon" <> &versions); -private: - void check_subs(); + void check_subs(bool type); void check_sub(Subscription *sub); +private: void inject1(); void get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std::string& nqn); diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 6eae4ae61737..32384a5b9bda 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -166,6 +166,14 @@ void NVMeofGw::send_beacon() dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; GwSubsystems subs; NqnState state = {NQN,{GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE}, 1}; + + GW_STATE_T* gw_state = map.find_gw_map(GW_NAME, NQN); + if(gw_state){ // If some valid map is present + for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){ + state.sm_state[i] = gw_state->sm_state[i]; + } + } + subs.push_back(state); /*Debugging encode/decode : dont remove this code for now!! @@ -183,7 +191,6 @@ void NVMeofGw::send_beacon() dout(0) << out1.str() <(GW_NAME, subs, GW_AVAILABILITY_E::GW_AVAILABLE, map.epoch); monc.send_mon_message(std::move(m)); } @@ -192,7 +199,7 @@ void NVMeofGw::tick() { int static cnt = 0; dout(0) << dendl; - if(cnt++ < 3) + if(cnt++ < 1) send_config_beacon(); else send_beacon(); From 6013ba286d8925de6e97eaa5c8417df9ec357419 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Fri, 10 Nov 2023 15:28:08 +0000 Subject: [PATCH 24/65] prepare delete GW, simulate 2 GW clients Signed-off-by: Leonid Chernin --- src/common/options/mon.yaml.in | 8 + src/mon/NVMeofGwMap.cc | 81 +++- src/mon/NVMeofGwMap.h | 15 +- src/mon/NVMeofGwMon.cc | 790 +++++++++++++++++---------------- src/nvmeof/NVMeofGw.cc | 61 ++- 5 files changed, 551 insertions(+), 404 deletions(-) diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index ff8813c982f9..a7f0849f9199 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -63,6 +63,14 @@ options: default: 30 services: - mon +- name: mon_nvmeofgw_beacon_grace + type: secs + level: advanced + desc: Period in seconds from last beacon to monitor marking a manager daemon as + failed + default: 10 + services: + - mon - name: mon_mgr_inactive_grace type: int level: advanced diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index fdbbe58bbb20..c6ee261fe054 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -37,16 +37,26 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uin GW_STATE_T state{ {GW_IDLE_STATE,} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, 0 }; if (find_gw_map(gw_id, nqn)) { - dout(4) << __func__ << " ERROR :GW already exists in map " << gw_id << dendl; + dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; return -EEXIST ; } if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) { - dout(4) << __func__ << " ERROR :GW " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; - return -EINVAL ; + dout(4) << __func__ << " ERROR create GW: " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; + return -EINVAL; } //TODO check that all MAX_SUPPORTED_ANA_GROUPS are occupied in the subsystem - assert + // check that there is no GW in the DB configured on the same ANA-GRPID + + auto subsyst_it = find_subsystem_map(nqn); + if(subsyst_it) { + for (auto& itr : *subsyst_it) + if (itr.second.optimized_ana_group_id == ana_grpid) { + dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA group in use " <<(int)ana_grpid << " by GW " << itr.first << dendl; + return -EINVAL; + } + } if(Gmap[nqn].size() ==0 ) Gmap.insert(make_pair(nqn, SUBSYST_GWMAP())); @@ -58,6 +68,24 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uin } +int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid){ + + if (!find_gw_map(gw_id, nqn)) { + dout(4) << __func__ << " ERROR :GW not found in map " << gw_id << dendl; + return -ENODEV ; + } + if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) + { + dout(4) << __func__ << " ERROR :GW " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; + return -EINVAL ; + } + Gmap[nqn].erase(gw_id); + delete_metadata(gw_id, nqn); + dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << ana_grpid << dendl; + return 0; +} + + GW_METADATA_T* NVMeofGwMap::find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn) { auto it = Gmetadata.find(nqn); @@ -199,6 +227,7 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn else{ dout(4) << __func__ << "ERROR GW-id was not found in the map " << gw_id << dendl; rc = 1; + ceph_assert(false); } return rc; } @@ -278,11 +307,13 @@ int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_id, const std::string& nqn, start_timer(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_BLOCKED_AGROUP_OWNER; found_candidate = true; + break; } else found_some_gw = true; } some_found = found_candidate |found_some_gw; + //TODO cleanup myself (gw_id) from the Block-List return 0; } @@ -367,6 +398,7 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin case GW_ACTIVE_STATE: { GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + //TODO Start Block-List on this GW context find_failover_candidate( gw_id, nqn, gw_state, grpid, map_modified); } break; @@ -379,6 +411,49 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin return 0; } + int NVMeofGwMap::fsm_handle_gw_delete (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state , int grpid, bool &map_modified) + { + switch (state) + { + case GW_STANDBY_STATE: + case GW_IDLE_STATE: + // nothing to do + break; + + case GW_WAIT_FAILBACK_PREPARED: + { + cancel_timer(gw_id, nqn, grpid); + auto subsyst_it = find_subsystem_map(nqn); + for (auto& itr : *subsyst_it){ + if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER) // found GW that was intended for Failback for this ana grp + { + dout(4) << "Warning: Outgoing Failback when GW is deleted - to rollback it" << nqn <<" GW " <sm_state[grpid] = GW_STANDBY_STATE; + } + break; + + default:{ + ceph_assert(false); + } + + } + return 0; + } + int NVMeofGwMap::fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& nqn, int grpid, bool &map_modified) { diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 024548c799a8..33eb094f55da 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -56,7 +56,8 @@ typedef enum { enum class GW_AVAILABILITY_E { GW_CREATED = 0, GW_AVAILABLE, - GW_UNAVAILABLE + GW_UNAVAILABLE, + GW_DELETED }; #define MAX_SUPPORTED_ANA_GROUPS 5 @@ -125,7 +126,7 @@ class NVMeofGwMap public: Monitor *mon= NULL;// just for logs in the mon module file GWMAP Gmap; - GWMETADATA Gmetadata;//TODO !!! this map is used in the processing of Gmap - so it should be add to the encode/decode + GWMETADATA Gmetadata; epoch_t epoch = 0; // epoch is for Paxos synchronization mechanizm bool delay_propose = false; bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode @@ -210,6 +211,7 @@ class NVMeofGwMap int _dump_gwmap(GWMAP & Gmap)const; int _dump_gwmap(std::stringstream &ss)const ; int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); + int cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); int handle_abandoned_ana_groups(bool &propose_pending); @@ -223,6 +225,7 @@ class NVMeofGwMap private: int fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); + int fsm_handle_gw_delete (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); int fsm_handle_gw_up (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); int fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& nqn, int grpid, bool &map_modified); @@ -250,6 +253,12 @@ class NVMeofGwMap return 0; } + int delete_metadata(const GW_ID_T& gw_id, const std::string & nqn) + { + if(Gmetadata[nqn].size() != 0) + Gmetadata[nqn].erase(gw_id); + return 0; + } int start_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) { @@ -291,7 +300,7 @@ class NVMeofGwMap if(metadata->anagrp_sm_tstamps[i] != INVALID_GW_TIMER) break; if(i==MAX_SUPPORTED_ANA_GROUPS){ - Gmetadata[nqn].clear(); // remove all gw_id timers from the map + Gmetadata[nqn].erase(gw_id); // remove all gw_id timers from the map } } else { diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index add280671ad3..9be4345f6e05 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -1,389 +1,401 @@ -/* - * NVMeGWMonitor.cc - * - * Created on: Oct 17, 2023 - * Author: - */ - - -#include -#include "include/stringify.h" -#include "NVMeofGwMon.h" -#include "messages/MNVMeofGwBeacon.h" -#include "messages/MNVMeofGwMap.h" - -using std::map; -using std::make_pair; -using std::ostream; -using std::ostringstream; - - -#define dout_subsys ceph_subsys_mon -#undef dout_prefix -#define dout_prefix _prefix(_dout, this, this) -using namespace TOPNSPC::common; - -static ostream& _prefix(std::ostream *_dout, const NVMeofGwMon *h,//const Monitor &mon, - const NVMeofGwMon *hmon) { - return *_dout << "gw-mon." << hmon->mon.name << "@" << hmon->mon.rank; -} -#define MY_MON_PREFFIX " NVMeGW " - - -void NVMeofGwMon::init(){ - dout(4) << MY_MON_PREFFIX << __func__ << "called " << dendl; -} - -void NVMeofGwMon::on_restart(){ - dout(4) << MY_MON_PREFFIX << __func__ << "called " << dendl; - last_beacon.clear(); - last_tick = ceph::coarse_mono_clock::now(); -} - - -void NVMeofGwMon::on_shutdown() { - -} - -static int cnt ; -#define start_cnt 6 -void NVMeofGwMon::inject1(){ - bool propose = false; - if( ++cnt == 4 ){// simulation that new configuration was added - pending_map.cfg_add_gw("gw1", "nqn2008.node1", 1); - pending_map.cfg_add_gw("gw2", "nqn2008.node1", 2); - pending_map.cfg_add_gw("gw3", "nqn2008.node1", 3); - pending_map.cfg_add_gw("gw1", "nqn2008.node2", 2); - pending_map._dump_gwmap(pending_map.Gmap); - pending_map.debug_encode_decode(); - dout(4) << "Dump map after decode encode:" <("mon_mgr_beacon_grace");//TODO - dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; - - - last_tick = now; - bool propose = false; - - pending_map.update_active_timers(propose); // Periodic: check active FSM timers - _propose_pending |= propose; - - - //TODO handle exception of tick overdued in oreder to avoid false detection of overdued beacons , see MgrMonitor::tick - - const auto cutoff = now - nvmegw_beacon_grace; - for(auto &itr : last_beacon){// Pass over all the stored beacons - auto last_beacon_time = itr.second; - GW_ID_T gw_id; - std::string nqn; - if(last_beacon_time < cutoff){ - get_gw_and_nqn_from_key(itr.first, gw_id, nqn); - dout(4) << "beacon timeout for GW " << gw_id << dendl; - pending_map.process_gw_map_gw_down( gw_id, nqn, propose); - _propose_pending |= propose; - last_beacon.erase(itr.first); - } - else{ - dout(4) << "beacon live for GW " << gw_id << dendl; - } - } - - pending_map.handle_abandoned_ana_groups(propose); // Periodic: take care of not handled ANA groups - _propose_pending |= propose; - - if(_propose_pending){ - //pending_map.delay_propose = true; // not to send map to clients immediately in "update_from_paxos" - dout(4) << "decision to delayed_map" <type == "NVMeofGw") { - dout(4) << "sub->next , map-epoch " << sub->next << " " << map.get_epoch() << dendl; - if (sub->next <= map.get_epoch()) - { - dout(4) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl; - sub->session->con->send_message2(make_message(map)); - - - if (sub->onetime) { - mon.session_map.remove_sub(sub); - } else { - sub->next = map.get_epoch() + 1; - } - } - //} - -} - - -void NVMeofGwMon::check_subs(bool t) -{ - const std::string type = "NVMeofGw"; - dout(4) << MY_MON_PREFFIX << __func__ << " count " << mon.session_map.subs.count(type) << dendl; - - if (mon.session_map.subs.count(type) == 0){ - return; - } - for (auto sub : *(mon.session_map.subs[type])) { - dout(4) << "sub-type "<< sub->type << " delay_propose until next tick" << t << dendl; - if (t) map.delay_propose = true; - else check_sub(sub); - } -} - - -bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - - auto m = op->get_req(); - switch (m->get_type()) { - case MSG_MNVMEOF_GW_BEACON: - return preprocess_beacon(op); - /* case MSG_MON_COMMAND: - try { - return preprocess_command(op); - } catch (const bad_cmd_get& e) { - bufferlist bl; - mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); - return true; - } -*/ - default: - mon.no_reply(op); - derr << "Unhandled message type " << m->get_type() << dendl; - return true; - } - return false; -} - -bool NVMeofGwMon::prepare_update(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - auto m = op->get_req(); - switch (m->get_type()) { - case MSG_MNVMEOF_GW_BEACON: - return prepare_beacon(op); - - case MSG_MON_COMMAND: - try { - return prepare_command(op); - } catch (const bad_cmd_get& e) { - bufferlist bl; - mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); - return false; /* nothing to propose! */ - } - - default: - mon.no_reply(op); - derr << "Unhandled message type " << m->get_type() << dendl; - return false; /* nothing to propose! */ - } - return true; -} - -bool NVMeofGwMon::preprocess_command(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - auto m = op->get_req(); - mon.no_reply(op); // we never reply to beacons - dout(4) << "beacon from " << m->get_type() << dendl; - return false; -} - -bool NVMeofGwMon::prepare_command(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - return true; -} - - -bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - auto m = op->get_req(); - mon.no_reply(op); // we never reply to beacons - dout(4) << "beacon from " << m->get_type() << dendl; - return false; // allways return false to call leader's prepare beacon -} - - -#define GW_DELIM ',' - -void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std::string& nqn) -{ - std::stringstream s1(key); - - std::getline(s1, gw_id, GW_DELIM); - std::getline(s1, nqn, GW_DELIM); -} - -bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - - auto m = op->get_req(); - - // dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() << " subsystems " << m->get_subsystems() << " epoch " << m->get_version() << dendl; - std::stringstream out; - m->print(out); - dout(4) << out.str() <get_gw_id(); - GW_AVAILABILITY_E avail = m->get_availability(); - const GwSubsystems& subsystems = m->get_subsystems(); - bool propose = false; - - if(avail == GW_AVAILABILITY_E::GW_CREATED){ - // create gw call cfg_add_gw - for (const NqnState& st: subsystems) { - int rc = pending_map.cfg_add_gw( gw_id, st.nqn, st.opt_ana_gid ); - if(rc == -EEXIST){ - propose = true; // for synchronization with GW that starts - } - } - } - else if(avail == GW_AVAILABILITY_E::GW_AVAILABLE){ - - auto now = ceph::coarse_mono_clock::now(); - // check pending_map.epoch vs m->get_version() - if different - drop the beacon - - for (const NqnState& st: subsystems) { - last_beacon[(gw_id + GW_DELIM + st.nqn)] = now; - pending_map.process_gw_map_ka( gw_id, st.nqn, propose ); - } - } - else if(avail == GW_AVAILABILITY_E::GW_UNAVAILABLE){ // state set by GW client application - // TODO: remove from last_beacon if found . if gw was found in last_beacon call process_gw_map_gw_down - for (const NqnState& st: subsystems) { - - auto it = last_beacon.find(gw_id + GW_DELIM + st.nqn); - if (it != last_beacon.end()){ - last_beacon.erase(gw_id + GW_DELIM + st.nqn); - pending_map.process_gw_map_gw_down( gw_id, st.nqn, propose ); - } - } - } - if (propose){ - // pending_map.delay_propose = true; - dout(4) << "decision to delayed_map in prepare_beacon" < +#include "include/stringify.h" +#include "NVMeofGwMon.h" +#include "messages/MNVMeofGwBeacon.h" +#include "messages/MNVMeofGwMap.h" + +using std::map; +using std::make_pair; +using std::ostream; +using std::ostringstream; + + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, this, this) +using namespace TOPNSPC::common; + +static ostream& _prefix(std::ostream *_dout, const NVMeofGwMon *h,//const Monitor &mon, + const NVMeofGwMon *hmon) { + return *_dout << "gw-mon." << hmon->mon.name << "@" << hmon->mon.rank; +} +#define MY_MON_PREFFIX " NVMeGW " + + +void NVMeofGwMon::init(){ + dout(4) << MY_MON_PREFFIX << __func__ << "called " << dendl; +} + +void NVMeofGwMon::on_restart(){ + dout(4) << MY_MON_PREFFIX << __func__ << "called " << dendl; + last_beacon.clear(); + last_tick = ceph::coarse_mono_clock::now(); +} + + +void NVMeofGwMon::on_shutdown() { + +} + +static int cnt ; +#define start_cnt 6 +void NVMeofGwMon::inject1(){ + bool propose = false; + if( ++cnt == 4 ){// simulation that new configuration was added + pending_map.cfg_add_gw("gw1", "nqn2008.node1", 1); + pending_map.cfg_add_gw("gw2", "nqn2008.node1", 2); + pending_map.cfg_add_gw("gw3", "nqn2008.node1", 3); + pending_map.cfg_add_gw("gw1", "nqn2008.node2", 2); + pending_map._dump_gwmap(pending_map.Gmap); + pending_map.debug_encode_decode(); + dout(4) << "Dump map after decode encode:" <("mon_nvmeofgw_beacon_grace"); + dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; + + + last_tick = now; + bool propose = false; + + pending_map.update_active_timers(propose); // Periodic: check active FSM timers + _propose_pending |= propose; + + + //TODO handle exception of tick overdued in oreder to avoid false detection of overdued beacons , see MgrMonitor::tick + + const auto cutoff = now - nvmegw_beacon_grace; + for(auto &itr : last_beacon){// Pass over all the stored beacons + auto last_beacon_time = itr.second; + GW_ID_T gw_id; + std::string nqn; + if(last_beacon_time < cutoff){ + get_gw_and_nqn_from_key(itr.first, gw_id, nqn); + dout(4) << "beacon timeout for GW " << gw_id << dendl; + pending_map.process_gw_map_gw_down( gw_id, nqn, propose); + _propose_pending |= propose; + last_beacon.erase(itr.first); + } + else{ + dout(4) << "beacon live for GW " << gw_id << dendl; + } + } + + pending_map.handle_abandoned_ana_groups(propose); // Periodic: take care of not handled ANA groups + _propose_pending |= propose; + + if(_propose_pending){ + //pending_map.delay_propose = true; // not to send map to clients immediately in "update_from_paxos" + dout(4) << "decision to delayed_map" <type == "NVMeofGw") { + dout(4) << "sub->next , map-epoch " << sub->next << " " << map.get_epoch() << dendl; + if (sub->next <= map.get_epoch()) + { + dout(4) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl; + sub->session->con->send_message2(make_message(map)); + + + if (sub->onetime) { + mon.session_map.remove_sub(sub); + } else { + sub->next = map.get_epoch() + 1; + } + } + //} + +} + + +void NVMeofGwMon::check_subs(bool t) +{ + const std::string type = "NVMeofGw"; + dout(4) << MY_MON_PREFFIX << __func__ << " count " << mon.session_map.subs.count(type) << dendl; + + if (mon.session_map.subs.count(type) == 0){ + return; + } + for (auto sub : *(mon.session_map.subs[type])) { + dout(4) << "sub-type "<< sub->type << " delay_propose until next tick" << t << dendl; + if (t) map.delay_propose = true; + else check_sub(sub); + } +} + + +bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + + auto m = op->get_req(); + switch (m->get_type()) { + case MSG_MNVMEOF_GW_BEACON: + return preprocess_beacon(op); + /* case MSG_MON_COMMAND: + try { + return preprocess_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return true; + } +*/ + default: + mon.no_reply(op); + derr << "Unhandled message type " << m->get_type() << dendl; + return true; + } + return false; +} + +bool NVMeofGwMon::prepare_update(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + auto m = op->get_req(); + switch (m->get_type()) { + case MSG_MNVMEOF_GW_BEACON: + return prepare_beacon(op); + + case MSG_MON_COMMAND: + try { + return prepare_command(op); + } catch (const bad_cmd_get& e) { + bufferlist bl; + mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); + return false; /* nothing to propose! */ + } + + default: + mon.no_reply(op); + derr << "Unhandled message type " << m->get_type() << dendl; + return false; /* nothing to propose! */ + } + return true; +} + +bool NVMeofGwMon::preprocess_command(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + auto m = op->get_req(); + mon.no_reply(op); // we never reply to beacons + dout(4) << "beacon from " << m->get_type() << dendl; + return false; +} + +bool NVMeofGwMon::prepare_command(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + return true; +} + + +bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + auto m = op->get_req(); + mon.no_reply(op); // we never reply to beacons + dout(4) << "beacon from " << m->get_type() << dendl; + return false; // allways return false to call leader's prepare beacon +} + + +#define GW_DELIM ',' + +void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std::string& nqn) +{ + std::stringstream s1(key); + + std::getline(s1, gw_id, GW_DELIM); + std::getline(s1, nqn, GW_DELIM); +} + +bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ + dout(4) << MY_MON_PREFFIX <<__func__ << dendl; + GW_STATE_T* gw_state = NULL; + auto m = op->get_req(); + + // dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() << " subsystems " << m->get_subsystems() << " epoch " << m->get_version() << dendl; + std::stringstream out; + m->print(out); + dout(4) << out.str() <get_gw_id(); + GW_AVAILABILITY_E avail = m->get_availability(); + const GwSubsystems& subsystems = m->get_subsystems(); + bool propose = false; + + // Validation gw is in the database + if(avail != GW_AVAILABILITY_E::GW_CREATED){ + for (const NqnState& st: subsystems) { + gw_state = pending_map.find_gw_map(gw_id, st.nqn); + if(gw_state == NULL) { + dout(4) << "ERROR: GW is not in database: " << gw_id <<" " << st.nqn << dendl; + ceph_assert(false);// TODO + } + } + } + + if(avail == GW_AVAILABILITY_E::GW_CREATED){ + // create gw call cfg_add_gw + for (const NqnState& st: subsystems) { + int rc = pending_map.cfg_add_gw( gw_id, st.nqn, st.opt_ana_gid ); + if(rc == -EEXIST){ + propose = true; // for synchronization with GW that starts + } + } + } + else if(avail == GW_AVAILABILITY_E::GW_AVAILABLE){ + + auto now = ceph::coarse_mono_clock::now(); + // check pending_map.epoch vs m->get_version() - if different - drop the beacon + + for (const NqnState& st: subsystems) { + last_beacon[(gw_id + GW_DELIM + st.nqn)] = now; + pending_map.process_gw_map_ka( gw_id, st.nqn, propose ); + } + } + else if(avail == GW_AVAILABILITY_E::GW_UNAVAILABLE){ // state set by GW client application + // TODO: remove from last_beacon if found . if gw was found in last_beacon call process_gw_map_gw_down + + for (const NqnState& st: subsystems) { + + auto it = last_beacon.find(gw_id + GW_DELIM + st.nqn); + if (it != last_beacon.end()){ + last_beacon.erase(gw_id + GW_DELIM + st.nqn); + pending_map.process_gw_map_gw_down( gw_id, st.nqn, propose ); + } + } + } + if (propose){ + // pending_map.delay_propose = true; + dout(4) << "decision to delayed_map in prepare_beacon" <("mgr_tick_period").count(), new LambdaContext([this](int r){ @@ -271,7 +301,20 @@ bool NVMeofGw::ms_dispatch2(const ref_t& m) int NVMeofGw::main(vector args) { - client_messenger->wait(); + for (auto &it: args){ + + if(!strcmp(it,"a")){ + GW_NAME = "GW1"; + ana_grp = 1; + } + else if(!strcmp(it,"b")){ + GW_NAME = "GW2"; + ana_grp = 2; + } + dout(0) << "Dump arg value: " << it << " "<< GW_NAME <<"Ana grpid: " << ana_grp << dendl; + } + + client_messenger->wait(); // Disable signal handlers unregister_async_signal_handler(SIGHUP, sighup_handler); From 5e0dd9f307ac746b3bd09b200d01bde137b92caf Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Mon, 13 Nov 2023 13:27:44 +0200 Subject: [PATCH 25/65] gateway monitor client: command line parameters Signed-off-by: Alexander Indenbaum --- .gitmodules | 2 +- src/nvmeof/NVMeofGw.cc | 136 +++++++++++++---------------------------- src/nvmeof/NVMeofGw.h | 7 +++ src/nvmeof/gateway | 2 +- 4 files changed, 51 insertions(+), 96 deletions(-) diff --git a/.gitmodules b/.gitmodules index 74d452227356..a5fa46943ca3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -77,4 +77,4 @@ url = https://github.com/open-telemetry/opentelemetry-cpp.git [submodule "src/nvmeof/gateway"] path = src/nvmeof/gateway - url = https://github.com/baum/ceph-nvmeof.git + url = https://github.com/ceph/ceph-nvmeof.git diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 2b649cce6be7..0b25d099cb8a 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -15,6 +15,7 @@ #include "common/errno.h" #include "common/signal.h" +#include "common/ceph_argparse.h" #include "include/compat.h" #include "include/stringify.h" @@ -25,6 +26,7 @@ #include "messages/MNVMeofGwBeacon.h" #include "messages/MNVMeofGwMap.h" #include "NVMeofGw.h" +#include "NVMeofGwClient.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_mgr @@ -36,11 +38,6 @@ using std::string; using std::stringstream; using std::vector; - -static std::string GW_NAME = "NULL"; -static std::string NQN = "nqn.2004.subsystem1"; -static uint16_t ana_grp = 0; - NVMeofGw::NVMeofGw(int argc, const char **argv) : Dispatcher(g_ceph_context), monc{g_ceph_context, poolctx}, @@ -52,26 +49,6 @@ NVMeofGw::NVMeofGw(int argc, const char **argv) : orig_argc(argc), orig_argv(argv) { - /* char *cvalue = NULL; - int index; - int c; - - while ((c = getopt (argc, argv, "icnk:")) != -1) - switch (c) - { - case 'i': - cvalue = optarg; // set the name of the GW - if(*cvalue == 'a') - GW_NAME = "GW1"; - else if(*cvalue == 'b') - GW_NAME = "GW2"; - - break; - - default: - break; - } - */ } NVMeofGw::~NVMeofGw() = default; @@ -84,10 +61,34 @@ const char** NVMeofGw::get_tracked_conf_keys() const return KEYS; } - int NVMeofGw::init() { dout(0) << dendl; + std::string val; + auto args = argv_to_vec(orig_argc, orig_argv); + for (std::vector::iterator i = args.begin(); i != args.end(); ) { + if (ceph_argparse_double_dash(args, i)) { + break; + } else if (ceph_argparse_witharg(args, i, &val, "--name", (char*)NULL)) { + name = val; + } else if (ceph_argparse_witharg(args, i, &val, "--gateway-address", (char*)NULL)) { + gateway_address = val; + } else if (ceph_argparse_witharg(args, i, &val, "--server-key", (char*)NULL)) { + server_key = val; + } else if (ceph_argparse_witharg(args, i, &val, "--server-cert", (char*)NULL)) { + server_cert = val; + } else if (ceph_argparse_witharg(args, i, &val, "--client-cert", (char*)NULL)) { + client_cert = val; + } else { + ++i; + } + } + + ceph_assert(name != "" && gateway_address != ""); + + // todo + ceph_assert(server_key == "" && server_cert == "" && client_cert == ""); + init_async_signal_handler(); register_async_signal_handler(SIGHUP, sighup_handler); @@ -169,71 +170,31 @@ int NVMeofGw::init() return 0; } - - - -void NVMeofGw::send_config_beacon() -{ - ceph_assert(ceph_mutex_is_locked_by_me(lock)); - - - dout(0) << "sending config beacon as gid " << monc.get_global_id() << dendl; - - NqnState state = {NQN,{GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE},ana_grp}; - GwSubsystems subs;// {state}; - subs.push_back(state); - - auto m = ceph::make_message(GW_NAME, subs, GW_AVAILABILITY_E::GW_CREATED, 0); - monc.send_mon_message(std::move(m)); -} - void NVMeofGw::send_beacon() { ceph_assert(ceph_mutex_is_locked_by_me(lock)); dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; GwSubsystems subs; - NqnState state = {NQN,{GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE, GW_IDLE_STATE},ana_grp}; - - GW_STATE_T* gw_state = map.find_gw_map(GW_NAME, NQN); - if(gw_state){ // If some valid map is present - for(int i=0; i< MAX_SUPPORTED_ANA_GROUPS; i++){ - state.sm_state[i] = gw_state->sm_state[i]; - } - } - - subs.push_back(state); - - /*Debugging encode/decode : dont remove this code for now!! - MNVMeofGwBeacon mbeacon("GW1", subs, GW_AVAILABILITY_E::GW_AVAILABLE, 0); - std::stringstream out; - mbeacon.print(out); - dout(0) << out.str() <(GW_NAME, subs, GW_AVAILABILITY_E::GW_AVAILABLE, map.epoch); - monc.send_mon_message(std::move(m)); + NVMeofGwClient gw_client( + grpc::CreateChannel(gateway_address, grpc::InsecureChannelCredentials())); + subsystems_info gw_subsystems; + bool ok = gw_client.get_subsystems(gw_subsystems); + dout(0) << "got gw response ok: " << ok << "susbsystems: " << gw_subsystems.subsystems() << dendl; + // TODO: create structured response for get_subsystems + + auto m = ceph::make_message( + name, + subs, + ok? GW_AVAILABILITY_E::GW_AVAILABLE : GW_AVAILABILITY_E::GW_CREATED, + map.epoch); + monc.send_mon_message(std::move(m)); } void NVMeofGw::tick() { - int static cnt = 0; dout(0) << dendl; + send_beacon(); - if(GW_NAME != "NULL") - { - if(cnt++ < 1) - send_config_beacon(); - else - send_beacon(); - } timer.add_event_after( g_conf().get_val("mgr_tick_period").count(), new LambdaContext([this](int r){ @@ -301,20 +262,7 @@ bool NVMeofGw::ms_dispatch2(const ref_t& m) int NVMeofGw::main(vector args) { - for (auto &it: args){ - - if(!strcmp(it,"a")){ - GW_NAME = "GW1"; - ana_grp = 1; - } - else if(!strcmp(it,"b")){ - GW_NAME = "GW2"; - ana_grp = 2; - } - dout(0) << "Dump arg value: " << it << " "<< GW_NAME <<"Ana grpid: " << ana_grp << dendl; - } - - client_messenger->wait(); + client_messenger->wait(); // Disable signal handlers unregister_async_signal_handler(SIGHUP, sighup_handler); diff --git a/src/nvmeof/NVMeofGw.h b/src/nvmeof/NVMeofGw.h index f629afec6d00..1655f347edda 100644 --- a/src/nvmeof/NVMeofGw.h +++ b/src/nvmeof/NVMeofGw.h @@ -28,6 +28,13 @@ class NVMeofGw : public Dispatcher, public md_config_obs_t { +private: + std::string name; + std::string gateway_address; + std::string server_key; + std::string server_cert; + std::string client_cert; + protected: ceph::async::io_context_pool poolctx; MonClient monc; diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 741e3c12f8a7..41a88dc48e2a 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 741e3c12f8a79fce220451a52ebd11dedb44332b +Subproject commit 41a88dc48e2a3cb7b4a2c5eb8c8bde7b85fe81f2 From ad9a3c4adfb21757995334440dd4842b814079c3 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Tue, 14 Nov 2023 08:29:34 +0000 Subject: [PATCH 26/65] some exception handling in mon, changes in map --- src/mon/NVMeofGwMap.h | 4 ++-- src/mon/NVMeofGwMon.cc | 27 ++++++++++++++++++++++----- src/nvmeof/NVMeofGw.cc | 21 ++++++++++++++++++++- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 33eb094f55da..f24471afe601 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -195,13 +195,13 @@ class NVMeofGwMap //NVMeofGwMap( ) {} - GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) + GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) const { auto it = Gmap.find(nqn); if (it != Gmap.end() /* && it->first == nqn*/) { auto it2 = it->second.find(gw_id); if (it2 != it->second.end() /* && it2->first == gw_id*/ ){ // cout << "AAAA " << gw_id << " " << it2->first << endl; - return &it2->second; + return (GW_STATE_T *) &it2->second; } } return NULL; diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 9be4345f6e05..248272ce0f64 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -130,6 +130,19 @@ void NVMeofGwMon::tick(){ const auto nvmegw_beacon_grace = g_conf().get_val("mon_nvmeofgw_beacon_grace"); dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; + const auto mgr_tick_period = g_conf().get_val("mgr_tick_period"); + + if (last_tick != ceph::coarse_mono_clock::zero() + && (now - last_tick > (nvmegw_beacon_grace - mgr_tick_period))) { + // This case handles either local slowness (calls being delayed + // for whatever reason) or cluster election slowness (a long gap + // between calls while an election happened) + dout(4) << __func__ << ": resetting beacon timeouts due to mon delay " + "(slow election?) of " << now - last_tick << " seconds" << dendl; + for (auto &i : last_beacon) { + i.second = now; + } + } last_tick = now; bool propose = false; @@ -138,7 +151,7 @@ void NVMeofGwMon::tick(){ _propose_pending |= propose; - //TODO handle exception of tick overdued in oreder to avoid false detection of overdued beacons , see MgrMonitor::tick + //TODO handle exception of tick overdued in order to avoid false detection of overdued beacons , see MgrMonitor::tick const auto cutoff = now - nvmegw_beacon_grace; for(auto &itr : last_beacon){// Pass over all the stored beacons @@ -147,13 +160,13 @@ void NVMeofGwMon::tick(){ std::string nqn; if(last_beacon_time < cutoff){ get_gw_and_nqn_from_key(itr.first, gw_id, nqn); - dout(4) << "beacon timeout for GW " << gw_id << dendl; + dout(4) << "beacon timeout for GW " << gw_id << " nqn " << nqn << dendl; pending_map.process_gw_map_gw_down( gw_id, nqn, propose); _propose_pending |= propose; last_beacon.erase(itr.first); } else{ - dout(4) << "beacon live for GW " << gw_id << dendl; + dout(4) << "beacon live for GW key: " << itr.first << dendl; } } @@ -231,8 +244,6 @@ void NVMeofGwMon::check_sub(Subscription *sub) sub->next = map.get_epoch() + 1; } } - //} - } @@ -319,6 +330,12 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ auto m = op->get_req(); mon.no_reply(op); // we never reply to beacons dout(4) << "beacon from " << m->get_type() << dendl; + MonSession *session = op->get_session(); + if (!session){ + dout(4) << "beacon no session " << dendl; + return true; + } + return false; // allways return false to call leader's prepare beacon } diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 0b25d099cb8a..77750af7f42b 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -242,9 +242,28 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) dout(0) << "received map epoch " << mp.get_epoch() << dendl; std::stringstream ss; mp._dump_gwmap(ss); - map = mp; dout(0) << ss.str() << dendl; + GW_STATE_T dummy_state { {GW_IDLE_STATE,} , ana_grp, GW_AVAILABILITY_E::GW_CREATED, 0 }; + GW_STATE_T* gw_state = map.find_gw_map(GW_NAME, NQN); + GW_STATE_T* new_gw_state = mp.find_gw_map(GW_NAME, NQN); + + //ceph_assert(new_gw_state); + if(!gw_state) + gw_state = &dummy_state; + if(new_gw_state) + for(int i=0; ism_state[i] != gw_state->sm_state[i]) + { + dout(0) << "inside:" << new_gw_state->sm_state[i] << " , "<< gw_state->sm_state[i] << dendl; + // build array of tuples : {ana-grpid , new-state, died_gw-id(in case the state = Active and ana-grpid != my_optimised_grpid) + } + } + + + + map = mp; + } bool NVMeofGw::ms_dispatch2(const ref_t& m) From b696a84c402529122695291dd92c42ca8088bb0a Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 19 Nov 2023 05:29:07 +0000 Subject: [PATCH 27/65] Add array of failover peers to the map Signed-off-by: Leonid Chernin --- src/mon/NVMeofGwMap.cc | 37 ++++++++++++++++++++++++++++--------- src/mon/NVMeofGwMap.h | 16 ++++++++++++---- src/nvmeof/NVMeofGw.cc | 11 +++++++---- 3 files changed, 47 insertions(+), 17 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index c6ee261fe054..55a49f84449a 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -34,7 +34,7 @@ static std::string G_gw_ana_states[] = { int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid) { - GW_STATE_T state{ {GW_IDLE_STATE,} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, 0 }; + GW_STATE_T state{ {GW_IDLE_STATE,}, {"NULL","NULL","NULL","NULL","NULL",} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, 0 }; if (find_gw_map(gw_id, nqn)) { dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; @@ -118,6 +118,10 @@ int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } + ss << "Failover peers: " << std::endl << " "; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + ss << ptr.second.failover_peer[i] << " " ; + } ss << std::endl; } } @@ -134,6 +138,10 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } + ss << "Failover peers: " << std::endl << " "; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + ss << ptr.second.failover_peer[i] << " " ; + } ss << std::endl; } } @@ -171,11 +179,11 @@ int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& if (gw_state) { dout(4) << "GW down " << gw_id << " nqn " <availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; - for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i ++) { bool map_modified; fsm_handle_gw_down (gw_id, nqn, gw_state->sm_state[i], i, map_modified); if(map_modified) propose_pending = true; - gw_state->sm_state[i] = GW_STANDBY_STATE; + set_gw_standby_state(gw_state, i); } } else { @@ -284,10 +292,11 @@ int NVMeofGwMap::handle_abandoned_ana_groups(bool & propose) return 0; } -int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid) +int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &failed_gw_id, const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid) { GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); gw_state->sm_state[ANA_groupid] = GW_ACTIVE_STATE; + gw_state->failover_peer[ANA_groupid] = failed_gw_id; //publish_map_to_gws(nqn); dout(4) << "Set failower GW " << gw_id << " for ANA group " << (int)ANA_groupid << dendl; return 0; @@ -302,6 +311,8 @@ int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_id, const std::string& nqn, for (auto& itr : *subsyst_it) { //cout << "Found GW " << itr.second.gw_id << endl; if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { + ceph_assert(itr.second.failover_peer[gw_state->optimized_ana_group_id] == gw_id); + dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_id << dendl; itr.second.sm_state[gw_state->optimized_ana_group_id] = GW_WAIT_FAILBACK_PREPARED; start_timer(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation @@ -357,7 +368,7 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin } if (min_loaded_gw_id != ILLEGAL_GW_ID) { propose_pending = true; - set_failover_gw_for_ANA_group(min_loaded_gw_id, nqn, grpid); + set_failover_gw_for_ANA_group(gw_id, min_loaded_gw_id, nqn, grpid); } else propose_pending = false; gw_state->sm_state[grpid] = GW_STANDBY_STATE; @@ -428,7 +439,8 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER) // found GW that was intended for Failback for this ana grp { dout(4) << "Warning: Outgoing Failback when GW is deleted - to rollback it" << nqn <<" GW " <sm_state[grpid] = GW_STANDBY_STATE; + set_gw_standby_state(gw_state, grpid); } break; @@ -466,7 +478,7 @@ int NVMeofGwMap::fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& cancel_timer(gw_id, nqn, grpid); for (auto& itr : *subsyst_it) { if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - gw_state->sm_state[grpid] = GW_STANDBY_STATE; + set_gw_standby_state(gw_state, grpid); itr.second.sm_state[grpid] = GW_ACTIVE_STATE; dout(4) << "Failback from GW " << gw_id << " to " << itr.first << dendl; map_modified = true; @@ -477,7 +489,7 @@ int NVMeofGwMap::fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& itr.second.sm_state[grpid] = GW_ACTIVE_STATE; // GW failed and started during the persistency interval dout(4) << "Failback unsuccessfull. GW: " << itr.first << "becomes Active for the ana group " << grpid << dendl; } - gw_state->sm_state[grpid] = GW_STANDBY_STATE; + set_gw_standby_state(gw_state, grpid); dout(4) << "Failback unsuccessfull GW: " << gw_id << "becomes standby for the ana group " << grpid << dendl; map_modified = true; break; @@ -486,3 +498,10 @@ int NVMeofGwMap::fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& } return 0; } + +int NVMeofGwMap::set_gw_standby_state(GW_STATE_T* gw_state, uint8_t ANA_groupid) +{ + gw_state->sm_state[ANA_groupid] = GW_STANDBY_STATE; + gw_state->failover_peer[ANA_groupid] = "NULL"; + return 0; +} diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index f24471afe601..90a12ae7401f 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -64,7 +64,8 @@ enum class GW_AVAILABILITY_E { #define INVALID_GW_TIMER 0xffff #define REDUNDANT_GW_ANA_GROUP_ID 0xFF typedef struct GW_STATE_T { - GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group + GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group + GW_ID_T failover_peer[MAX_SUPPORTED_ANA_GROUPS]; uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable uint64_t version; // version per all GWs of the same subsystem. subsystem version @@ -84,6 +85,9 @@ inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { for(int i = 0; i mmap) { dout(0) << "handle nvmeof gw map" << dendl; @@ -244,9 +247,9 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) mp._dump_gwmap(ss); dout(0) << ss.str() << dendl; - GW_STATE_T dummy_state { {GW_IDLE_STATE,} , ana_grp, GW_AVAILABILITY_E::GW_CREATED, 0 }; - GW_STATE_T* gw_state = map.find_gw_map(GW_NAME, NQN); - GW_STATE_T* new_gw_state = mp.find_gw_map(GW_NAME, NQN); + GW_STATE_T dummy_state { {GW_IDLE_STATE,} , {"NULL","NULL","NULL","NULL","NULL",} , 1/*ana_grp*/, GW_AVAILABILITY_E::GW_CREATED, 0 }; + GW_STATE_T* gw_state = map.find_gw_map(name, NQN); + GW_STATE_T* new_gw_state = mp.find_gw_map(name, NQN); //ceph_assert(new_gw_state); if(!gw_state) @@ -255,7 +258,7 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) for(int i=0; ism_state[i] != gw_state->sm_state[i]) { - dout(0) << "inside:" << new_gw_state->sm_state[i] << " , "<< gw_state->sm_state[i] << dendl; + dout(0) << " " << new_gw_state->sm_state[i] << " , "<< gw_state->sm_state[i] << dendl; // build array of tuples : {ana-grpid , new-state, died_gw-id(in case the state = Active and ana-grpid != my_optimised_grpid) } } From 7cd0c92c37ef5a320f57f8a2f23e24f223a3c3a3 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Mon, 20 Nov 2023 08:16:22 +0000 Subject: [PATCH 28/65] Initial beacon logic, still requires map handling Signed-off-by: Alexander Indenbaum --- src/messages/MNVMeofGwBeacon.h | 13 ++++++++++--- src/nvmeof/NVMeofGw.cc | 18 +++++++++++++++--- src/nvmeof/gateway | 2 +- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index 9a0604853089..d81098a76e1c 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -28,6 +28,12 @@ struct NqnState { std::string nqn; // subsystem NQN SM_STATE sm_state; // susbsystem's state machine state uint16_t opt_ana_gid; // optimized ANA group index + + // Default constructor + NqnState(const std::string& _nqn) : nqn(_nqn), opt_ana_gid(0) { + for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + sm_state[i] = GW_STATES_PER_AGROUP_E::GW_IDLE_STATE; + } }; typedef std::vector GwSubsystems; @@ -139,10 +145,11 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { decode(n, p); // Reserve memory for the vector to avoid reallocations subsystems.clear(); - //subsystems.reserve(n); + subsystems.reserve(n); for (int i = 0; i < n; i++) { - NqnState st; - decode(st.nqn, p); + std::string nqn; + decode(nqn, p); + NqnState st(nqn); for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { decode(tmp, p); st.sm_state[j] = static_cast(tmp); diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 709b106a163a..b56e6add339b 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -179,13 +179,25 @@ void NVMeofGw::send_beacon() grpc::CreateChannel(gateway_address, grpc::InsecureChannelCredentials())); subsystems_info gw_subsystems; bool ok = gw_client.get_subsystems(gw_subsystems); - dout(0) << "got gw response ok: " << ok << "susbsystems: " << gw_subsystems.subsystems() << dendl; - // TODO: create structured response for get_subsystems + if (ok) { + for (int i = 0; i < gw_subsystems.subsystems_size(); i++) { + const subsystem& sub = gw_subsystems.subsystems(i); + struct NqnState nqn_state(sub.nqn()); + if (false) { // handled map already, update sm_state, opt_ana_gid + } + subs.push_back(nqn_state); + } + } + + GW_AVAILABILITY_E gw_availability = GW_AVAILABILITY_E::GW_CREATED; + if (false) { // handled map already + gw_availability = ok ? GW_AVAILABILITY_E::GW_AVAILABLE : GW_AVAILABILITY_E::GW_CREATED; + } auto m = ceph::make_message( name, subs, - ok? GW_AVAILABILITY_E::GW_AVAILABLE : GW_AVAILABILITY_E::GW_CREATED, + gw_availability, map.epoch); monc.send_mon_message(std::move(m)); } diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 41a88dc48e2a..2616e5fb1fe7 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 41a88dc48e2a3cb7b4a2c5eb8c8bde7b85fe81f2 +Subproject commit 2616e5fb1fe7d23ce7879c68a43a7c5c7caaeb51 From 2002ab291122b9ae38f0214be4f42d606441a541 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 20 Nov 2023 08:54:24 +0000 Subject: [PATCH 29/65] Monitor allocates ANA group for GW --- src/mon/NVMeofGwMap.cc | 36 ++- src/mon/NVMeofGwMap.h | 646 ++++++++++++++++++++--------------------- src/mon/NVMeofGwMon.cc | 12 +- 3 files changed, 349 insertions(+), 345 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 55a49f84449a..f450d21a0dc7 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -33,37 +33,41 @@ static std::string G_gw_ana_states[] = { }; -int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid) { - GW_STATE_T state{ {GW_IDLE_STATE,}, {"NULL","NULL","NULL","NULL","NULL",} , ana_grpid, GW_AVAILABILITY_E::GW_CREATED, 0 }; +int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn) { + + GW_STATE_T state{ {GW_IDLE_STATE,}, {""}, 0, GW_AVAILABILITY_E::GW_CREATED, 0 }; + for(int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) state.failover_peer[i] = "NULL"; if (find_gw_map(gw_id, nqn)) { dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; return -EEXIST ; } - if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) - { - dout(4) << __func__ << " ERROR create GW: " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; - return -EINVAL; - } //TODO check that all MAX_SUPPORTED_ANA_GROUPS are occupied in the subsystem - assert - // check that there is no GW in the DB configured on the same ANA-GRPID - + //Allocate AnaGrpId for the new GW + bool allocated[MAX_SUPPORTED_ANA_GROUPS+1] = {false}; auto subsyst_it = find_subsystem_map(nqn); if(subsyst_it) { for (auto& itr : *subsyst_it) - if (itr.second.optimized_ana_group_id == ana_grpid) { - dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA group in use " <<(int)ana_grpid << " by GW " << itr.first << dendl; - return -EINVAL; - } + allocated[itr.second.optimized_ana_group_id] = true; + //dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA group in use " <<(int)ana_grpid << " by GW " << itr.first << dendl; } - - if(Gmap[nqn].size() ==0 ) + for(int i=1; i<=MAX_SUPPORTED_ANA_GROUPS; i++){ + if (allocated[i] == false){ + state.optimized_ana_group_id = i; + break; + } + } + if(state.optimized_ana_group_id == 0){ + dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA groupId was not allocated " << dendl; + return -EINVAL; + } + if(Gmap[nqn].size() == 0) Gmap.insert(make_pair(nqn, SUBSYST_GWMAP())); Gmap[nqn].insert({gw_id, state}); create_metadata(gw_id, nqn); - dout(4) << " Add GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << ana_grpid << dendl; + dout(4) << " Add GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state.optimized_ana_group_id << dendl; return 0; } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 90a12ae7401f..d7f4e33e776d 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -1,323 +1,323 @@ -/* - * NVMeofGwMap.h - * - * Created on: Oct 17, 2023 - * Author: 227870756 - */ - -#ifndef MON_NVMEOFGWMAP_H_ -#define MON_NVMEOFGWMAP_H_ -#include "string" -#include -#include "map" -#include -#include -#include -#include "include/encoding.h" -#include "include/utime.h" -#include "common/Formatter.h" -#include "common/ceph_releases.h" -#include "common/version.h" -#include "common/options.h" -#include "common/Clock.h" -#include "PaxosService.h" -#include "msg/Message.h" -#include "common/ceph_time.h" -/*#include "NVMeofGwMon.h" - -using std::ostream; - -#define dout_subsys ceph_subsys_mon -#undef dout_prefix -#define dout_prefix _prefix(_dout, mon, this) -using namespace TOPNSPC::common; - -class NVMeofGwMap; - -inline ostream& _prefix(std::ostream *_dout, const Monitor &mon, - const NVMeofGwMap *map) { - return *_dout << "mon." << mon.name << "@" << mon.rank; -} - */ - -using ceph::coarse_mono_clock; - - -using GW_ID_T = std::string; - -typedef enum { - GW_IDLE_STATE = 0, //invalid state - GW_STANDBY_STATE, - GW_ACTIVE_STATE, - GW_BLOCKED_AGROUP_OWNER, - GW_WAIT_FAILBACK_PREPARED -}GW_STATES_PER_AGROUP_E; - -enum class GW_AVAILABILITY_E { - GW_CREATED = 0, - GW_AVAILABLE, - GW_UNAVAILABLE, - GW_DELETED -}; - -#define MAX_SUPPORTED_ANA_GROUPS 5 -#define INVALID_GW_TIMER 0xffff -#define REDUNDANT_GW_ANA_GROUP_ID 0xFF -typedef struct GW_STATE_T { - GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group - GW_ID_T failover_peer[MAX_SUPPORTED_ANA_GROUPS]; - uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF - GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable - uint64_t version; // version per all GWs of the same subsystem. subsystem version -}GW_STATE_T; - -typedef struct GW_METADATA_T { - int anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state -}GW_METADATA_T; - - -using GWMAP = std::map >; -using GWMETADATA = std::map >; -using SUBSYST_GWMAP = std::map; -using SUBSYST_GWMETA = std::map; - -inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { - for(int i = 0; i module_options; - void encode(ceph::buffer::list &bl) const { - ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); - encode((int) epoch, bl);// global map epoch - // encode(delay_propose,bl); - encode ((int)Gmap.size(),bl); // number nqn - for (auto& itr : Gmap) { - encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : map - } - // Encode Gmetadata - encode ((int)Gmetadata.size(),bl); - for (auto& itr : Gmetadata) { - encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : - } - - ENCODE_FINISH(bl); - } - - void decode(ceph::buffer::list::const_iterator &bl) { - DECODE_START(1, bl); - int num_subsystems; - std::string nqn; - decode(epoch, bl); - // decode(delay_propose,bl); - decode(num_subsystems, bl); - SUBSYST_GWMAP gw_map; - Gmap.clear(); - //_dump_gwmap(Gmap); - for(int i = 0; i < num_subsystems; i++){ - decode(nqn, bl); - Gmap.insert(make_pair(nqn, std::map())); - //decode the map - gw_map.clear(); - decode(gw_map, bl); - //insert the qw_map to Gmap - for(auto &itr: gw_map ){ - Gmap[nqn].insert({itr.first, itr.second}); - } - } - // decode Gmetadata - decode(num_subsystems, bl); - SUBSYST_GWMETA gw_meta; - Gmetadata.clear(); - //_dump_gwmap(Gmap); - for(int i = 0; i < num_subsystems; i++){ - decode(nqn, bl); - Gmetadata.insert(make_pair(nqn, std::map())); - //decode the map - gw_meta.clear(); - decode(gw_meta, bl); - //insert the gw_meta to Gmap - for(auto &itr: gw_meta ){ - Gmetadata[nqn].insert({itr.first, itr.second}); - } - } - DECODE_FINISH(bl); - } - - //NVMeofGwMap( ) {} - - GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) const - { - auto it = Gmap.find(nqn); - if (it != Gmap.end() /* && it->first == nqn*/) { - auto it2 = it->second.find(gw_id); - if (it2 != it->second.end() /* && it2->first == gw_id*/ ){ // cout << "AAAA " << gw_id << " " << it2->first << endl; - return (GW_STATE_T *) &it2->second; - } - } - return NULL; - } - int update_active_timers( bool &propose_pending); - epoch_t get_epoch() const { return epoch; } - int _dump_gwmap(GWMAP & Gmap)const; - int _dump_gwmap(std::stringstream &ss)const ; - int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); - int cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); - int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); - int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); - int handle_abandoned_ana_groups(bool &propose_pending); - - void debug_encode_decode(){ - ceph::buffer::list bl; - encode(bl); - auto p = bl.cbegin(); - decode(p); - } - -private: - int fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); - int fsm_handle_gw_delete (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); - int fsm_handle_gw_up (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); - int fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& nqn, int grpid, bool &map_modified); - - int find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, int grpid, bool &propose_pending); - int find_failback_gw (const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &found); - int set_failover_gw_for_ANA_group (const GW_ID_T &failed_gw_id, const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid); - int set_gw_standby_state(GW_STATE_T* gw_state, uint8_t ANA_groupid); - - SUBSYST_GWMAP * find_subsystem_map(const std::string& nqn) - { - auto it = Gmap.find(nqn); - if (it != Gmap.end() ){ - return &it->second; - } - return NULL; - } - - int create_metadata(const GW_ID_T& gw_id, const std::string & nqn) - { - - if(Gmetadata[nqn].size() == 0) - Gmetadata.insert(make_pair(nqn, std::map())); - //Gmetadata[nqn].insert({ gw_id, new_metadata }); - return 0; - } - - int delete_metadata(const GW_ID_T& gw_id, const std::string & nqn) - { - if(Gmetadata[nqn].size() != 0) - Gmetadata[nqn].erase(gw_id); - return 0; - } - - int start_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) - { - GW_METADATA_T* metadata; - //const auto now = ceph::coarse_mono_clock::now(); - if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { - metadata->anagrp_sm_tstamps[anagrpid] = 0;// set timer - } - else { - GW_METADATA_T new_metadata = {INVALID_GW_TIMER,}; - for (int i=0; ianagrp_sm_tstamps[anagrpid] != INVALID_GW_TIMER); - return metadata->anagrp_sm_tstamps[anagrpid]; - } - else{ - ceph_assert(false); - } - } - - int cancel_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) - { - GW_METADATA_T* metadata; - int i; - if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { - metadata->anagrp_sm_tstamps[anagrpid] = INVALID_GW_TIMER; - for(i=0; ianagrp_sm_tstamps[i] != INVALID_GW_TIMER) - break; - if(i==MAX_SUPPORTED_ANA_GROUPS){ - Gmetadata[nqn].erase(gw_id); // remove all gw_id timers from the map - } - } - else { - ceph_assert(false); - } - return 0; - } - - GW_METADATA_T* find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn); -}; - -#endif /* SRC_MON_NVMEOFGWMAP_H_ */ +/* + * NVMeofGwMap.h + * + * Created on: Oct 17, 2023 + * Author: 227870756 + */ + +#ifndef MON_NVMEOFGWMAP_H_ +#define MON_NVMEOFGWMAP_H_ +#include "string" +#include +#include "map" +#include +#include +#include +#include "include/encoding.h" +#include "include/utime.h" +#include "common/Formatter.h" +#include "common/ceph_releases.h" +#include "common/version.h" +#include "common/options.h" +#include "common/Clock.h" +#include "PaxosService.h" +#include "msg/Message.h" +#include "common/ceph_time.h" +/*#include "NVMeofGwMon.h" + +using std::ostream; + +#define dout_subsys ceph_subsys_mon +#undef dout_prefix +#define dout_prefix _prefix(_dout, mon, this) +using namespace TOPNSPC::common; + +class NVMeofGwMap; + +inline ostream& _prefix(std::ostream *_dout, const Monitor &mon, + const NVMeofGwMap *map) { + return *_dout << "mon." << mon.name << "@" << mon.rank; +} + */ + +using ceph::coarse_mono_clock; + + +using GW_ID_T = std::string; + +typedef enum { + GW_IDLE_STATE = 0, //invalid state + GW_STANDBY_STATE, + GW_ACTIVE_STATE, + GW_BLOCKED_AGROUP_OWNER, + GW_WAIT_FAILBACK_PREPARED +}GW_STATES_PER_AGROUP_E; + +enum class GW_AVAILABILITY_E { + GW_CREATED = 0, + GW_AVAILABLE, + GW_UNAVAILABLE, + GW_DELETED +}; + +#define MAX_SUPPORTED_ANA_GROUPS 5 +#define INVALID_GW_TIMER 0xffff +#define REDUNDANT_GW_ANA_GROUP_ID 0xFF +typedef struct GW_STATE_T { + GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group + GW_ID_T failover_peer[MAX_SUPPORTED_ANA_GROUPS]; + uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF + GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable + uint64_t version; // version per all GWs of the same subsystem. subsystem version +}GW_STATE_T; + +typedef struct GW_METADATA_T { + int anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state +}GW_METADATA_T; + + +using GWMAP = std::map >; +using GWMETADATA = std::map >; +using SUBSYST_GWMAP = std::map; +using SUBSYST_GWMETA = std::map; + +inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { + for(int i = 0; i module_options; + void encode(ceph::buffer::list &bl) const { + ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); + encode((int) epoch, bl);// global map epoch + // encode(delay_propose,bl); + encode ((int)Gmap.size(),bl); // number nqn + for (auto& itr : Gmap) { + encode((const std::string &)itr.first, bl);// nqn + encode( itr.second, bl);// encode the full map of this nqn : map + } + // Encode Gmetadata + encode ((int)Gmetadata.size(),bl); + for (auto& itr : Gmetadata) { + encode((const std::string &)itr.first, bl);// nqn + encode( itr.second, bl);// encode the full map of this nqn : + } + + ENCODE_FINISH(bl); + } + + void decode(ceph::buffer::list::const_iterator &bl) { + DECODE_START(1, bl); + int num_subsystems; + std::string nqn; + decode(epoch, bl); + // decode(delay_propose,bl); + decode(num_subsystems, bl); + SUBSYST_GWMAP gw_map; + Gmap.clear(); + //_dump_gwmap(Gmap); + for(int i = 0; i < num_subsystems; i++){ + decode(nqn, bl); + Gmap.insert(make_pair(nqn, std::map())); + //decode the map + gw_map.clear(); + decode(gw_map, bl); + //insert the qw_map to Gmap + for(auto &itr: gw_map ){ + Gmap[nqn].insert({itr.first, itr.second}); + } + } + // decode Gmetadata + decode(num_subsystems, bl); + SUBSYST_GWMETA gw_meta; + Gmetadata.clear(); + //_dump_gwmap(Gmap); + for(int i = 0; i < num_subsystems; i++){ + decode(nqn, bl); + Gmetadata.insert(make_pair(nqn, std::map())); + //decode the map + gw_meta.clear(); + decode(gw_meta, bl); + //insert the gw_meta to Gmap + for(auto &itr: gw_meta ){ + Gmetadata[nqn].insert({itr.first, itr.second}); + } + } + DECODE_FINISH(bl); + } + + //NVMeofGwMap( ) {} + + GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) const + { + auto it = Gmap.find(nqn); + if (it != Gmap.end() /* && it->first == nqn*/) { + auto it2 = it->second.find(gw_id); + if (it2 != it->second.end() /* && it2->first == gw_id*/ ){ // cout << "AAAA " << gw_id << " " << it2->first << endl; + return (GW_STATE_T *) &it2->second; + } + } + return NULL; + } + int update_active_timers( bool &propose_pending); + epoch_t get_epoch() const { return epoch; } + int _dump_gwmap(GWMAP & Gmap)const; + int _dump_gwmap(std::stringstream &ss)const ; + int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn); + int cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); + int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); + int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); + int handle_abandoned_ana_groups(bool &propose_pending); + + void debug_encode_decode(){ + ceph::buffer::list bl; + encode(bl); + auto p = bl.cbegin(); + decode(p); + } + +private: + int fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); + int fsm_handle_gw_delete (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); + int fsm_handle_gw_up (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); + int fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& nqn, int grpid, bool &map_modified); + + int find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, int grpid, bool &propose_pending); + int find_failback_gw (const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &found); + int set_failover_gw_for_ANA_group (const GW_ID_T &failed_gw_id, const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid); + int set_gw_standby_state(GW_STATE_T* gw_state, uint8_t ANA_groupid); + + SUBSYST_GWMAP * find_subsystem_map(const std::string& nqn) + { + auto it = Gmap.find(nqn); + if (it != Gmap.end() ){ + return &it->second; + } + return NULL; + } + + int create_metadata(const GW_ID_T& gw_id, const std::string & nqn) + { + + if(Gmetadata[nqn].size() == 0) + Gmetadata.insert(make_pair(nqn, std::map())); + //Gmetadata[nqn].insert({ gw_id, new_metadata }); + return 0; + } + + int delete_metadata(const GW_ID_T& gw_id, const std::string & nqn) + { + if(Gmetadata[nqn].size() != 0) + Gmetadata[nqn].erase(gw_id); + return 0; + } + + int start_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) + { + GW_METADATA_T* metadata; + //const auto now = ceph::coarse_mono_clock::now(); + if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { + metadata->anagrp_sm_tstamps[anagrpid] = 0;// set timer + } + else { + GW_METADATA_T new_metadata = {INVALID_GW_TIMER,}; + for (int i=0; ianagrp_sm_tstamps[anagrpid] != INVALID_GW_TIMER); + return metadata->anagrp_sm_tstamps[anagrpid]; + } + else{ + ceph_assert(false); + } + } + + int cancel_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) + { + GW_METADATA_T* metadata; + int i; + if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { + metadata->anagrp_sm_tstamps[anagrpid] = INVALID_GW_TIMER; + for(i=0; ianagrp_sm_tstamps[i] != INVALID_GW_TIMER) + break; + if(i==MAX_SUPPORTED_ANA_GROUPS){ + Gmetadata[nqn].erase(gw_id); // remove all gw_id timers from the map + } + } + else { + ceph_assert(false); + } + return 0; + } + + GW_METADATA_T* find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn); +}; + +#endif /* SRC_MON_NVMEOFGWMAP_H_ */ diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 248272ce0f64..828153fb0d1a 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -50,17 +50,17 @@ static int cnt ; void NVMeofGwMon::inject1(){ bool propose = false; if( ++cnt == 4 ){// simulation that new configuration was added - pending_map.cfg_add_gw("gw1", "nqn2008.node1", 1); - pending_map.cfg_add_gw("gw2", "nqn2008.node1", 2); - pending_map.cfg_add_gw("gw3", "nqn2008.node1", 3); - pending_map.cfg_add_gw("gw1", "nqn2008.node2", 2); + pending_map.cfg_add_gw("gw1", "nqn2008.node1" ); + pending_map.cfg_add_gw("gw2", "nqn2008.node1" ); + pending_map.cfg_add_gw("gw3", "nqn2008.node1" ); + pending_map.cfg_add_gw("gw1", "nqn2008.node2" ); pending_map._dump_gwmap(pending_map.Gmap); pending_map.debug_encode_decode(); dout(4) << "Dump map after decode encode:" < Date: Mon, 20 Nov 2023 21:04:29 +0000 Subject: [PATCH 30/65] gateway container and start up integration Signed-off-by: Alexander Indenbaum --- src/nvmeof/gateway | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 2616e5fb1fe7..ac925fae220e 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 2616e5fb1fe7d23ce7879c68a43a7c5c7caaeb51 +Subproject commit ac925fae220ee1a1dffa3f22196de3863eb4eac2 From 84af81425e1ac4d005d4adeac5804760a6b68621 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Tue, 21 Nov 2023 15:02:27 +0000 Subject: [PATCH 31/65] Utilize distinct command line arguments. Signed-off-by: Alexander Indenbaum --- src/nvmeof/NVMeofGw.cc | 4 +++- src/nvmeof/gateway | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index b56e6add339b..5ba026ef8ed4 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -66,10 +66,11 @@ int NVMeofGw::init() dout(0) << dendl; std::string val; auto args = argv_to_vec(orig_argc, orig_argv); + for (std::vector::iterator i = args.begin(); i != args.end(); ) { if (ceph_argparse_double_dash(args, i)) { break; - } else if (ceph_argparse_witharg(args, i, &val, "--name", (char*)NULL)) { + } else if (ceph_argparse_witharg(args, i, &val, "--gateway-name", (char*)NULL)) { name = val; } else if (ceph_argparse_witharg(args, i, &val, "--gateway-address", (char*)NULL)) { gateway_address = val; @@ -84,6 +85,7 @@ int NVMeofGw::init() } } + dout(0) << "gateway name: " << name << " address: " << gateway_address << dendl; ceph_assert(name != "" && gateway_address != ""); // todo diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index ac925fae220e..64b614d30037 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit ac925fae220ee1a1dffa3f22196de3863eb4eac2 +Subproject commit 64b614d300372a14796ab49c1b63921c999785dd From 297ba8924bc4750bf8b95e79ee88822febd6209a Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Tue, 21 Nov 2023 15:24:39 +0000 Subject: [PATCH 32/65] pass ceph configuration Signed-off-by: Alexander Indenbaum --- src/nvmeof/gateway | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 64b614d30037..e35283825697 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 64b614d300372a14796ab49c1b63921c999785dd +Subproject commit e35283825697f270cb2f8cfabd078eab16db615f From cd05fcbf5bd27e693785a8e0ce20c9ff1dc94824 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Wed, 22 Nov 2023 13:42:50 +0000 Subject: [PATCH 33/65] Beacon logic Signed-off-by: Leonid Chernin --- src/nvmeof/NVMeofGw.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 5ba026ef8ed4..22e01f5b55a6 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -185,15 +185,19 @@ void NVMeofGw::send_beacon() for (int i = 0; i < gw_subsystems.subsystems_size(); i++) { const subsystem& sub = gw_subsystems.subsystems(i); struct NqnState nqn_state(sub.nqn()); - if (false) { // handled map already, update sm_state, opt_ana_gid + if (map.epoch > 0) { // handled map already, update sm_state, opt_ana_gid + GW_STATE_T* gw_state = map.find_gw_map(name, nqn_state.nqn); + nqn_state.opt_ana_gid = gw_state->optimized_ana_group_id; + for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + nqn_state.sm_state[i] = gw_state->sm_state[i]; } subs.push_back(nqn_state); } } GW_AVAILABILITY_E gw_availability = GW_AVAILABILITY_E::GW_CREATED; - if (false) { // handled map already - gw_availability = ok ? GW_AVAILABILITY_E::GW_AVAILABLE : GW_AVAILABILITY_E::GW_CREATED; + if (map.epoch > 0) { // handled map already + gw_availability = ok ? GW_AVAILABILITY_E::GW_AVAILABLE : GW_AVAILABILITY_E::GW_UNAVAILABLE; } auto m = ceph::make_message( From 6390dae3173432b4792872b56818eb161660efb6 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Wed, 22 Nov 2023 14:18:38 +0000 Subject: [PATCH 34/65] null pointer fix Signed-off-by: Leonid Chernin --- src/nvmeof/NVMeofGw.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 22e01f5b55a6..47e4675b48e3 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -187,9 +187,11 @@ void NVMeofGw::send_beacon() struct NqnState nqn_state(sub.nqn()); if (map.epoch > 0) { // handled map already, update sm_state, opt_ana_gid GW_STATE_T* gw_state = map.find_gw_map(name, nqn_state.nqn); - nqn_state.opt_ana_gid = gw_state->optimized_ana_group_id; - for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) - nqn_state.sm_state[i] = gw_state->sm_state[i]; + if (gw_state) { + nqn_state.opt_ana_gid = gw_state->optimized_ana_group_id; + for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + nqn_state.sm_state[i] = gw_state->sm_state[i]; + } } subs.push_back(nqn_state); } From 0eaa84b920b20ef7ed3954eb515d6906c68c5cca Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 23 Nov 2023 07:48:58 +0000 Subject: [PATCH 35/65] Process ceph commands gw-create gw delete in NVMeofGwMon Signed-off-by: Leonid Chernin --- src/mon/MonCommands.h | 14 ++++++ src/mon/NVMeofGwMap.cc | 18 +++++--- src/mon/NVMeofGwMap.h | 8 ++-- src/mon/NVMeofGwMon.cc | 98 +++++++++++++++++++++++++++++++++++++----- 4 files changed, 117 insertions(+), 21 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 74ef2206c02b..2b87f98e412d 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1339,6 +1339,20 @@ COMMAND("config generate-minimal-conf", "Generate a minimal ceph.conf file", "config", "r") +/* NVMeofGwMon*/ +COMMAND("nvme-gw create " + "name=subsystem-nqn,type=CephString" + "name=ids,type=CephString,n=N", + "set gw(s) [...] create, " + "create gws inside subsystem", + "gw", "rw") +COMMAND("nvme-gw delete " + "name=subsystem-nqn,type=CephString" + "name=ids,type=CephString,n=N", + "set gw(s) [...] delete, " + "delete gws inside subsystem", + "gw", "rw") + diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index f450d21a0dc7..7f251423e945 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -72,20 +72,24 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn) { } -int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid){ +int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, bool & map_modified){ - if (!find_gw_map(gw_id, nqn)) { + GW_STATE_T * state; + if (!(state = find_gw_map(gw_id, nqn) ) ) { dout(4) << __func__ << " ERROR :GW not found in map " << gw_id << dendl; return -ENODEV ; } - if (ana_grpid >= MAX_SUPPORTED_ANA_GROUPS && ana_grpid != REDUNDANT_GW_ANA_GROUP_ID) - { - dout(4) << __func__ << " ERROR :GW " << gw_id << " bad ANA group " <<(int)ana_grpid << dendl; - return -EINVAL ; + //TODO for all ana groups call fsm_handle_gw_delete + bool modified = false; + map_modified = false; + for(int i=0; ism_state[i], i, modified); + map_modified |= modified; } + dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; Gmap[nqn].erase(gw_id); delete_metadata(gw_id, nqn); - dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << ana_grpid << dendl; + return 0; } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index d7f4e33e776d..2efa75ed5d6d 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -219,10 +219,10 @@ class NVMeofGwMap epoch_t get_epoch() const { return epoch; } int _dump_gwmap(GWMAP & Gmap)const; int _dump_gwmap(std::stringstream &ss)const ; - int cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn); - int cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, uint16_t ana_grpid); - int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); - int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); + int cfg_add_gw (const GW_ID_T &gw_id, const std::string& nqn); + int cfg_delete_gw (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); + int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); + int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); int handle_abandoned_ana_groups(bool &propose_pending); void debug_encode_decode(){ diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 828153fb0d1a..22601607c502 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -16,7 +16,8 @@ using std::map; using std::make_pair; using std::ostream; using std::ostringstream; - +using std::string; +using std::vector; #define dout_subsys ceph_subsys_mon #undef dout_prefix @@ -268,9 +269,11 @@ bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ auto m = op->get_req(); switch (m->get_type()) { + case MSG_MNVMEOF_GW_BEACON: return preprocess_beacon(op); - /* case MSG_MON_COMMAND: + + case MSG_MON_COMMAND: try { return preprocess_command(op); } catch (const bad_cmd_get& e) { @@ -278,7 +281,7 @@ bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ mon.reply_command(op, -EINVAL, e.what(), bl, get_last_committed()); return true; } -*/ + default: mon.no_reply(op); derr << "Unhandled message type " << m->get_type() << dendl; @@ -311,16 +314,91 @@ bool NVMeofGwMon::prepare_update(MonOpRequestRef op){ return true; } -bool NVMeofGwMon::preprocess_command(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - auto m = op->get_req(); - mon.no_reply(op); // we never reply to beacons - dout(4) << "beacon from " << m->get_type() << dendl; +bool NVMeofGwMon::preprocess_command(MonOpRequestRef op) +{ + dout(4) << MY_MON_PREFFIX << __func__ << dendl; + auto m = op->get_req(); + std::stringstream ss; + bufferlist rdata; + + cmdmap_t cmdmap; + if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) + { + string rs = ss.str(); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + return true; + } + MonSession *session = op->get_session(); + if (!session) + { + mon.reply_command(op, -EACCES, "access denied", rdata, + get_last_committed()); + return true; + } + string format = cmd_getval_or(cmdmap, "format", "plain"); + boost::scoped_ptr f(Formatter::create(format)); + + string prefix; + cmd_getval(cmdmap, "prefix", prefix); + dout(4) << "MonCommand : "<< prefix << dendl; + // TODO need to check formatter per preffix - if f is NULL + return false; } -bool NVMeofGwMon::prepare_command(MonOpRequestRef op){ - dout(4) << MY_MON_PREFFIX <<__func__ << dendl; +bool NVMeofGwMon::prepare_command(MonOpRequestRef op) +{ + dout(4) << MY_MON_PREFFIX << __func__ << dendl; + auto m = op->get_req(); + int rc; + std::stringstream ss; + bufferlist rdata; + + cmdmap_t cmdmap; + if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) + { + string rs = ss.str(); + mon.reply_command(op, -EINVAL, rs, rdata, get_last_committed()); + return true; + } + + MonSession *session = op->get_session(); + if (!session) + { + mon.reply_command(op, -EACCES, "access denied", rdata, get_last_committed()); + return true; + } + + string format = cmd_getval_or(cmdmap, "format", "plain"); + boost::scoped_ptr f(Formatter::create(format)); + + const auto prefix = cmd_getval_or(cmdmap, "prefix", string{}); + + dout(4) << "MonCommand : "<< prefix << dendl; + bool map_modified = false; + if( prefix == "nvme-gw create" || prefix == "nvme-gw delete" ) { + vector idvec; + string subs_nqn; + cmd_getval(cmdmap, "subsystem-nqn", idvec); + cmd_getval(cmdmap, "ids", idvec); + if(prefix == "nvme-gw create"){ + for (unsigned i = 0; i < idvec.size(); i ++){ + rc = pending_map.cfg_add_gw( idvec[i], subs_nqn ); + ceph_assert(rc!= -EINVAL); + } + } + else{ + bool modified; + for (unsigned i = 0; i < idvec.size(); i ++){ + rc = pending_map.cfg_delete_gw( idvec[i], subs_nqn, modified); + map_modified |= modified; + //ceph_assert(rc!= -EINVAL); + } + } + if(map_modified){ + propose_pending(); + } + } return true; } From 6a61b8ffc87a6cbe4cae332d80fbf1a9218fc35d Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 23 Nov 2023 18:36:05 +0000 Subject: [PATCH 36/65] fix gw delete in NVMeofGwMap Fix response to cli configuration messages Please enter the commit message for your changes --- src/mon/NVMeofGwMap.cc | 20 ++++++++++++++++---- src/mon/NVMeofGwMon.cc | 22 ++++++++++++++++++++-- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 7f251423e945..ac00557df895 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -436,7 +436,22 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin { case GW_STANDBY_STATE: case GW_IDLE_STATE: - // nothing to do + case GW_BLOCKED_AGROUP_OWNER: + { + GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + if(grpid == gw_state->optimized_ana_group_id) {// Try to find GW that temporary owns my group - if found, this GW should pass to standby for this group + auto subsyst_it = find_subsystem_map(nqn); + for (auto& itr : *subsyst_it){ + if (itr.second.sm_state[grpid] == GW_ACTIVE_STATE || itr.second.sm_state[grpid] == GW_WAIT_FAILBACK_PREPARED){ + set_gw_standby_state(&itr.second, grpid); + map_modified = true; + if (itr.second.sm_state[grpid] == GW_WAIT_FAILBACK_PREPARED) + cancel_timer(itr.first, nqn, grpid); + break; + } + } + } + } break; case GW_WAIT_FAILBACK_PREPARED: @@ -456,10 +471,8 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin } break; - case GW_BLOCKED_AGROUP_OWNER: case GW_ACTIVE_STATE: { - //TODO if in ACTIVE state Start Block-List on this GW context GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); map_modified = true; set_gw_standby_state(gw_state, grpid); @@ -469,7 +482,6 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin default:{ ceph_assert(false); } - } return 0; } diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 22601607c502..1fe97dd4fc8e 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -353,8 +353,10 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) int rc; std::stringstream ss; bufferlist rdata; - + string rs; + int err = 0; cmdmap_t cmdmap; + if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) { string rs = ss.str(); @@ -396,9 +398,25 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) } } if(map_modified){ - propose_pending(); + propose_pending(); + goto update; + } + else { + goto reply_no_propose; } } + + reply_no_propose: + getline(ss, rs); + if (err < 0 && rs.length() == 0) + rs = cpp_strerror(err); + mon.reply_command(op, err, rs, rdata, get_last_committed()); + return false; /* nothing to propose */ + + update: + getline(ss, rs); + wait_for_commit(op, new Monitor::C_Command(mon, op, 0, rs, + get_last_committed() + 1)); return true; } From b18052c9a248e5c6fd4e84dbc0b6d813b34c8405 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Sun, 26 Nov 2023 09:50:45 +0000 Subject: [PATCH 37/65] NVMeofGw::handle_nvmeof_gw_map(): calculate gateway GRPC set_ana_info request Signed-off-by: Alexander Indenbaum --- src/nvmeof/NVMeofGw.cc | 80 ++++++++++++++++++++++++++---------------- src/nvmeof/gateway | 2 +- 2 files changed, 51 insertions(+), 31 deletions(-) diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 47e4675b48e3..eb8e7277b114 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -254,39 +254,59 @@ void NVMeofGw::shutdown() finisher.stop(); } -//TODO temp,just for compilation -#define NQN "2004.nqn.12345" - void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) { - dout(0) << "handle nvmeof gw map" << dendl; - // NVMeofGwMap - auto &mp = mmap->get_map(); - dout(0) << "received map epoch " << mp.get_epoch() << dendl; - std::stringstream ss; - mp._dump_gwmap(ss); - dout(0) << ss.str() << dendl; - - GW_STATE_T dummy_state { {GW_IDLE_STATE,} , {"NULL","NULL","NULL","NULL","NULL",} , 1/*ana_grp*/, GW_AVAILABILITY_E::GW_CREATED, 0 }; - GW_STATE_T* gw_state = map.find_gw_map(name, NQN); - GW_STATE_T* new_gw_state = mp.find_gw_map(name, NQN); - - //ceph_assert(new_gw_state); - if(!gw_state) - gw_state = &dummy_state; - if(new_gw_state) - for(int i=0; ism_state[i] != gw_state->sm_state[i]) - { - dout(0) << " " << new_gw_state->sm_state[i] << " , "<< gw_state->sm_state[i] << dendl; - // build array of tuples : {ana-grpid , new-state, died_gw-id(in case the state = Active and ana-grpid != my_optimised_grpid) - } + dout(0) << "handle nvmeof gw map" << dendl; + auto &mp = mmap->get_map(); + dout(0) << "received map epoch " << mp.get_epoch() << dendl; + std::stringstream ss; + mp._dump_gwmap(ss); + dout(0) << ss.str() << dendl; + + ana_info ai; + // Interate over NQNs + for (const auto& subsystemPair : mp.Gmap) { + const std::string& nqn = subsystemPair.first; + const auto& idStateMap = subsystemPair.second; + nqn_ana_states nas; + nas.set_nqn(nqn); + + // This gateway state for the current subsystem / nqn + const auto& new_gateway_state = idStateMap.find(name); + + // There is no subsystem update for this gateway + if (new_gateway_state == idStateMap.end()) continue; + + // Previously monitor distributed state + GW_STATE_T* old_gw_state = map.find_gw_map(name, nqn); + + // Iterate over possible ANA Groups + for (uint32_t ana_grp_index = 0; ana_grp_index < MAX_SUPPORTED_ANA_GROUPS; ana_grp_index++) { + ana_group_state gs; + gs.set_grp_id(ana_grp_index + 1); // offset by 1, index 0 is ANAGRP1 + + // There is no state change for this ANA Group + auto old_state = old_gw_state ? old_gw_state->sm_state[ana_grp_index] : GW_STATES_PER_AGROUP_E::GW_IDLE_STATE; + if (old_state == new_gateway_state->second.sm_state[ana_grp_index]) continue; + + // detect was active, but not any more transition + if ((old_state == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE || old_state == GW_STATES_PER_AGROUP_E::GW_IDLE_STATE ) && + new_gateway_state->second.sm_state[ana_grp_index] != GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { + gs.set_state(INACCESSIBLE); // Set the ANA state + nas.mutable_states()->Add(std::move(gs)); + // detect was not active, but becaome one transition + } else if (old_state != GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE && + new_gateway_state->second.sm_state[ana_grp_index] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { + gs.set_state(OPTIMIZED); // Set the ANA state + nas.mutable_states()->Add(std::move(gs)); + } else continue; // Avoid dealing with intermediate states. } - - - - map = mp; - + if (nas.states_size()) ai.mutable_states()->Add(std::move(nas)); + } + if (ai.states_size()) { + // TODO: grpc gateway set_ana_state() + } + map = mp; } bool NVMeofGw::ms_dispatch2(const ref_t& m) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index e35283825697..bfcf38b5c32a 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit e35283825697f270cb2f8cfabd078eab16db615f +Subproject commit bfcf38b5c32aab3c03dbc90fe57eef21d5200bf8 From 72c1b84a6346c11d035714f4b97f4ceeedcc3377 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 26 Nov 2023 21:11:42 +0000 Subject: [PATCH 38/65] Added created_gws vector and usage, add it to encode/decode, dump it removed nqn from the definition of nvme create/delete cli commands completed internal integration of new gw_create database per map TODO - the full NVMeofGwMap object is distributed to all GW clients, instead of just GMap part of it. To fix by applying full decode/encode methods and partial decode/encode in the NVMeofGwMap --- src/mon/MonCommands.h | 2 - src/mon/NVMeofGwMap.cc | 107 +++++++++++++++++++++++------------------ src/mon/NVMeofGwMap.h | 51 ++++++++++++++++++-- src/mon/NVMeofGwMon.cc | 66 +++++++++++++------------ 4 files changed, 141 insertions(+), 85 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 2b87f98e412d..153e6df756be 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1341,13 +1341,11 @@ COMMAND("config generate-minimal-conf", /* NVMeofGwMon*/ COMMAND("nvme-gw create " - "name=subsystem-nqn,type=CephString" "name=ids,type=CephString,n=N", "set gw(s) [...] create, " "create gws inside subsystem", "gw", "rw") COMMAND("nvme-gw delete " - "name=subsystem-nqn,type=CephString" "name=ids,type=CephString,n=N", "set gw(s) [...] delete, " "delete gws inside subsystem", diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index ac00557df895..4541f9bd485c 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -32,64 +32,67 @@ static std::string G_gw_ana_states[] = { "WAIT_FLBACK_RDY" }; - -int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string & nqn) { - - GW_STATE_T state{ {GW_IDLE_STATE,}, {""}, 0, GW_AVAILABILITY_E::GW_CREATED, 0 }; - for(int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) state.failover_peer[i] = "NULL"; - - if (find_gw_map(gw_id, nqn)) { - dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; - return -EEXIST ; - } - - //TODO check that all MAX_SUPPORTED_ANA_GROUPS are occupied in the subsystem - assert - //Allocate AnaGrpId for the new GW - bool allocated[MAX_SUPPORTED_ANA_GROUPS+1] = {false}; - auto subsyst_it = find_subsystem_map(nqn); - if(subsyst_it) { - for (auto& itr : *subsyst_it) - allocated[itr.second.optimized_ana_group_id] = true; - //dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA group in use " <<(int)ana_grpid << " by GW " << itr.first << dendl; - } - for(int i=1; i<=MAX_SUPPORTED_ANA_GROUPS; i++){ - if (allocated[i] == false){ - state.optimized_ana_group_id = i; - break; - } - } - if(state.optimized_ana_group_id == 0){ - dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA groupId was not allocated " << dendl; +int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { + GW_CREATED_T gw_created = {0, gw_id}; + bool allocated[MAX_SUPPORTED_ANA_GROUPS+1] = {false}; + + for (unsigned i = 0; i < Created_gws.size(); i ++){ + allocated[Created_gws[i].ana_grp_id] = true; + if(Created_gws[i].gw_name == gw_id){ + dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; + return -EEXIST ; + } + } + for(int i=1; i<=MAX_SUPPORTED_ANA_GROUPS; i++){ + if (allocated[i] == false){ + gw_created.ana_grp_id = i; + break; + } + } + if(gw_created.ana_grp_id == 0){ + dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA groupId was not allocated " << dendl; return -EINVAL; - } - if(Gmap[nqn].size() == 0) - Gmap.insert(make_pair(nqn, SUBSYST_GWMAP())); - Gmap[nqn].insert({gw_id, state}); + } - create_metadata(gw_id, nqn); - dout(4) << " Add GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state.optimized_ana_group_id << dendl; - return 0; + Created_gws.push_back(gw_created); + dout(4) << __func__ << "Created GW: " << gw_id << dendl; + std::stringstream ss; + _dump_created_gws(ss); + dout(4) << ss.str() << dendl; + return 0; } int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, bool & map_modified){ GW_STATE_T * state; - if (!(state = find_gw_map(gw_id, nqn) ) ) { - dout(4) << __func__ << " ERROR :GW not found in map " << gw_id << dendl; + bool found = false; + unsigned index; + + for (index = 0; index < Created_gws.size(); index ++){ + if(Created_gws[index].gw_name == gw_id){ + found = true; + break; + } + } + if(!found) { + dout(4) << __func__ << " ERROR :GW was not created " << gw_id << dendl; return -ENODEV ; } - //TODO for all ana groups call fsm_handle_gw_delete - bool modified = false; - map_modified = false; - for(int i=0; ism_state[i], i, modified); - map_modified |= modified; + // TODO tracerse the GMap , find gw in the map for all nqns - nqn is not a parameter of a function + if ((state = find_gw_map(gw_id, nqn) ) ) { // GW was created and started + + bool modified = false; + map_modified = false; + for(int i=0; ism_state[i], i, modified); + map_modified |= modified; + } + dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; + Gmap[nqn].erase(gw_id); + delete_metadata(gw_id, nqn); } - dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; - Gmap[nqn].erase(gw_id); - delete_metadata(gw_id, nqn); - + Created_gws.erase(Created_gws.begin() + index); return 0; } @@ -157,6 +160,16 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { return 0; } +int NVMeofGwMap::_dump_created_gws(std::stringstream &ss)const { + ss << __func__ << " called " << std::endl; + for (auto& itr : Created_gws) { + ss << " gw :" << itr.gw_name << ", ana: " << itr.ana_grp_id ; + } + ss << std::endl; + return 0; +} + + int NVMeofGwMap:: update_active_timers( bool &propose_pending ){ diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 2efa75ed5d6d..9f1eb15b0ecf 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -75,6 +75,10 @@ typedef struct GW_METADATA_T { int anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state }GW_METADATA_T; +typedef struct { + int ana_grp_id; + std::string gw_name; +} GW_CREATED_T; using GWMAP = std::map >; using GWMETADATA = std::map >; @@ -136,10 +140,11 @@ class NVMeofGwMap Monitor *mon= NULL;// just for logs in the mon module file GWMAP Gmap; GWMETADATA Gmetadata; + std::vector Created_gws; epoch_t epoch = 0; // epoch is for Paxos synchronization mechanizm bool delay_propose = false; - bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode - uint32_t listen_mode_start_tick{0}; + //bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode + //uint32_t listen_mode_start_tick{0}; //std::map module_options; @@ -158,7 +163,12 @@ class NVMeofGwMap encode((const std::string &)itr.first, bl);// nqn encode( itr.second, bl);// encode the full map of this nqn : } - + //Encode created GWs + encode ((int)Created_gws.size(), bl); + for(auto &itr : Created_gws){ + encode(itr.gw_name, bl); + encode(itr.ana_grp_id, bl); + } ENCODE_FINISH(bl); } @@ -199,11 +209,29 @@ class NVMeofGwMap Gmetadata[nqn].insert({itr.first, itr.second}); } } + //Decode created GWs + int num_created_gws; + decode(num_created_gws, bl); + Created_gws.clear(); + for(int i = 0; i("mon_nvmeofgw_beacon_grace"); dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; @@ -221,7 +222,12 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){ auto p = bl.cbegin(); map.decode(p); - if(!mon.is_leader()) map._dump_gwmap(map.Gmap); + if(!mon.is_leader()) { + std::stringstream ss; + map._dump_created_gws(ss); + dout(4) << ss.str() << dendl; + map._dump_gwmap(map.Gmap); + } check_subs(true); } @@ -380,19 +386,18 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) bool map_modified = false; if( prefix == "nvme-gw create" || prefix == "nvme-gw delete" ) { vector idvec; - string subs_nqn; - cmd_getval(cmdmap, "subsystem-nqn", idvec); + cmd_getval(cmdmap, "ids", idvec); if(prefix == "nvme-gw create"){ for (unsigned i = 0; i < idvec.size(); i ++){ - rc = pending_map.cfg_add_gw( idvec[i], subs_nqn ); + rc = pending_map.cfg_add_gw( idvec[i] ); ceph_assert(rc!= -EINVAL); } } else{ bool modified; for (unsigned i = 0; i < idvec.size(); i ++){ - rc = pending_map.cfg_delete_gw( idvec[i], subs_nqn, modified); + rc = pending_map.cfg_delete_gw( idvec[i], "Null", modified); map_modified |= modified; //ceph_assert(rc!= -EINVAL); } @@ -461,28 +466,28 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ const GwSubsystems& subsystems = m->get_subsystems(); bool propose = false; - // Validation gw is in the database - if(avail != GW_AVAILABILITY_E::GW_CREATED){ - for (const NqnState& st: subsystems) { - gw_state = pending_map.find_gw_map(gw_id, st.nqn); - if(gw_state == NULL) { - dout(4) << "ERROR: GW is not in database: " << gw_id <<" " << st.nqn << dendl; - ceph_assert(false);// TODO + // Validation gw is in the database + for (const NqnState &st : subsystems) + { + gw_state = pending_map.find_gw_map(gw_id, st.nqn); + if (gw_state == NULL) + { + dout(4) << "GW + NQN pair is not in the database: " << gw_id << " " << st.nqn << dendl; + // if GW is created + int ana_grp_id = 0; + if(pending_map.find_created_gw(gw_id, ana_grp_id) == 0) {// GW is created administratively + pending_map.insert_gw_to_map(gw_id, st.nqn, ana_grp_id); + dout(4) << "GW + NQN pair " << gw_id << " " << st.nqn << " inserted to map, ANA grp-id " << ana_grp_id << dendl; } - } - } - - if(avail == GW_AVAILABILITY_E::GW_CREATED){ - // create gw call cfg_add_gw - for (const NqnState& st: subsystems) { - int rc = pending_map.cfg_add_gw( gw_id, st.nqn ); - if(rc == -EEXIST){ - propose = true; // for synchronization with GW that starts + else { + //drop beacon on the floor silently discard + return 0; } } } - else if(avail == GW_AVAILABILITY_E::GW_AVAILABLE){ + if(avail == GW_AVAILABILITY_E::GW_AVAILABLE) + { auto now = ceph::coarse_mono_clock::now(); // check pending_map.epoch vs m->get_version() - if different - drop the beacon @@ -508,7 +513,6 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << "decision to delayed_map in prepare_beacon" < Date: Mon, 27 Nov 2023 16:28:55 +0000 Subject: [PATCH 39/65] fix receiving MonCommands by GW monitor Signed-off-by: Leonid Chernin --- src/mon/Monitor.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index e85438af6f74..9b12c2b0f7d5 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -3593,7 +3593,11 @@ void Monitor::handle_command(MonOpRequestRef op) mgrmon()->dispatch(op); return; } - + if (module == "nvme-gw"){ + nvmegwmon()->dispatch(op); + dout(10) << " Dispatching module " << module << " to NVMeofGwMon" << dendl; + return; + } if (prefix == "fsid") { if (f) { f->open_object_section("fsid"); From 16653b0a6df1f4650b593f4055e4bed9f0497ed3 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Mon, 27 Nov 2023 18:15:12 +0000 Subject: [PATCH 40/65] grpc gateway set_ana_state() Signed-off-by: Alexander Indenbaum --- src/nvmeof/NVMeofGw.cc | 6 +++++- src/nvmeof/NVMeofGwClient.cc | 9 +++++++++ src/nvmeof/NVMeofGwClient.h | 1 + src/nvmeof/gateway | 2 +- 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index eb8e7277b114..e3dfebb5e35d 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -294,17 +294,21 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) new_gateway_state->second.sm_state[ana_grp_index] != GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { gs.set_state(INACCESSIBLE); // Set the ANA state nas.mutable_states()->Add(std::move(gs)); + dout(0) << "nqn: " << nqn << " grpid " << (ana_grp_index + 1) << " INACCESSIBLE" <second.sm_state[ana_grp_index] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { gs.set_state(OPTIMIZED); // Set the ANA state nas.mutable_states()->Add(std::move(gs)); + dout(0) << "nqn: " << nqn << " grpid " << (ana_grp_index + 1) << " OPTIMIZED" <Add(std::move(nas)); } if (ai.states_size()) { - // TODO: grpc gateway set_ana_state() + NVMeofGwClient gw_client( + grpc::CreateChannel(gateway_address, grpc::InsecureChannelCredentials())); + if (!gw_client.set_ana_state(ai)) dout(0) << "GRPC set_ana_state failed" << dendl; } map = mp; } diff --git a/src/nvmeof/NVMeofGwClient.cc b/src/nvmeof/NVMeofGwClient.cc index def3c91b8bbc..977e69f0ae28 100644 --- a/src/nvmeof/NVMeofGwClient.cc +++ b/src/nvmeof/NVMeofGwClient.cc @@ -21,3 +21,12 @@ bool NVMeofGwClient::get_subsystems(subsystems_info& reply) { return status.ok(); } + +bool NVMeofGwClient::set_ana_state(const ana_info& info) { + req_status reply; + ClientContext context; + + Status status = stub_->set_ana_state(&context, info, &reply); + + return status.ok() && reply.status(); +} diff --git a/src/nvmeof/NVMeofGwClient.h b/src/nvmeof/NVMeofGwClient.h index 4e27b0697abc..dcb16c65f76c 100644 --- a/src/nvmeof/NVMeofGwClient.h +++ b/src/nvmeof/NVMeofGwClient.h @@ -32,6 +32,7 @@ class NVMeofGwClient { : stub_(Gateway::NewStub(channel)) {} bool get_subsystems(subsystems_info& reply); + bool set_ana_state(const ana_info& info); private: std::unique_ptr stub_; diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index bfcf38b5c32a..dabb6d65419b 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit bfcf38b5c32aab3c03dbc90fe57eef21d5200bf8 +Subproject commit dabb6d65419b63c9dbff0949bb7fd96c9a82e7af From 756d64ff1050bb07b85f95de67d48c30c06a05e6 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Tue, 28 Nov 2023 07:17:03 +0000 Subject: [PATCH 41/65] Don't compile by default NVMEOF GATEWAY MONITOR CLIENT Signed-off-by: Alexander Indenbaum --- src/CMakeLists.txt | 132 +++++++++++++++++++++++---------------------- 1 file changed, 68 insertions(+), 64 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 13b168c8a0d5..ff47b17e7f27 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -867,72 +867,76 @@ endif(WITH_FUSE) # NVMEOF GATEWAY MONITOR CLIENT -# Find Protobuf installation -# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. -option(protobuf_MODULE_COMPATIBLE TRUE) -find_package(Protobuf REQUIRED) - -set(_REFLECTION grpc++_reflection) -if(CMAKE_CROSSCOMPILING) - find_program(_PROTOBUF_PROTOC protoc) -else() - set(_PROTOBUF_PROTOC $) -endif() +option(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT "build nvmeof gateway monitor client" OFF) +if(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT) -# Find gRPC installation -# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. -find_package(gRPC CONFIG REQUIRED) -message(STATUS "Using gRPC ${gRPC_VERSION}") -set(_GRPC_GRPCPP gRPC::grpc++) -if(CMAKE_CROSSCOMPILING) - find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) -else() - set(_GRPC_CPP_PLUGIN_EXECUTABLE $) -endif() + # Find Protobuf installation + # Looks for protobuf-config.cmake file installed by Protobuf's cmake installation. + option(protobuf_MODULE_COMPATIBLE TRUE) + find_package(Protobuf REQUIRED) + + set(_REFLECTION grpc++_reflection) + if(CMAKE_CROSSCOMPILING) + find_program(_PROTOBUF_PROTOC protoc) + else() + set(_PROTOBUF_PROTOC $) + endif() -# Proto file -get_filename_component(nvmeof_gateway_proto "nvmeof/gateway/control/proto/gateway.proto" ABSOLUTE) -get_filename_component(nvmeof_gateway_proto_path "${nvmeof_gateway_proto}" PATH) - -# Generated sources -set(nvmeof_gateway_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.cc") -set(nvmeof_gateway_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.h") -set(nvmeof_gateway_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.cc") -set(nvmeof_gateway_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.h") - -add_custom_command( - OUTPUT "${nvmeof_gateway_proto_srcs}" "${nvmeof_gateway_proto_hdrs}" "${nvmeof_gateway_grpc_srcs}" "${nvmeof_gateway_grpc_hdrs}" - COMMAND ${_PROTOBUF_PROTOC} - ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" - --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" - -I "${nvmeof_gateway_proto_path}" - --experimental_allow_proto3_optional - --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" - "${nvmeof_gateway_proto}" - DEPENDS "${nvmeof_gateway_proto}") - -# Include generated *.pb.h files -include_directories("${CMAKE_CURRENT_BINARY_DIR}") - -set(ceph_nvmeof_srcs - ${nvmeof_gateway_proto_srcs} - ${nvmeof_gateway_proto_hdrs} - ${nvmeof_gateway_grpc_srcs} - ${nvmeof_gateway_grpc_hdrs} - ceph_nvmeof.cc - nvmeof/NVMeofGwClient.cc - nvmeof/NVMeofGw.cc) -add_executable(ceph-nvmeof ${ceph_nvmeof_srcs}) -add_dependencies(ceph-nvmeof ceph-common) -target_link_libraries(ceph-nvmeof - client - mon - global-static - ceph-common - ${_REFLECTION} - ${_GRPC_GRPCPP} - ) -install(TARGETS ceph-nvmeof DESTINATION bin) + # Find gRPC installation + # Looks for gRPCConfig.cmake file installed by gRPC's cmake installation. + find_package(gRPC CONFIG REQUIRED) + message(STATUS "Using gRPC ${gRPC_VERSION}") + set(_GRPC_GRPCPP gRPC::grpc++) + if(CMAKE_CROSSCOMPILING) + find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin) + else() + set(_GRPC_CPP_PLUGIN_EXECUTABLE $) + endif() + + # Proto file + get_filename_component(nvmeof_gateway_proto "nvmeof/gateway/control/proto/gateway.proto" ABSOLUTE) + get_filename_component(nvmeof_gateway_proto_path "${nvmeof_gateway_proto}" PATH) + + # Generated sources + set(nvmeof_gateway_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.cc") + set(nvmeof_gateway_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.pb.h") + set(nvmeof_gateway_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.cc") + set(nvmeof_gateway_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/gateway.grpc.pb.h") + + add_custom_command( + OUTPUT "${nvmeof_gateway_proto_srcs}" "${nvmeof_gateway_proto_hdrs}" "${nvmeof_gateway_grpc_srcs}" "${nvmeof_gateway_grpc_hdrs}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${nvmeof_gateway_proto_path}" + --experimental_allow_proto3_optional + --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" + "${nvmeof_gateway_proto}" + DEPENDS "${nvmeof_gateway_proto}") + + # Include generated *.pb.h files + include_directories("${CMAKE_CURRENT_BINARY_DIR}") + + set(ceph_nvmeof_srcs + ${nvmeof_gateway_proto_srcs} + ${nvmeof_gateway_proto_hdrs} + ${nvmeof_gateway_grpc_srcs} + ${nvmeof_gateway_grpc_hdrs} + ceph_nvmeof.cc + nvmeof/NVMeofGwClient.cc + nvmeof/NVMeofGw.cc) + add_executable(ceph-nvmeof ${ceph_nvmeof_srcs}) + add_dependencies(ceph-nvmeof ceph-common) + target_link_libraries(ceph-nvmeof + client + mon + global-static + ceph-common + ${_REFLECTION} + ${_GRPC_GRPCPP} + ) + install(TARGETS ceph-nvmeof DESTINATION bin) +endif() # END OF NVMEOF GATEWAY MONITOR CLIENT if(WITH_DOKAN) From e1927e9b2f5c3012310b7c2d0202c5e13b2a4d13 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Wed, 29 Nov 2023 11:38:10 +0000 Subject: [PATCH 42/65] send to the GW client just GMap and map epoch - previously the full NVMeofGwMap object was encoded Signed-off-by: Leonid Chernin --- src/messages/MNVMeofGwMap.h | 4 +- src/mon/NVMeofGwMap.h | 87 ++++++++++++++++++++----------------- src/mon/NVMeofGwMon.cc | 3 ++ 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/src/messages/MNVMeofGwMap.h b/src/messages/MNVMeofGwMap.h index 66ffbd396624..14b1eed5a27f 100644 --- a/src/messages/MNVMeofGwMap.h +++ b/src/messages/MNVMeofGwMap.h @@ -43,13 +43,13 @@ class MNVMeofGwMap final : public Message { void decode_payload() override { // ../src/messages/MNVMeofGwMap.h:46:11: error: no matching function for call to ‘decode(NVMeofGwMap&, ceph::buffer::v15_2_0::list::iterator_impl&)’ auto p = payload.cbegin(); - map.decode( p); + map.decode( p, false); } void encode_payload(uint64_t features) override { //../src/messages/MNVMeofGwMap.h:51:11: error: no matching function for call to ‘encode(NVMeofGwMap&, ceph::buffer::v15_2_0::list&, uint64_t&)’ //using ceph::encode; //encode(map, payload, features); - map.encode(payload); + map.encode(payload, false); } private: using RefCountedObject::put; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 9f1eb15b0ecf..f7d67fb5fae8 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -148,36 +148,39 @@ class NVMeofGwMap //std::map module_options; - void encode(ceph::buffer::list &bl) const { + void encode(ceph::buffer::list &bl, bool full_encode = true) const { ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); encode((int) epoch, bl);// global map epoch - // encode(delay_propose,bl); + // encode(delay_propose,bl); encode ((int)Gmap.size(),bl); // number nqn for (auto& itr : Gmap) { encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : map + encode( itr.second, bl);// encode the full map of this nqn : std::map } - // Encode Gmetadata - encode ((int)Gmetadata.size(),bl); - for (auto& itr : Gmetadata) { - encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : - } - //Encode created GWs - encode ((int)Created_gws.size(), bl); - for(auto &itr : Created_gws){ - encode(itr.gw_name, bl); - encode(itr.ana_grp_id, bl); + + if(full_encode) { + // Encode Gmetadata + encode ((int)Gmetadata.size(),bl); + for (auto& itr : Gmetadata) { + encode((const std::string &)itr.first, bl);// nqn + encode( itr.second, bl);// encode the full map of this nqn : + } + //Encode created GWs + encode ((int)Created_gws.size(), bl); + for(auto &itr : Created_gws){ + encode(itr.gw_name, bl); + encode(itr.ana_grp_id, bl); + } } ENCODE_FINISH(bl); } - void decode(ceph::buffer::list::const_iterator &bl) { + void decode(ceph::buffer::list::const_iterator &bl, bool full_decode = true) { DECODE_START(1, bl); int num_subsystems; std::string nqn; decode(epoch, bl); - // decode(delay_propose,bl); + // decode(delay_propose,bl); decode(num_subsystems, bl); SUBSYST_GWMAP gw_map; Gmap.clear(); @@ -193,33 +196,35 @@ class NVMeofGwMap Gmap[nqn].insert({itr.first, itr.second}); } } - // decode Gmetadata - decode(num_subsystems, bl); - SUBSYST_GWMETA gw_meta; - Gmetadata.clear(); - //_dump_gwmap(Gmap); - for(int i = 0; i < num_subsystems; i++){ - decode(nqn, bl); - Gmetadata.insert(make_pair(nqn, std::map())); - //decode the map - gw_meta.clear(); - decode(gw_meta, bl); - //insert the gw_meta to Gmap - for(auto &itr: gw_meta ){ - Gmetadata[nqn].insert({itr.first, itr.second}); + + if(full_decode){ + // decode Gmetadata + decode(num_subsystems, bl); + SUBSYST_GWMETA gw_meta; + Gmetadata.clear(); + //_dump_gwmap(Gmap); + for(int i = 0; i < num_subsystems; i++){ + decode(nqn, bl); + Gmetadata.insert(make_pair(nqn, std::map())); + //decode the map + gw_meta.clear(); + decode(gw_meta, bl); + //insert the gw_meta to Gmap + for(auto &itr: gw_meta ){ + Gmetadata[nqn].insert({itr.first, itr.second}); + } + } + //Decode created GWs + int num_created_gws; + decode(num_created_gws, bl); + Created_gws.clear(); + for(int i = 0; i Date: Wed, 29 Nov 2023 15:11:13 +0000 Subject: [PATCH 43/65] nvmeof keyring: mon use 'allow *' Signed-off-by: Alexander Indenbaum --- src/nvmeof/gateway | 2 +- src/pybind/mgr/cephadm/services/nvmeof.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index dabb6d65419b..391ab8a6b3fd 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit dabb6d65419b63c9dbff0949bb7fd96c9a82e7af +Subproject commit 391ab8a6b3fd0ef14b1d9a0c527d0a7b3d388a16 diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py index 7d2dd16cf0d6..8d785aa31584 100644 --- a/src/pybind/mgr/cephadm/services/nvmeof.py +++ b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -29,7 +29,7 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD host_ip = self.mgr.inventory.get_addr(daemon_spec.host) keyring = self.get_keyring_with_caps(self.get_auth_entity(nvmeof_gw_id), - ['mon', 'profile rbd', + ['mon', 'allow *', 'osd', 'allow all tag rbd *=*']) # TODO: check if we can force jinja2 to generate dicts with double quotes instead of using json.dumps From 2a5f6122bb8cddb8b5a80ccf8e1ac3ec2498cc5f Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Thu, 30 Nov 2023 12:37:10 +0000 Subject: [PATCH 44/65] nvmeof gw version update Signed-off-by: Alexander Indenbaum --- src/nvmeof/gateway | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 391ab8a6b3fd..5eee6f45f9a8 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 391ab8a6b3fd0ef14b1d9a0c527d0a7b3d388a16 +Subproject commit 5eee6f45f9a8708bfc2ac97bb742fd6cad548e48 From 55bb96d947ccec8ff3aab4be6956826645269e14 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 30 Nov 2023 14:52:11 +0000 Subject: [PATCH 45/65] GWMap class ana-grp index is zero based , later in gw each ana-grp-id is incremented by 1 for set in listener --- src/mon/NVMeofGwMap.cc | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 4541f9bd485c..a388f7d8a2c3 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -33,29 +33,29 @@ static std::string G_gw_ana_states[] = { }; int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { - GW_CREATED_T gw_created = {0, gw_id}; - bool allocated[MAX_SUPPORTED_ANA_GROUPS+1] = {false}; + GW_CREATED_T gw_created = {-1, gw_id}; + bool allocated[MAX_SUPPORTED_ANA_GROUPS] = {false}; for (unsigned i = 0; i < Created_gws.size(); i ++){ - allocated[Created_gws[i].ana_grp_id] = true; + allocated[Created_gws[i].ana_grp_id ] = true; if(Created_gws[i].gw_name == gw_id){ dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; return -EEXIST ; } } - for(int i=1; i<=MAX_SUPPORTED_ANA_GROUPS; i++){ + for(int i=0; i<=MAX_SUPPORTED_ANA_GROUPS; i++){ if (allocated[i] == false){ gw_created.ana_grp_id = i; break; } } - if(gw_created.ana_grp_id == 0){ + if(gw_created.ana_grp_id == -1){ dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA groupId was not allocated " << dendl; return -EINVAL; } Created_gws.push_back(gw_created); - dout(4) << __func__ << "Created GW: " << gw_id << dendl; + dout(4) << __func__ << "Created GW: " << gw_id << " grpid " << gw_created.ana_grp_id << dendl; std::stringstream ss; _dump_created_gws(ss); dout(4) << ss.str() << dendl; @@ -124,12 +124,12 @@ int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { for (auto& itr : Gmap) { for (auto& ptr : itr.second) { - ss << " NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id << + ss << "(gw-mon) NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id+1 << " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } - ss << "Failover peers: " << std::endl << " "; + ss << " Failover peers: " << std::endl << " "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { ss << ptr.second.failover_peer[i] << " " ; } @@ -144,12 +144,12 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { ss << __func__ << " called " << mon << std::endl; for (auto& itr : Gmap) { for (auto& ptr : itr.second) { - ss << " NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) - << (int)ptr.second.optimized_ana_group_id << " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; + ss << "(gw-mon) NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) + << (int)ptr.second.optimized_ana_group_id+1 << " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } - ss << "Failover peers: " << std::endl << " "; + ss << " Failover peers: " << std::endl << " "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { ss << ptr.second.failover_peer[i] << " " ; } @@ -391,7 +391,11 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin propose_pending = true; set_failover_gw_for_ANA_group(gw_id, min_loaded_gw_id, nqn, grpid); } - else propose_pending = false; + else { + propose_pending = true; + dout(4) << "gw down no candidate found " << dendl; + _dump_gwmap(Gmap); + } gw_state->sm_state[grpid] = GW_STANDBY_STATE; } return 0; From 433c4f736dcdd76e97347dec0220b891d0d1baac Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sat, 2 Dec 2023 15:51:35 +0000 Subject: [PATCH 46/65] fix issue with extra distributions of map --- src/mon/NVMeofGwMap.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index a388f7d8a2c3..2101b27e62c0 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -392,9 +392,11 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin set_failover_gw_for_ANA_group(gw_id, min_loaded_gw_id, nqn, grpid); } else { - propose_pending = true; - dout(4) << "gw down no candidate found " << dendl; - _dump_gwmap(Gmap); + if (gw_state->sm_state[grpid] == GW_ACTIVE_STATE){// not found candidate but map changed. + propose_pending = true; + dout(4) << "gw down no candidate found " << dendl; + _dump_gwmap(Gmap); + } } gw_state->sm_state[grpid] = GW_STANDBY_STATE; } From 5188f9146d689a8a69a44a9d507775c695eaa2de Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 4 Dec 2023 17:38:52 +0000 Subject: [PATCH 47/65] Send map with vector of created-gws when received beacon shows GW availability::Created state --- src/mon/NVMeofGwMap.h | 44 +++++++++++++++++++----------------------- src/mon/NVMeofGwMon.cc | 25 +++++++++++++++--------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index f7d67fb5fae8..0554e2dda65b 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -94,7 +94,6 @@ inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { } encode(state.optimized_ana_group_id, bl); encode((int)state.availability, bl); - // encode(state.gw_id, bl); encode(state.version, bl); } @@ -113,7 +112,6 @@ inline void decode(GW_STATE_T& state, ceph::bufferlist::const_iterator& bl) { int avail; decode(avail, bl); state.availability = (GW_AVAILABILITY_E)avail; - //decode(state.gw_id, bl); decode(state.version, bl); } @@ -143,15 +141,17 @@ class NVMeofGwMap std::vector Created_gws; epoch_t epoch = 0; // epoch is for Paxos synchronization mechanizm bool delay_propose = false; - //bool listen_mode{ false }; // "listen" mode. started when detected invalid maps from some GW in the beacon messages. "Listen" mode Designed as Synchronisation mode - //uint32_t listen_mode_start_tick{0}; - - //std::map module_options; void encode(ceph::buffer::list &bl, bool full_encode = true) const { ENCODE_START(2, 1, bl); //encode(name, bl); encode(can_run, bl);encode(error_string, bl);encode(module_options, bl); encode((int) epoch, bl);// global map epoch // encode(delay_propose,bl); + //Encode created GWs + encode ((int)Created_gws.size(), bl); + for(auto &itr : Created_gws){ + encode(itr.gw_name, bl); + encode(itr.ana_grp_id, bl); + } encode ((int)Gmap.size(),bl); // number nqn for (auto& itr : Gmap) { encode((const std::string &)itr.first, bl);// nqn @@ -165,12 +165,6 @@ class NVMeofGwMap encode((const std::string &)itr.first, bl);// nqn encode( itr.second, bl);// encode the full map of this nqn : } - //Encode created GWs - encode ((int)Created_gws.size(), bl); - for(auto &itr : Created_gws){ - encode(itr.gw_name, bl); - encode(itr.ana_grp_id, bl); - } } ENCODE_FINISH(bl); } @@ -180,6 +174,17 @@ class NVMeofGwMap int num_subsystems; std::string nqn; decode(epoch, bl); + + //Decode created GWs + int num_created_gws; + decode(num_created_gws, bl); + Created_gws.clear(); + for(int i = 0; isession->con << " " << sub->session->con->get_peer_addr() << dendl; sub->session->con->send_message2(make_message(map)); - if (sub->onetime) { mon.session_map.remove_sub(sub); } else { @@ -253,7 +251,6 @@ void NVMeofGwMon::check_sub(Subscription *sub) } } - void NVMeofGwMon::check_subs(bool t) { const std::string type = "NVMeofGw"; @@ -269,13 +266,11 @@ void NVMeofGwMon::check_subs(bool t) } } - bool NVMeofGwMon::preprocess_query(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; auto m = op->get_req(); switch (m->get_type()) { - case MSG_MNVMEOF_GW_BEACON: return preprocess_beacon(op); @@ -468,6 +463,19 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ GW_AVAILABILITY_E avail = m->get_availability(); const GwSubsystems& subsystems = m->get_subsystems(); bool propose = false; + int ana_grp_id = 0; + + if (avail == GW_AVAILABILITY_E::GW_CREATED){ + // in this special state GWs receive map with just "created_gws" vector + if(pending_map.find_created_gw(gw_id, ana_grp_id) == 0) {// GW is created administratively + dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP" << dendl; + propose = true; + } + else{ + dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP but it is not created yet!!! "<< dendl; + } + goto set_propose; + } // Validation gw is in the database for (const NqnState &st : subsystems) @@ -477,7 +485,6 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ { dout(4) << "GW + NQN pair is not in the database: " << gw_id << " " << st.nqn << dendl; // if GW is created - int ana_grp_id = 0; if(pending_map.find_created_gw(gw_id, ana_grp_id) == 0) {// GW is created administratively pending_map.insert_gw_to_map(gw_id, st.nqn, ana_grp_id); dout(4) << "GW + NQN pair " << gw_id << " " << st.nqn << " inserted to map, ANA grp-id " << ana_grp_id << dendl; @@ -506,13 +513,13 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ auto it = last_beacon.find(gw_id + GW_DELIM + st.nqn); if (it != last_beacon.end()){ - last_beacon.erase(gw_id + GW_DELIM + st.nqn); - pending_map.process_gw_map_gw_down( gw_id, st.nqn, propose ); + last_beacon.erase(gw_id + GW_DELIM + st.nqn); + pending_map.process_gw_map_gw_down( gw_id, st.nqn, propose ); } } } +set_propose: if (propose){ - // pending_map.delay_propose = true; dout(4) << "decision to delayed_map in prepare_beacon" < Date: Tue, 5 Dec 2023 07:33:05 +0000 Subject: [PATCH 48/65] Add grpc service client to set monitor provided group id Signed-off-by: Alexander Indenbaum --- src/CMakeLists.txt | 29 ++++++++++++++++++++++++- src/mon/NVMeofGwMap.h | 2 +- src/nvmeof/NVMeofGw.cc | 49 +++++++++++++++++++++++++++--------------- src/nvmeof/NVMeofGw.h | 1 + src/nvmeof/gateway | 2 +- 5 files changed, 63 insertions(+), 20 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ff47b17e7f27..a1be041685f7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -893,7 +893,7 @@ if(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT) set(_GRPC_CPP_PLUGIN_EXECUTABLE $) endif() - # Proto file + # Gateway Proto file get_filename_component(nvmeof_gateway_proto "nvmeof/gateway/control/proto/gateway.proto" ABSOLUTE) get_filename_component(nvmeof_gateway_proto_path "${nvmeof_gateway_proto}" PATH) @@ -914,6 +914,28 @@ if(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT) "${nvmeof_gateway_proto}" DEPENDS "${nvmeof_gateway_proto}") + + # Monitor Proto file + get_filename_component(nvmeof_monitor_proto "nvmeof/gateway/control/proto/monitor.proto" ABSOLUTE) + get_filename_component(nvmeof_monitor_proto_path "${nvmeof_monitor_proto}" PATH) + + # Generated sources + set(nvmeof_monitor_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/monitor.pb.cc") + set(nvmeof_monitor_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/monitor.pb.h") + set(nvmeof_monitor_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/monitor.grpc.pb.cc") + set(nvmeof_monitor_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/monitor.grpc.pb.h") + + add_custom_command( + OUTPUT "${nvmeof_monitor_proto_srcs}" "${nvmeof_monitor_proto_hdrs}" "${nvmeof_monitor_grpc_srcs}" "${nvmeof_monitor_grpc_hdrs}" + COMMAND ${_PROTOBUF_PROTOC} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out "${CMAKE_CURRENT_BINARY_DIR}" + -I "${nvmeof_monitor_proto_path}" + --experimental_allow_proto3_optional + --plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}" + "${nvmeof_monitor_proto}" + DEPENDS "${nvmeof_monitor_proto}") + # Include generated *.pb.h files include_directories("${CMAKE_CURRENT_BINARY_DIR}") @@ -922,8 +944,13 @@ if(WITH_NVMEOF_GATEWAY_MONITOR_CLIENT) ${nvmeof_gateway_proto_hdrs} ${nvmeof_gateway_grpc_srcs} ${nvmeof_gateway_grpc_hdrs} + ${nvmeof_monitor_proto_srcs} + ${nvmeof_monitor_proto_hdrs} + ${nvmeof_monitor_grpc_srcs} + ${nvmeof_monitor_grpc_hdrs} ceph_nvmeof.cc nvmeof/NVMeofGwClient.cc + nvmeof/NVMeofGwMonitorGroupClient.cc nvmeof/NVMeofGw.cc) add_executable(ceph-nvmeof ${ceph_nvmeof_srcs}) add_dependencies(ceph-nvmeof ceph-common) diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 0554e2dda65b..018efca14183 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -60,7 +60,7 @@ enum class GW_AVAILABILITY_E { GW_DELETED }; -#define MAX_SUPPORTED_ANA_GROUPS 5 +#define MAX_SUPPORTED_ANA_GROUPS 32 #define INVALID_GW_TIMER 0xffff #define REDUNDANT_GW_ANA_GROUP_ID 0xFF typedef struct GW_STATE_T { diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index e3dfebb5e35d..59bd7dfc0046 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -27,6 +27,7 @@ #include "messages/MNVMeofGwMap.h" #include "NVMeofGw.h" #include "NVMeofGwClient.h" +#include "NVMeofGwMonitorGroupClient.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_mgr @@ -74,6 +75,8 @@ int NVMeofGw::init() name = val; } else if (ceph_argparse_witharg(args, i, &val, "--gateway-address", (char*)NULL)) { gateway_address = val; + } else if (ceph_argparse_witharg(args, i, &val, "--monitor-address", (char*)NULL)) { + monitor_address = val; } else if (ceph_argparse_witharg(args, i, &val, "--server-key", (char*)NULL)) { server_key = val; } else if (ceph_argparse_witharg(args, i, &val, "--server-cert", (char*)NULL)) { @@ -86,7 +89,7 @@ int NVMeofGw::init() } dout(0) << "gateway name: " << name << " address: " << gateway_address << dendl; - ceph_assert(name != "" && gateway_address != ""); + ceph_assert(name != "" && gateway_address != "" && monitor_address != ""); // todo ceph_assert(server_key == "" && server_cert == "" && client_cert == ""); @@ -176,29 +179,26 @@ void NVMeofGw::send_beacon() { ceph_assert(ceph_mutex_is_locked_by_me(lock)); dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; + GW_AVAILABILITY_E gw_availability = GW_AVAILABILITY_E::GW_CREATED; GwSubsystems subs; - NVMeofGwClient gw_client( - grpc::CreateChannel(gateway_address, grpc::InsecureChannelCredentials())); - subsystems_info gw_subsystems; - bool ok = gw_client.get_subsystems(gw_subsystems); - if (ok) { - for (int i = 0; i < gw_subsystems.subsystems_size(); i++) { - const subsystem& sub = gw_subsystems.subsystems(i); - struct NqnState nqn_state(sub.nqn()); - if (map.epoch > 0) { // handled map already, update sm_state, opt_ana_gid + if (map.epoch > 0) { // handled map already + NVMeofGwClient gw_client( + grpc::CreateChannel(gateway_address, grpc::InsecureChannelCredentials())); + subsystems_info gw_subsystems; + bool ok = gw_client.get_subsystems(gw_subsystems); + if (ok) { + for (int i = 0; i < gw_subsystems.subsystems_size(); i++) { + const subsystem& sub = gw_subsystems.subsystems(i); + struct NqnState nqn_state(sub.nqn()); GW_STATE_T* gw_state = map.find_gw_map(name, nqn_state.nqn); if (gw_state) { nqn_state.opt_ana_gid = gw_state->optimized_ana_group_id; for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) nqn_state.sm_state[i] = gw_state->sm_state[i]; } + subs.push_back(nqn_state); } - subs.push_back(nqn_state); } - } - - GW_AVAILABILITY_E gw_availability = GW_AVAILABILITY_E::GW_CREATED; - if (map.epoch > 0) { // handled map already gw_availability = ok ? GW_AVAILABILITY_E::GW_AVAILABLE : GW_AVAILABILITY_E::GW_UNAVAILABLE; } @@ -262,8 +262,24 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) std::stringstream ss; mp._dump_gwmap(ss); dout(0) << ss.str() << dendl; - ana_info ai; + if (map.epoch == 0) { // initial map + auto it = std::find_if(mp.Created_gws.begin(), mp.Created_gws.end(), + [&](const GW_CREATED_T& item) { + return item.gw_name == name; + }); + + // Check if the element was found + if (it == mp.Created_gws.end()) { + dout(0) << "Failed to find created gw for " << name << dendl; + return; + } + + NVMeofGwMonitorGroupClient monitor_group_client( + grpc::CreateChannel(monitor_address, grpc::InsecureChannelCredentials())); + if (!monitor_group_client.set_group_id(it->ana_grp_id)) dout(0) << "GRPC set_group_id failed" << dendl; + } + // Interate over NQNs for (const auto& subsystemPair : mp.Gmap) { const std::string& nqn = subsystemPair.first; @@ -318,7 +334,6 @@ bool NVMeofGw::ms_dispatch2(const ref_t& m) std::lock_guard l(lock); dout(0) << "got map type " << m->get_type() << dendl; - if (m->get_type() == MSG_MNVMEOF_GW_MAP) { handle_nvmeof_gw_map(ref_cast(m)); } diff --git a/src/nvmeof/NVMeofGw.h b/src/nvmeof/NVMeofGw.h index 1655f347edda..9460a5369c83 100644 --- a/src/nvmeof/NVMeofGw.h +++ b/src/nvmeof/NVMeofGw.h @@ -31,6 +31,7 @@ class NVMeofGw : public Dispatcher, private: std::string name; std::string gateway_address; + std::string monitor_address; std::string server_key; std::string server_cert; std::string client_cert; diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 5eee6f45f9a8..4a02b73fc650 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 5eee6f45f9a8708bfc2ac97bb742fd6cad548e48 +Subproject commit 4a02b73fc6505c3080b0a2f985d51461c22a4b50 From 9eef58079389c4cd68561f6776178c802ea3d440 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Tue, 5 Dec 2023 18:08:35 +0000 Subject: [PATCH 49/65] update gateway version Signed-off-by: Alexander Indenbaum --- src/nvmeof/gateway | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 4a02b73fc650..697ac6b936a1 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 4a02b73fc6505c3080b0a2f985d51461c22a4b50 +Subproject commit 697ac6b936a1c235bd43f0369777e05db615ed84 From 64c8ba151e2a02cbab440080b8055c9291a98485 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Wed, 6 Dec 2023 12:44:50 +0000 Subject: [PATCH 50/65] Add NVMeofGwMonitorGroupClient.cc/h files to git --- src/nvmeof/NVMeofGwMonitorGroupClient.cc | 25 +++++++++++++++ src/nvmeof/NVMeofGwMonitorGroupClient.h | 39 ++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 src/nvmeof/NVMeofGwMonitorGroupClient.cc create mode 100644 src/nvmeof/NVMeofGwMonitorGroupClient.h diff --git a/src/nvmeof/NVMeofGwMonitorGroupClient.cc b/src/nvmeof/NVMeofGwMonitorGroupClient.cc new file mode 100644 index 000000000000..cbea04e66f48 --- /dev/null +++ b/src/nvmeof/NVMeofGwMonitorGroupClient.cc @@ -0,0 +1,25 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#include "NVMeofGwMonitorGroupClient.h" + +bool NVMeofGwMonitorGroupClient::set_group_id(const uint32_t& id) { + group_id_req request; + request.set_id(id); + google::protobuf::Empty reply; + ClientContext context; + + Status status = stub_->group_id(&context, request, &reply); + + return status.ok(); +} diff --git a/src/nvmeof/NVMeofGwMonitorGroupClient.h b/src/nvmeof/NVMeofGwMonitorGroupClient.h new file mode 100644 index 000000000000..f4ca4c4f3d19 --- /dev/null +++ b/src/nvmeof/NVMeofGwMonitorGroupClient.h @@ -0,0 +1,39 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2023 + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + + +#ifndef __NVMEOFGWMONITORGROUPCLIENT_H__ +#define __NVMEOFGWMONITORGROUPCLIENT_H__ +#include +#include +#include + +#include + +#include "monitor.grpc.pb.h" + +using grpc::Channel; +using grpc::ClientContext; +using grpc::Status; + +class NVMeofGwMonitorGroupClient { + public: + NVMeofGwMonitorGroupClient(std::shared_ptr channel) + : stub_(MonitorGroup::NewStub(channel)) {} + + bool set_group_id(const uint32_t& id); + + private: + std::unique_ptr stub_; +}; +#endif From 65fa4835eb84e981bc399febd87efe7e7e90a720 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Thu, 7 Dec 2023 06:47:30 +0000 Subject: [PATCH 51/65] monitor : handle removal of subsystems, limit max ana groups to 16 Add flag to bypass the GW create cli, applied the flag Signed-off-by: Leonid Chernin --- src/mon/NVMeofGwMap.cc | 31 ++++++++++++++++++++++++++++--- src/mon/NVMeofGwMap.h | 5 +++-- src/mon/NVMeofGwMon.cc | 14 ++++++++++++-- src/nvmeof/NVMeofGw.cc | 4 ++-- 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 2101b27e62c0..f83958e729dc 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -162,6 +162,7 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { int NVMeofGwMap::_dump_created_gws(std::stringstream &ss)const { ss << __func__ << " called " << std::endl; + ss << "(gw-mon) "; for (auto& itr : Created_gws) { ss << " gw :" << itr.gw_name << ", ana: " << itr.ana_grp_id ; } @@ -173,7 +174,7 @@ int NVMeofGwMap::_dump_created_gws(std::stringstream &ss)const { int NVMeofGwMap:: update_active_timers( bool &propose_pending ){ - dout(4) << __func__ << " called, p_monitor: " << mon << dendl; + //dout(4) << __func__ << " called, p_monitor: " << mon << dendl; for (auto& itr : Gmetadata) { for (auto& ptr : itr.second) { GW_METADATA_T *metadata = &ptr.second; @@ -222,7 +223,7 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); if (gw_state) { dout(4) << "KA beacon from the GW " << gw_id << " in state " << (int)gw_state->availability << dendl; - propose_pending = false; + if (gw_state->availability == GW_AVAILABILITY_E::GW_CREATED) { // first time appears - allow IO traffic for this GW gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; @@ -313,6 +314,30 @@ int NVMeofGwMap::handle_abandoned_ana_groups(bool & propose) return 0; } + +int NVMeofGwMap::handle_removed_subsystems (const std::vector &created_subsystems, bool &propose_pending) +{ + bool found = false;; + for (auto& m_itr : Gmap) { + //if not found in the vector of configured subsystems, need to remove the nqn from the map + found = false; + for(auto v_itr : created_subsystems){ + if (m_itr.first == v_itr){ + found = true; + break; + } + } + if(!found){ + // remove m_itr.first from the map + dout(4) << "seems subsystem nqn was removed - to remove nqn from the map " << m_itr.first < &created_subsystems, bool &propose_pending); void debug_encode_decode(){ ceph::buffer::list bl; diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 1735bc2bbff8..fd9bdbf95807 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -211,7 +211,7 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t){ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){ version_t version = get_last_committed(); - dout(4) << MY_MON_PREFFIX << __func__ << " version " << version << " map.epoch " << map.epoch << dendl; + //dout(4) << MY_MON_PREFFIX << __func__ << " version " << version << " map.epoch " << map.epoch << dendl; if (version != map.epoch) { dout(4) << " NVMeGW loading version " << version << " " << map.epoch << dendl; @@ -449,6 +449,8 @@ void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std std::getline(s1, nqn, GW_DELIM); } +#define BYPASS_GW_CREATE_CLI + bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; GW_STATE_T* gw_state = NULL; @@ -464,6 +466,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ const GwSubsystems& subsystems = m->get_subsystems(); bool propose = false; int ana_grp_id = 0; + std::vector configured_subsystems; if (avail == GW_AVAILABILITY_E::GW_CREATED){ // in this special state GWs receive map with just "created_gws" vector @@ -473,6 +476,11 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ } else{ dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP but it is not created yet!!! "<< dendl; +#ifdef BYPASS_GW_CREATE_CLI + pending_map.cfg_add_gw(gw_id); + dout(4) << "GW " << gw_id << " created since mode is bypass-create-cli "<< dendl; + propose= true; +#endif } goto set_propose; } @@ -480,7 +488,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ // Validation gw is in the database for (const NqnState &st : subsystems) { - gw_state = pending_map.find_gw_map(gw_id, st.nqn); + gw_state = pending_map.find_gw_map( gw_id, st.nqn ); if (gw_state == NULL) { dout(4) << "GW + NQN pair is not in the database: " << gw_id << " " << st.nqn << dendl; @@ -494,7 +502,9 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ return 0; } } + configured_subsystems.push_back(st.nqn); } + pending_map.handle_removed_subsystems( configured_subsystems, propose ); if(avail == GW_AVAILABILITY_E::GW_AVAILABLE) { diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 59bd7dfc0046..ac6bf3cf57b1 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -178,7 +178,7 @@ int NVMeofGw::init() void NVMeofGw::send_beacon() { ceph_assert(ceph_mutex_is_locked_by_me(lock)); - dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; + //dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl; GW_AVAILABILITY_E gw_availability = GW_AVAILABILITY_E::GW_CREATED; GwSubsystems subs; if (map.epoch > 0) { // handled map already @@ -201,7 +201,7 @@ void NVMeofGw::send_beacon() } gw_availability = ok ? GW_AVAILABILITY_E::GW_AVAILABLE : GW_AVAILABILITY_E::GW_UNAVAILABLE; } - + dout(0) << "sending beacon as gid " << monc.get_global_id() << " availability " << (int)gw_availability << dendl; auto m = ceph::make_message( name, subs, From 75a857aab9e45a8ba53850c8e003897b05817413 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Wed, 13 Dec 2023 15:17:52 +0000 Subject: [PATCH 52/65] nvmeof gateway, tracking monitor-client-rebase-20231213 branch Signed-off-by: Alexander Indenbaum --- src/nvmeof/gateway | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 697ac6b936a1..72b77ea1e816 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 697ac6b936a1c235bd43f0369777e05db615ed84 +Subproject commit 72b77ea1e8164ba6bbf096eb38814b58ba34cef4 From 355d3cbdbc5ad98c0baf6e5681b54004f5288a26 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Wed, 13 Dec 2023 16:47:04 +0000 Subject: [PATCH 53/65] handle gRPC call failures by retrying Signed-off-by: Alexander Indenbaum --- src/nvmeof/NVMeofGw.cc | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index ac6bf3cf57b1..3c95ff8787b4 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -269,15 +269,23 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) return item.gw_name == name; }); - // Check if the element was found + // Check if we got this gateway name if (it == mp.Created_gws.end()) { dout(0) << "Failed to find created gw for " << name << dendl; return; } - NVMeofGwMonitorGroupClient monitor_group_client( - grpc::CreateChannel(monitor_address, grpc::InsecureChannelCredentials())); - if (!monitor_group_client.set_group_id(it->ana_grp_id)) dout(0) << "GRPC set_group_id failed" << dendl; + bool set_group_id = false; + while (!set_group_id) { + NVMeofGwMonitorGroupClient monitor_group_client( + grpc::CreateChannel(monitor_address, grpc::InsecureChannelCredentials())); + dout(0) << "GRPC set_group_id: " << it->ana_grp_id << dendl; + set_group_id = monitor_group_client.set_group_id(it->ana_grp_id); + if (!set_group_id) { + dout(0) << "GRPC set_group_id failed" << dendl; + usleep(1000); // TODO: conf options + } + } } // Interate over NQNs @@ -322,9 +330,16 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) if (nas.states_size()) ai.mutable_states()->Add(std::move(nas)); } if (ai.states_size()) { - NVMeofGwClient gw_client( - grpc::CreateChannel(gateway_address, grpc::InsecureChannelCredentials())); - if (!gw_client.set_ana_state(ai)) dout(0) << "GRPC set_ana_state failed" << dendl; + bool set_ana_state = false; + while (!set_ana_state) { + NVMeofGwClient gw_client( + grpc::CreateChannel(gateway_address, grpc::InsecureChannelCredentials())); + set_ana_state = gw_client.set_ana_state(ai); + if (!set_ana_state) { + dout(0) << "GRPC set_ana_state failed" << dendl; + usleep(1000); // TODO conf option + } + } } map = mp; } From 563ba078926bf89e012b92037a12da47b7582546 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 17 Dec 2023 08:24:34 +0000 Subject: [PATCH 54/65] Fix ceph command nvme-gw delete --- src/mon/NVMeofGwMap.cc | 34 ++++++++++++++++++++-------------- src/mon/NVMeofGwMap.h | 2 +- src/mon/NVMeofGwMon.cc | 2 +- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index f83958e729dc..2528d2d1c4e0 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -63,7 +63,7 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { } -int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, bool & map_modified){ +int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, bool & map_modified){ GW_STATE_T * state; bool found = false; @@ -79,19 +79,25 @@ int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string & nqn, dout(4) << __func__ << " ERROR :GW was not created " << gw_id << dendl; return -ENODEV ; } - // TODO tracerse the GMap , find gw in the map for all nqns - nqn is not a parameter of a function - if ((state = find_gw_map(gw_id, nqn) ) ) { // GW was created and started - - bool modified = false; - map_modified = false; - for(int i=0; ism_state[i], i, modified); - map_modified |= modified; - } - dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; - Gmap[nqn].erase(gw_id); - delete_metadata(gw_id, nqn); - } + // traverse the GMap , find gw in the map for all nqns + + map_modified = false; + for (auto& itr : Gmap) + for (auto& ptr : itr.second) { + GW_ID_T found_gw_id = ptr.first; + const std::string& nqn = itr.first; + state = &ptr.second; + if (gw_id == found_gw_id) { // GW was created + bool modified = false; + for(int i=0; ism_state[i], i, modified); + map_modified |= modified; + } + dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; + Gmap[itr.first].erase(gw_id); + delete_metadata(gw_id, nqn); + } + } Created_gws.erase(Created_gws.begin() + index); return 0; } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 098168ee1e59..a4a60ecf052f 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -262,7 +262,7 @@ class NVMeofGwMap int _dump_gwmap(std::stringstream &ss)const ; int _dump_created_gws(std::stringstream &ss)const ; int cfg_add_gw (const GW_ID_T &gw_id); - int cfg_delete_gw (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); + int cfg_delete_gw (const GW_ID_T &gw_id, bool &propose_pending); int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); int handle_abandoned_ana_groups (bool &propose_pending); diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index fd9bdbf95807..ef3b3ee4166e 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -392,7 +392,7 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) else{ bool modified; for (unsigned i = 0; i < idvec.size(); i ++){ - rc = pending_map.cfg_delete_gw( idvec[i], "Null", modified); + rc = pending_map.cfg_delete_gw( idvec[i], modified); map_modified |= modified; //ceph_assert(rc!= -EINVAL); } From ef9493e57d4adfc537bf491f90e7f681226b1658 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 17 Dec 2023 20:00:40 +0000 Subject: [PATCH 55/65] disable bypassing of cli nvme-gw create --- src/mon/NVMeofGwMon.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index ef3b3ee4166e..80ab14acceda 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -449,7 +449,7 @@ void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std std::getline(s1, nqn, GW_DELIM); } -#define BYPASS_GW_CREATE_CLI +//#define BYPASS_GW_CREATE_CLI bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; From 696893d4c00a7e5bdc6099c6b2f8e12d5f1c482d Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Mon, 18 Dec 2023 19:44:08 +0000 Subject: [PATCH 56/65] Use 0x800 as a bit mask for nvmeofgw messages. - per https://github.com/ceph/ceph/pull/54671#discussion_r1415665294 - update nvmeof gateway revision Signed-off-by: Alexander Indenbaum --- src/msg/Message.h | 4 ++-- src/nvmeof/gateway | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/msg/Message.h b/src/msg/Message.h index 47f83c3e1074..62f27109dce2 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -240,10 +240,10 @@ #define MSG_MGR_UPDATE 0x70b // *** nvmeof mon -> gw daemons *** -#define MSG_MNVMEOF_GW_MAP 0x70c +#define MSG_MNVMEOF_GW_MAP 0x800 // *** gw daemons -> nvmeof mon *** -#define MSG_MNVMEOF_GW_BEACON 0x70d +#define MSG_MNVMEOF_GW_BEACON 0x801 // ====================================================== diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 72b77ea1e816..49c840e7ca4c 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 72b77ea1e8164ba6bbf096eb38814b58ba34cef4 +Subproject commit 49c840e7ca4c745742cc2fa504c0e33a012e57d2 From 353d8bd6db42e6fab0c0375cd25cd03f138a7be2 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Wed, 20 Dec 2023 18:18:14 +0000 Subject: [PATCH 57/65] gw_created is an std-map, added nonce-vector to it Signed-off-by: Leonid Chernin --- src/mon/NVMeofGwMap.cc | 43 ++++++++++++------------- src/mon/NVMeofGwMap.h | 73 +++++++++++++++++++++++++++++++++--------- src/nvmeof/NVMeofGw.cc | 24 +++++++------- 3 files changed, 89 insertions(+), 51 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 2528d2d1c4e0..d3b0d8bb2639 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -33,16 +33,17 @@ static std::string G_gw_ana_states[] = { }; int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { - GW_CREATED_T gw_created = {-1, gw_id}; + GW_CREATED_T gw_created = {-1}; bool allocated[MAX_SUPPORTED_ANA_GROUPS] = {false}; - for (unsigned i = 0; i < Created_gws.size(); i ++){ - allocated[Created_gws[i].ana_grp_id ] = true; - if(Created_gws[i].gw_name == gw_id){ + for (auto& itr : Created_gws){ + allocated[itr.second.ana_grp_id ] = true; + if(itr.first == gw_id){ dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; return -EEXIST ; } } + for(int i=0; i<=MAX_SUPPORTED_ANA_GROUPS; i++){ if (allocated[i] == false){ gw_created.ana_grp_id = i; @@ -54,7 +55,7 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { return -EINVAL; } - Created_gws.push_back(gw_created); + Created_gws.insert({gw_id, gw_created}); dout(4) << __func__ << "Created GW: " << gw_id << " grpid " << gw_created.ana_grp_id << dendl; std::stringstream ss; _dump_created_gws(ss); @@ -66,16 +67,9 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, bool & map_modified){ GW_STATE_T * state; - bool found = false; - unsigned index; - - for (index = 0; index < Created_gws.size(); index ++){ - if(Created_gws[index].gw_name == gw_id){ - found = true; - break; - } - } - if(!found) { + int ana_grp_id = 0; + if(find_created_gw(gw_id, ana_grp_id) != 0) + { dout(4) << __func__ << " ERROR :GW was not created " << gw_id << dendl; return -ENODEV ; } @@ -98,7 +92,8 @@ int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, bool & map_modified){ delete_metadata(gw_id, nqn); } } - Created_gws.erase(Created_gws.begin() + index); + Created_gws.erase(gw_id); + return 0; } @@ -132,11 +127,13 @@ int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { ss << "(gw-mon) NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id+1 << " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + int num_groups = Created_gws.size(); + for (int i = 0; i < num_groups; i++) { ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } ss << " Failover peers: " << std::endl << " "; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + + for (int i = 0; i < num_groups; i++) { ss << ptr.second.failover_peer[i] << " " ; } ss << std::endl; @@ -152,11 +149,13 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { for (auto& ptr : itr.second) { ss << "(gw-mon) NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id+1 << " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + int num_groups = Created_gws.size(); + for (int i = 0; i < num_groups; i++) { ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } ss << " Failover peers: " << std::endl << " "; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + + for (int i = 0; i < num_groups; i++) { ss << ptr.second.failover_peer[i] << " " ; } ss << std::endl; @@ -168,9 +167,9 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { int NVMeofGwMap::_dump_created_gws(std::stringstream &ss)const { ss << __func__ << " called " << std::endl; - ss << "(gw-mon) "; + ss << "(gw-mon) Created GWs:"; for (auto& itr : Created_gws) { - ss << " gw :" << itr.gw_name << ", ana: " << itr.ana_grp_id ; + ss << " gw :" << itr.first << ", Ana-grp: " << itr.second.ana_grp_id ; } ss << std::endl; return 0; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index a4a60ecf052f..61cb4e6d2bda 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -44,6 +44,7 @@ using ceph::coarse_mono_clock; using GW_ID_T = std::string; +using NONCE_VECTOR_T = std::vector; typedef enum { GW_IDLE_STATE = 0, //invalid state @@ -77,13 +78,14 @@ typedef struct GW_METADATA_T { typedef struct { int ana_grp_id; - std::string gw_name; + NONCE_VECTOR_T nonces; } GW_CREATED_T; using GWMAP = std::map >; using GWMETADATA = std::map >; using SUBSYST_GWMAP = std::map; using SUBSYST_GWMETA = std::map; +using GW_CREATED = std::map; inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { for(int i = 0; i Created_gws; epoch_t epoch = 0; // epoch is for Paxos synchronization mechanizm bool delay_propose = false; @@ -149,8 +152,13 @@ class NVMeofGwMap //Encode created GWs encode ((int)Created_gws.size(), bl); for(auto &itr : Created_gws){ - encode(itr.gw_name, bl); - encode(itr.ana_grp_id, bl); + encode(itr.first, bl);// GW_id + const GW_CREATED_T * gw_created = &itr.second; + encode(gw_created->ana_grp_id, bl); + encode ((int)gw_created->nonces.size(), bl); // set number of elements in nonce list + for(auto &list_it : gw_created->nonces ){ + encode(list_it, bl); + } } encode ((int)Gmap.size(),bl); // number nqn for (auto& itr : Gmap) { @@ -180,11 +188,20 @@ class NVMeofGwMap decode(num_created_gws, bl); Created_gws.clear(); for(int i = 0; isecond.ana_grp_id; + return 0; + } + return -1; + } + + GW_CREATED_T* find_created_gw(const GW_ID_T &gw_id ) + { + auto it = Created_gws.find(gw_id); + if (it != Created_gws.end()) { + return &it->second; + } + return NULL; + } + + int update_gw_nonce(const GW_ID_T &gw_id , NONCE_VECTOR_T &new_nonces) + { + GW_CREATED_T* gw_created = find_created_gw(gw_id); + if (new_nonces.size() >0){ + gw_created->nonces.clear(); + gw_created->nonces.reserve(new_nonces.size()); + for( auto &it : new_nonces){ + gw_created->nonces.push_back(it); + } + } + return 0; } + GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) const { auto it = Gmap.find(nqn); diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 3c95ff8787b4..717a157e9c54 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -263,27 +263,25 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) mp._dump_gwmap(ss); dout(0) << ss.str() << dendl; ana_info ai; - if (map.epoch == 0) { // initial map - auto it = std::find_if(mp.Created_gws.begin(), mp.Created_gws.end(), - [&](const GW_CREATED_T& item) { - return item.gw_name == name; - }); - - // Check if we got this gateway name - if (it == mp.Created_gws.end()) { + if (map.epoch == 0){ // initial map + int ana_grp_id = -1; + if(mp.find_created_gw(name ,ana_grp_id) !=0 ) + { dout(0) << "Failed to find created gw for " << name << dendl; return; } - + std::stringstream ss1; + mp._dump_created_gws(ss1); + dout(0) << ss1.str() << dendl; bool set_group_id = false; while (!set_group_id) { NVMeofGwMonitorGroupClient monitor_group_client( grpc::CreateChannel(monitor_address, grpc::InsecureChannelCredentials())); - dout(0) << "GRPC set_group_id: " << it->ana_grp_id << dendl; - set_group_id = monitor_group_client.set_group_id(it->ana_grp_id); + dout(0) << "GRPC set_group_id: " << ana_grp_id << dendl; + set_group_id = monitor_group_client.set_group_id( ana_grp_id); if (!set_group_id) { - dout(0) << "GRPC set_group_id failed" << dendl; - usleep(1000); // TODO: conf options + dout(0) << "GRPC set_group_id failed" << dendl; + usleep(1000); // TODO: conf options } } } From f65dad6eca0e75c04aedc326040bc2ad144e7d13 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Sun, 24 Dec 2023 16:29:14 +0200 Subject: [PATCH 58/65] refactor nvme-gw mon command, per gw and includes id, pool, group Signed-off-by: Alexander Indenbaum --- src/mon/MonCommands.h | 25 +++++++++---------- src/mon/NVMeofGwMon.cc | 21 +++++++--------- src/pybind/mgr/cephadm/services/nvmeof.py | 23 +++++++++++++++++ .../services/nvmeof/ceph-nvmeof.conf.j2 | 2 +- .../ceph/deployment/service_spec.py | 2 +- 5 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 153e6df756be..a680e793ecdd 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1340,19 +1340,18 @@ COMMAND("config generate-minimal-conf", "config", "r") /* NVMeofGwMon*/ -COMMAND("nvme-gw create " - "name=ids,type=CephString,n=N", - "set gw(s) [...] create, " - "create gws inside subsystem", - "gw", "rw") -COMMAND("nvme-gw delete " - "name=ids,type=CephString,n=N", - "set gw(s) [...] delete, " - "delete gws inside subsystem", - "gw", "rw") - - - +COMMAND("nvme-gw create" + " name=id,type=CephString" + " name=pool,type=CephString" + " name=group,type=CephString", + "create nvmeof gateway id for (pool, group)", + "nvme-gw", "rw") +COMMAND("nvme-gw delete" + " name=id,type=CephString" + " name=pool,type=CephString" + " name=group,type=CephString", + "delete nvmeof gateway id for (pool, group)", + "nvme-gw", "rw") // these are tell commands that were implemented as CLI commands in // the broken pre-octopus way that we want to allow to work when a diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 80ab14acceda..9fb4f0035260 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -380,22 +380,19 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) dout(4) << "MonCommand : "<< prefix << dendl; bool map_modified = false; if( prefix == "nvme-gw create" || prefix == "nvme-gw delete" ) { - vector idvec; + std::string id, pool, group; + + cmd_getval(cmdmap, "id", id); + cmd_getval(cmdmap, "pool", pool); + cmd_getval(cmdmap, "group", group); - cmd_getval(cmdmap, "ids", idvec); if(prefix == "nvme-gw create"){ - for (unsigned i = 0; i < idvec.size(); i ++){ - rc = pending_map.cfg_add_gw( idvec[i] ); - ceph_assert(rc!= -EINVAL); - } + rc = pending_map.cfg_add_gw( id ); + ceph_assert(rc!= -EINVAL); } else{ - bool modified; - for (unsigned i = 0; i < idvec.size(); i ++){ - rc = pending_map.cfg_delete_gw( idvec[i], modified); - map_modified |= modified; - //ceph_assert(rc!= -EINVAL); - } + rc = pending_map.cfg_delete_gw( id, map_modified); + ceph_assert(rc!= -EINVAL); } if(map_modified){ propose_pending(); diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py index 8d785aa31584..1d957d85ae01 100644 --- a/src/pybind/mgr/cephadm/services/nvmeof.py +++ b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -52,6 +52,16 @@ def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonD daemon_spec.extra_files = {'ceph-nvmeof.conf': gw_conf} daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) daemon_spec.deps = [] + # Notify monitor about this gateway creation + cmd = { + 'prefix': 'nvme-gw create', + 'id': name, + 'group': spec.group, + 'pool': spec.pool + } + _, _, err = self.mgr.mon_command(cmd) + if err: + self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}") return daemon_spec def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: @@ -83,6 +93,19 @@ def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None Called after the daemon is removed. """ logger.debug(f'Post remove daemon {self.TYPE}.{daemon.daemon_id}') + spec = cast(NvmeofServiceSpec, self.mgr.spec_store[daemon.service_name].spec) + name = '{}.{}'.format(utils.name_to_config_section('nvmeof'), daemon.daemon_id) + # Notify monitor about this gateway deletion + cmd = { + 'prefix': 'nvme-gw delete', + 'id': name, + 'group': spec.group, + 'pool': spec.pool + } + _, _, err = self.mgr.mon_command(cmd) + if err: + self.mgr.log.error(f"Unable to send monitor command {cmd}, error {err}") + # TODO: remove config for dashboard nvmeof gateways if any # and any certificates being used for mTLS diff --git a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 index 72a3e5839edc..69b8332cde39 100644 --- a/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2 @@ -1,7 +1,7 @@ # {{ cephadm_managed }} [gateway] name = {{ name }} -group = {{ spec.group if spec.group is not none else '' }} +group = {{ spec.group }} addr = {{ addr }} port = {{ port }} enable_auth = {{ spec.enable_auth }} diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 4181ee2563e4..0126f1957d5c 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -1168,7 +1168,7 @@ def __init__(self, #: ``name`` name of the nvmeof gateway self.name = name #: ``group`` name of the nvmeof gateway - self.group = group + self.group = group or '' #: ``enable_auth`` enables user authentication on nvmeof gateway self.enable_auth = enable_auth #: ``server_key`` gateway server key From c5cf8dd3421934cc23315203161a7faf3bdae7ef Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 25 Dec 2023 11:03:06 +0000 Subject: [PATCH 59/65] Added nonce-map under Created_gw map, modified encode/decode, dump . tested --- src/mon/NVMeofGwMap.cc | 26 +++++++--- src/mon/NVMeofGwMap.h | 109 +++++++++++++++++++++++++++++++---------- src/mon/NVMeofGwMon.cc | 18 ++++++- 3 files changed, 117 insertions(+), 36 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index d3b0d8bb2639..b77c6dde7de4 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -33,8 +33,9 @@ static std::string G_gw_ana_states[] = { }; int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { - GW_CREATED_T gw_created = {-1}; + GW_CREATED_T gw_created; bool allocated[MAX_SUPPORTED_ANA_GROUPS] = {false}; + gw_created.ana_grp_id = 0xff; for (auto& itr : Created_gws){ allocated[itr.second.ana_grp_id ] = true; @@ -50,7 +51,7 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { break; } } - if(gw_created.ana_grp_id == -1){ + if(gw_created.ana_grp_id == 0xff){ dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA groupId was not allocated " << dendl; return -EINVAL; } @@ -92,7 +93,7 @@ int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, bool & map_modified){ delete_metadata(gw_id, nqn); } } - Created_gws.erase(gw_id); + Created_gws.erase(gw_id);//TODO check whether ana map with nonce vector is destroyed properly - probably not. to handle! return 0; } @@ -153,7 +154,7 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { for (int i = 0; i < num_groups; i++) { ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; } - ss << " Failover peers: " << std::endl << " "; + ss << "(gw-mon) Failover peers: " << std::endl << " "; for (int i = 0; i < num_groups; i++) { ss << ptr.second.failover_peer[i] << " " ; @@ -161,22 +162,29 @@ int NVMeofGwMap::_dump_gwmap(std::stringstream &ss)const { ss << std::endl; } } - //dout(0) << ss.str() <sm_state[ANA_groupid] = GW_STANDBY_STATE; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 61cb4e6d2bda..0383637b709a 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -44,8 +44,7 @@ using ceph::coarse_mono_clock; using GW_ID_T = std::string; -using NONCE_VECTOR_T = std::vector; - +using ANA_GRP_ID_T = uint16_t; typedef enum { GW_IDLE_STATE = 0, //invalid state GW_STANDBY_STATE, @@ -67,25 +66,33 @@ enum class GW_AVAILABILITY_E { typedef struct GW_STATE_T { GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group GW_ID_T failover_peer[MAX_SUPPORTED_ANA_GROUPS]; - uint16_t optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF + ANA_GRP_ID_T optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable - uint64_t version; // version per all GWs of the same subsystem. subsystem version + uint64_t version; // version per all GWs of the same subsystem. subsystem version }GW_STATE_T; typedef struct GW_METADATA_T { int anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state }GW_METADATA_T; -typedef struct { - int ana_grp_id; - NONCE_VECTOR_T nonces; -} GW_CREATED_T; using GWMAP = std::map >; using GWMETADATA = std::map >; using SUBSYST_GWMAP = std::map; using SUBSYST_GWMETA = std::map; -using GW_CREATED = std::map; + +using NONCE_VECTOR_T = std::vector; +using GW_ANA_NONCE_MAP = std::map ; + + +typedef struct { + ANA_GRP_ID_T ana_grp_id; // ana-group-id allocated for this GW, GW owns this group-id + GW_ANA_NONCE_MAP nonce_map; +} GW_CREATED_T; + +using GW_CREATED_MAP = std::map; + + inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { for(int i = 0; i ana_grp_id, bl); - encode ((int)gw_created->nonces.size(), bl); // set number of elements in nonce list - for(auto &list_it : gw_created->nonces ){ - encode(list_it, bl); - } + encode_nonces(gw_created->nonce_map, bl, full_encode);//TODO "if not full_encode" to prevent sending full nonce map to the clients } encode ((int)Gmap.size(),bl); // number nqn for (auto& itr : Gmap) { @@ -192,13 +236,8 @@ class NVMeofGwMap std::string gw_name; decode(gw_name, bl); decode(gw_created.ana_grp_id, bl); - int num_created_nonces; - decode(num_created_nonces, bl); - for(int i = 0; i < num_created_nonces; i++){ - std::string nonce; - decode(nonce, bl); - gw_created.nonces.push_back(nonce); - } + decode_nonces(gw_created.nonce_map, bl,full_decode); //TODO if not full_decode + Created_gws.insert({gw_name,(gw_created)}); } @@ -259,19 +298,37 @@ class NVMeofGwMap return NULL; } - int update_gw_nonce(const GW_ID_T &gw_id , NONCE_VECTOR_T &new_nonces) + int update_gw_nonce(const GW_ID_T &gw_id, ANA_GRP_ID_T &ana_grp_id, NONCE_VECTOR_T &new_nonces) { GW_CREATED_T* gw_created = find_created_gw(gw_id); if (new_nonces.size() >0){ - gw_created->nonces.clear(); - gw_created->nonces.reserve(new_nonces.size()); + GW_ANA_NONCE_MAP & nonce_map = gw_created->nonce_map; + if(nonce_map[ana_grp_id].size() == 0){ + nonce_map.insert({ana_grp_id, NONCE_VECTOR_T()}) ; + } + nonce_map[ana_grp_id].clear(); + nonce_map[ana_grp_id].reserve(new_nonces.size()); //gw_created->nonces.clear();// gw_created->nonces.reserve(new_nonces.size()); for( auto &it : new_nonces){ - gw_created->nonces.push_back(it); + nonce_map[ana_grp_id].push_back(it); //gw_created->nonces.push_back(it); } } return 0; } + int destroy_gw(const GW_ID_T &gw_id) + { + GW_CREATED_T* gw_created = find_created_gw(gw_id); + if( gw_created ) + { + GW_ANA_NONCE_MAP & nonce_map = gw_created->nonce_map; + for(auto &it : nonce_map){ + it.second.clear();// clear the nonce contexts + } + nonce_map.clear(); + } + return 0; + } + GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) const { diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 9fb4f0035260..57eb1784e9eb 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -54,10 +54,24 @@ void NVMeofGwMon::inject1(){ pending_map.cfg_add_gw("GW1" ); pending_map.cfg_add_gw("GW2" ); pending_map.cfg_add_gw("GW3" ); - - pending_map._dump_gwmap(pending_map.Gmap); + NONCE_VECTOR_T new_nonces = {"abc", "def","hij"}; + ANA_GRP_ID_T grp = 1; + pending_map.update_gw_nonce("GW1", grp, new_nonces); + grp = 2; + pending_map.update_gw_nonce("GW1", grp, new_nonces); + std::stringstream ss; + pending_map._dump_created_gws(ss); + dout(4) << ss.str() << dendl; + + //pending_map._dump_gwmap(pending_map.Gmap); pending_map.debug_encode_decode(); dout(4) << "Dump map after decode encode:" < Date: Mon, 25 Dec 2023 14:11:48 +0200 Subject: [PATCH 60/65] nvmeof gw beacon: add pool and group Signed-off-by: Alexander Indenbaum --- src/messages/MNVMeofGwBeacon.h | 20 ++++++++++++++++---- src/nvmeof/NVMeofGw.cc | 13 +++++++++++-- src/nvmeof/NVMeofGw.h | 2 ++ src/nvmeof/gateway | 2 +- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index d81098a76e1c..f1249b63d34a 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -78,6 +78,8 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { protected: std::string gw_id; + std::string gw_pool; + std::string gw_group; GwSubsystems subsystems; // gateway susbsystem and their state machine states GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable uint32_t version; @@ -87,13 +89,15 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { : PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION} {} - MNVMeofGwBeacon(const std::string &gw_id_, + MNVMeofGwBeacon(const std::string &gw_id_, + const std::string& gw_pool_, + const std::string& gw_group_, const GwSubsystems& subsystems_, const GW_AVAILABILITY_E& availability_, const uint32_t& version_ ) : PaxosServiceMessage{MSG_MNVMEOF_GW_BEACON, 0, HEAD_VERSION, COMPAT_VERSION}, - gw_id(gw_id_), subsystems(subsystems_), + gw_id(gw_id_), gw_pool(gw_pool_), gw_group(gw_group_), subsystems(subsystems_), availability(availability_), version(version_) {} @@ -110,11 +114,15 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { std::string_view get_type_name() const override { return "nvmeofgwbeacon"; } void print(std::ostream& out) const override { - out << get_type_name() << " nvmeofgw " << "(" << gw_id << ", susbsystems: [ "; + out << get_type_name() << + " nvmeofgw id: " << gw_id << + ", pool:" << gw_pool << + ", group:" << gw_group << + ", susbsystems: [ "; for (const NqnState& st: subsystems) { out << st << " "; } - out << "], " << "availability: " << availability << ", version:" << version; + out << "], availability: " << availability << ", version:" << version; } void encode_payload(uint64_t features) override { @@ -123,6 +131,8 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { using ceph::encode; paxos_encode(); encode(gw_id, payload); + encode(gw_pool, payload); + encode(gw_group, payload); encode((int)subsystems.size(), payload); for (const NqnState& st: subsystems) { encode(st.nqn, payload); @@ -140,6 +150,8 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { paxos_decode(p); decode(gw_id, p); + decode(gw_pool, p); + decode(gw_group, p); int n; int tmp; decode(n, p); diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 717a157e9c54..a3e858d08f5f 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -73,6 +73,10 @@ int NVMeofGw::init() break; } else if (ceph_argparse_witharg(args, i, &val, "--gateway-name", (char*)NULL)) { name = val; + } else if (ceph_argparse_witharg(args, i, &val, "--gateway-pool", (char*)NULL)) { + pool = val; + } else if (ceph_argparse_witharg(args, i, &val, "--gateway-group", (char*)NULL)) { + group = val; } else if (ceph_argparse_witharg(args, i, &val, "--gateway-address", (char*)NULL)) { gateway_address = val; } else if (ceph_argparse_witharg(args, i, &val, "--monitor-address", (char*)NULL)) { @@ -88,8 +92,11 @@ int NVMeofGw::init() } } - dout(0) << "gateway name: " << name << " address: " << gateway_address << dendl; - ceph_assert(name != "" && gateway_address != "" && monitor_address != ""); + dout(0) << "gateway name: " << name << + " pool:" << pool << + " group:" << group << + " address: " << gateway_address << dendl; + ceph_assert(name != "" && pool != "" && gateway_address != "" && monitor_address != ""); // todo ceph_assert(server_key == "" && server_cert == "" && client_cert == ""); @@ -204,6 +211,8 @@ void NVMeofGw::send_beacon() dout(0) << "sending beacon as gid " << monc.get_global_id() << " availability " << (int)gw_availability << dendl; auto m = ceph::make_message( name, + pool, + group, subs, gw_availability, map.epoch); diff --git a/src/nvmeof/NVMeofGw.h b/src/nvmeof/NVMeofGw.h index 9460a5369c83..af5844c82f8a 100644 --- a/src/nvmeof/NVMeofGw.h +++ b/src/nvmeof/NVMeofGw.h @@ -30,6 +30,8 @@ class NVMeofGw : public Dispatcher, public md_config_obs_t { private: std::string name; + std::string pool; + std::string group; std::string gateway_address; std::string monitor_address; std::string server_key; diff --git a/src/nvmeof/gateway b/src/nvmeof/gateway index 49c840e7ca4c..53098bdc90fd 160000 --- a/src/nvmeof/gateway +++ b/src/nvmeof/gateway @@ -1 +1 @@ -Subproject commit 49c840e7ca4c745742cc2fa504c0e33a012e57d2 +Subproject commit 53098bdc90fdaa45990b0a2ff38c4ed82b7c6e25 From afca4c0ea7eb4892b5134a70e88f6b4d563cc212 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 25 Dec 2023 18:00:30 +0000 Subject: [PATCH 61/65] process ceph config commands dump and set-log-level - first commit --- src/common/options/mon.yaml.in | 19 +++++++++++++++++++ src/mon/NVMeofGwMon.cc | 31 +++++++++++++++++++++++++++++++ src/mon/NVMeofGwMon.h | 7 ++++++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index a7f0849f9199..a62999dc63dc 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -1346,3 +1346,22 @@ options: with_legacy: true see_also: - osd_heartbeat_use_min_delay_socket +- name: nvmf_mon_log_level + type: int + level: advanced + desc: log level of the nvmeofMon + fmt_desc: Monitor will set the log level. + default: 5 + services: + - mon + with_legacy: true +- name: nvmf_mon_mapdump + type: bool + level: advanced + desc: dump maps of nvmeofMon + fmt_desc: Monitor will dump maps + default: false + services: + - mon + with_legacy: true + diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 57eb1784e9eb..48d9e3114419 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -200,6 +200,37 @@ void NVMeofGwMon::tick(){ } +const char **NVMeofGwMon::get_tracked_conf_keys() const +{ + static const char* KEYS[] = { + "nvmf_mon_mapdump", + "nvmf_mon_log_level", + //"rocksdb_cache_size", + NULL + }; + return KEYS; +} + +void NVMeofGwMon::handle_conf_change(const ConfigProxy& conf, + const std::set &changed) +{ + dout(4) << __func__ << " " << changed << dendl; + + if (changed.count("nvmef_gw_mapdump")) { + //_set_cache_autotuning(); + std::stringstream ss1; + pending_map._dump_gwmap(ss1); + + std::stringstream ss2; + pending_map._dump_created_gws(ss2); + + } + if (changed.count("nvmf_mon_log_level")){ + dout(4) << "TODO SET LOG LEVEL >= " << g_conf()->nvmf_mon_log_level << dendl; + } +} + + void NVMeofGwMon::create_pending(){ pending_map = map;// deep copy of the object diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index cca3e383f326..97aad7e03433 100755 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -16,7 +16,8 @@ #include "MonCommand.h" #include "NVMeofGwMap.h" -class NVMeofGwMon: public PaxosService +class NVMeofGwMon: public PaxosService, + public md_config_obs_t { NVMeofGwMap map; //NVMeGWMap NVMeofGwMap pending_map; @@ -39,6 +40,10 @@ class NVMeofGwMon: public PaxosService ~NVMeofGwMon() override {} + // config observer + const char** get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, const std::set &changed) override; + //const MgrMap &get_map() const { return map; } // bool in_use() const { return map.epoch > 0; } From 61ad449dc54ae67c2ad67024ee63d590ab733442 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Mon, 25 Dec 2023 22:29:53 +0000 Subject: [PATCH 62/65] Support in maps gw-name key = concatenation of id pool group --- src/messages/MNVMeofGwBeacon.h | 2 + src/mon/NVMeofGwMap.cc | 114 ++++++++++++++++++--------------- src/mon/NVMeofGwMap.h | 19 +++++- src/mon/NVMeofGwMon.cc | 64 +++++++++--------- src/nvmeof/NVMeofGw.cc | 11 ++-- 5 files changed, 122 insertions(+), 88 deletions(-) diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index f1249b63d34a..114c0cbd6c01 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -102,6 +102,8 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { {} const std::string& get_gw_id() const { return gw_id; } + const std::string& get_gw_pool() const { return gw_pool; } + const std::string& get_gw_group() const { return gw_group; } const GW_AVAILABILITY_E& get_availability() const { return availability; } const uint32_t& get_version() const { return version; } const GwSubsystems& get_subsystems() const { return subsystems; }; diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index b77c6dde7de4..b3e3c6db8ca3 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -32,15 +32,21 @@ static std::string G_gw_ana_states[] = { "WAIT_FLBACK_RDY" }; -int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { +int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group) { GW_CREATED_T gw_created; bool allocated[MAX_SUPPORTED_ANA_GROUPS] = {false}; gw_created.ana_grp_id = 0xff; + std::string gw_name; + std::string gw_preffix; + NVMeofGwMap::gw_preffix_from_id_pool_group (gw_preffix, pool, group ); + NVMeofGwMap::gw_name_from_id_pool_group (gw_name, gw_id , pool, group ); for (auto& itr : Created_gws){ - allocated[itr.second.ana_grp_id ] = true; - if(itr.first == gw_id){ - dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; + // Allocate ana_grp_ids per pool + group pair + if((itr.first.find(gw_preffix) != std::string::npos)) // gw_name contains ".pool.group" string + allocated[itr.second.ana_grp_id ] = true; + if(itr.first == gw_name){ + dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_name << dendl; return -EEXIST ; } } @@ -52,12 +58,12 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { } } if(gw_created.ana_grp_id == 0xff){ - dout(4) << __func__ << " ERROR create GW: " << gw_id << " ANA groupId was not allocated " << dendl; + dout(4) << __func__ << " ERROR create GW: " << gw_name << " ANA groupId was not allocated " << dendl; return -EINVAL; } - Created_gws.insert({gw_id, gw_created}); - dout(4) << __func__ << "Created GW: " << gw_id << " grpid " << gw_created.ana_grp_id << dendl; + Created_gws.insert({gw_name, gw_created}); + dout(4) << __func__ << "Created GW: " << gw_name << " grpid " << gw_created.ana_grp_id << dendl; std::stringstream ss; _dump_created_gws(ss); dout(4) << ss.str() << dendl; @@ -65,13 +71,17 @@ int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id) { } -int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, bool & map_modified){ +int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group, bool & map_modified){ GW_STATE_T * state; int ana_grp_id = 0; - if(find_created_gw(gw_id, ana_grp_id) != 0) + std::string gw_name; + + NVMeofGwMap::gw_name_from_id_pool_group (gw_name, gw_id , pool, group ); + + if(find_created_gw(gw_name, ana_grp_id) != 0) { - dout(4) << __func__ << " ERROR :GW was not created " << gw_id << dendl; + dout(4) << __func__ << " ERROR :GW was not created " << gw_name << dendl; return -ENODEV ; } // traverse the GMap , find gw in the map for all nqns @@ -82,33 +92,33 @@ int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, bool & map_modified){ GW_ID_T found_gw_id = ptr.first; const std::string& nqn = itr.first; state = &ptr.second; - if (gw_id == found_gw_id) { // GW was created + if (gw_name == found_gw_id) { // GW was created bool modified = false; for(int i=0; ism_state[i], i, modified); + fsm_handle_gw_delete (gw_name, nqn, state->sm_state[i], i, modified); map_modified |= modified; } - dout(4) << " Delete GW :"<< gw_id << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; - Gmap[itr.first].erase(gw_id); - delete_metadata(gw_id, nqn); + dout(4) << " Delete GW :"<< gw_name << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; + Gmap[itr.first].erase(gw_name); + delete_metadata(gw_name, nqn); } } - Created_gws.erase(gw_id);//TODO check whether ana map with nonce vector is destroyed properly - probably not. to handle! + Created_gws.erase(gw_name);//TODO check whether ana map with nonce vector is destroyed properly - probably not. to handle! return 0; } -GW_METADATA_T* NVMeofGwMap::find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn) +GW_METADATA_T* NVMeofGwMap::find_gw_metadata(const GW_ID_T &gw_name, const std::string& nqn) { auto it = Gmetadata.find(nqn); if (it != Gmetadata.end() ) { - auto it2 = it->second.find(gw_id); + auto it2 = it->second.find(gw_name); if (it2 != it->second.end() ) { return &it2->second; } else{ - dout(4) << __func__ << " not found by gw id " << gw_id << dendl; + dout(4) << __func__ << " not found by gw id " << gw_name << dendl; } } else{ @@ -206,36 +216,36 @@ int NVMeofGwMap:: update_active_timers( bool &propose_pending ){ } -int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending) +int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_name, const std::string& nqn, bool &propose_pending) { int rc = 0; int i; - GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + GW_STATE_T* gw_state = find_gw_map(gw_name, nqn); if (gw_state) { - dout(4) << "GW down " << gw_id << " nqn " <availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i ++) { bool map_modified; - fsm_handle_gw_down (gw_id, nqn, gw_state->sm_state[i], i, map_modified); + fsm_handle_gw_down (gw_name, nqn, gw_state->sm_state[i], i, map_modified); if(map_modified) propose_pending = true; set_gw_standby_state(gw_state, i); } } else { - dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; + dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_name << dendl; rc = 1; } return rc; } -int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending) +int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_name, const std::string& nqn , bool &propose_pending) { int rc = 0; #define FAILBACK_PERSISTENCY_INT_SEC 8 - GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + GW_STATE_T* gw_state = find_gw_map(gw_name, nqn); if (gw_state) { - dout(4) << "KA beacon from the GW " << gw_id << " in state " << (int)gw_state->availability << dendl; + dout(4) << "KA beacon from the GW " << gw_name << " in state " << (int)gw_state->availability << dendl; if (gw_state->availability == GW_AVAILABILITY_E::GW_CREATED) { // first time appears - allow IO traffic for this GW @@ -258,9 +268,9 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn // find the GW that took over on the group gw_state->optimized_ana_group_id bool some_found = false; propose_pending = true; - find_failback_gw(gw_id, nqn, gw_state, some_found); + find_failback_gw(gw_name, nqn, gw_state, some_found); if (!some_found ) { // There is start of single GW so immediately turn its group to GW_ACTIVE_STATE - dout(4) << "Warning - not found the GW responsible for" << gw_state->optimized_ana_group_id << " that took over the GW " << gw_id << "when it was fallen" << dendl; + dout(4) << "Warning - not found the GW responsible for" << gw_state->optimized_ana_group_id << " that took over the GW " << gw_name << "when it was fallen" << dendl; gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; } } @@ -268,7 +278,7 @@ int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const std::string& nqn // if GW remains AVAILABLE need to handle failback Timers , this is handled separately } else{ - dout(4) << __func__ << "ERROR GW-id was not found in the map " << gw_id << dendl; + dout(4) << __func__ << "ERROR GW-id was not found in the map " << gw_name << dendl; rc = 1; ceph_assert(false); } @@ -351,18 +361,18 @@ int NVMeofGwMap::handle_removed_subsystems (const std::vector &cre return 0; } -int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &failed_gw_id, const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid) +int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &failed_gw_id, const GW_ID_T &gw_name, const std::string& nqn, uint8_t ANA_groupid) { - GW_STATE_T* gw_state = find_gw_map(gw_id, nqn); + GW_STATE_T* gw_state = find_gw_map(gw_name, nqn); gw_state->sm_state[ANA_groupid] = GW_ACTIVE_STATE; gw_state->failover_peer[ANA_groupid] = failed_gw_id; //publish_map_to_gws(nqn); - dout(4) << "Set failower GW " << gw_id << " for ANA group " << (int)ANA_groupid << dendl; + dout(4) << "Set failower GW " << gw_name << " for ANA group " << (int)ANA_groupid << dendl; return 0; } -int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &some_found) +int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_name, const std::string& nqn, GW_STATE_T* gw_state, bool &some_found) { auto subsyst_it = find_subsystem_map(nqn); bool found_some_gw = false; @@ -370,9 +380,9 @@ int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_id, const std::string& nqn, for (auto& itr : *subsyst_it) { //cout << "Found GW " << itr.second.gw_id << endl; if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { - ceph_assert(itr.second.failover_peer[gw_state->optimized_ana_group_id] == gw_id); + ceph_assert(itr.second.failover_peer[gw_state->optimized_ana_group_id] == gw_name); - dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_id << dendl; + dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_name << dendl; itr.second.sm_state[gw_state->optimized_ana_group_id] = GW_WAIT_FAILBACK_PREPARED; start_timer(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_BLOCKED_AGROUP_OWNER; @@ -442,7 +452,7 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin } - int NVMeofGwMap::fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state , int grpid, bool &map_modified) + int NVMeofGwMap::fsm_handle_gw_down (const GW_ID_T &gw_name, const std::string& nqn, GW_STATES_PER_AGROUP_E state , int grpid, bool &map_modified) { switch (state) { @@ -453,12 +463,12 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin case GW_WAIT_FAILBACK_PREPARED: { - cancel_timer(gw_id, nqn, grpid); + cancel_timer(gw_name, nqn, grpid); auto subsyst_it = find_subsystem_map(nqn); for (auto& itr : *subsyst_it){ if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER) // found GW that was intended for Failback for this ana grp { - dout(4) << "Warning: Outgoing Failback when GW is down back - to rollback it" << nqn <<" GW " <optimized_ana_group_id) {// Try to find GW that temporary owns my group - if found, this GW should pass to standby for this group auto subsyst_it = find_subsystem_map(nqn); for (auto& itr : *subsyst_it){ @@ -514,12 +524,12 @@ int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::strin case GW_WAIT_FAILBACK_PREPARED: { - cancel_timer(gw_id, nqn, grpid); + cancel_timer(gw_name, nqn, grpid); auto subsyst_it = find_subsystem_map(nqn); for (auto& itr : *subsyst_it){ if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER) // found GW that was intended for Failback for this ana grp { - dout(4) << "Warning: Outgoing Failback when GW is deleted - to rollback it" << nqn <<" GW " <sm_state[grpid] == GW_WAIT_FAILBACK_PREPARED) { - dout(4) << "Expired Failback timer from GW " << gw_id << " ANA groupId "<< grpid << dendl; + dout(4) << "Expired Failback timer from GW " << gw_name << " ANA groupId "<< grpid << dendl; - cancel_timer(gw_id, nqn, grpid); + cancel_timer(gw_name, nqn, grpid); for (auto& itr : *subsyst_it) { if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { set_gw_standby_state(gw_state, grpid); itr.second.sm_state[grpid] = GW_ACTIVE_STATE; - dout(4) << "Failback from GW " << gw_id << " to " << itr.first << dendl; + dout(4) << "Failback from GW " << gw_name << " to " << itr.first << dendl; map_modified = true; break; } @@ -568,7 +578,7 @@ int NVMeofGwMap::fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& dout(4) << "Failback unsuccessfull. GW: " << itr.first << "becomes Active for the ana group " << grpid << dendl; } set_gw_standby_state(gw_state, grpid); - dout(4) << "Failback unsuccessfull GW: " << gw_id << "becomes standby for the ana group " << grpid << dendl; + dout(4) << "Failback unsuccessfull GW: " << gw_name << "becomes standby for the ana group " << grpid << dendl; map_modified = true; break; } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 0383637b709a..348bd3bee237 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -301,6 +301,8 @@ class NVMeofGwMap int update_gw_nonce(const GW_ID_T &gw_id, ANA_GRP_ID_T &ana_grp_id, NONCE_VECTOR_T &new_nonces) { GW_CREATED_T* gw_created = find_created_gw(gw_id); + if(!gw_created) + return 1; if (new_nonces.size() >0){ GW_ANA_NONCE_MAP & nonce_map = gw_created->nonce_map; if(nonce_map[ana_grp_id].size() == 0){ @@ -325,8 +327,11 @@ class NVMeofGwMap it.second.clear();// clear the nonce contexts } nonce_map.clear(); + return 0; } - return 0; + else + return 1; + } @@ -359,13 +364,21 @@ class NVMeofGwMap int _dump_gwmap(GWMAP & Gmap)const; int _dump_gwmap(std::stringstream &ss)const ; int _dump_created_gws(std::stringstream &ss)const ; - int cfg_add_gw (const GW_ID_T &gw_id); - int cfg_delete_gw (const GW_ID_T &gw_id, bool &propose_pending); + int cfg_add_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group); + int cfg_delete_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group, bool &propose_pending); int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); int handle_abandoned_ana_groups (bool &propose_pending); int handle_removed_subsystems (const std::vector &created_subsystems, bool &propose_pending); + //make these functions static + static void gw_name_from_id_pool_group (std::string &gw_name , const std::string &gw_id ,const std::string &gw_pool, const std::string &gw_group ){ + gw_name = gw_id + "." + gw_pool + "." + gw_group; + } + static void gw_preffix_from_id_pool_group (std::string &gw_preffix ,const std::string &gw_pool, const std::string &gw_group ){ + gw_preffix = "." + gw_pool + "." + gw_group; + } + void debug_encode_decode(){ ceph::buffer::list bl; encode(bl); diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 48d9e3114419..9acdb1bf3a46 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -51,14 +51,14 @@ static int cnt ; void NVMeofGwMon::inject1(){ //bool propose = false; if( ++cnt == 4 ){// simulation that new configuration was added - pending_map.cfg_add_gw("GW1" ); - pending_map.cfg_add_gw("GW2" ); - pending_map.cfg_add_gw("GW3" ); + pending_map.cfg_add_gw("GW1" ,"g1","p1"); + pending_map.cfg_add_gw("GW2" ,"g1","p1"); + pending_map.cfg_add_gw("GW3" ,"g1","p1"); NONCE_VECTOR_T new_nonces = {"abc", "def","hij"}; ANA_GRP_ID_T grp = 1; - pending_map.update_gw_nonce("GW1", grp, new_nonces); + pending_map.update_gw_nonce("GW1.g1.p1", grp, new_nonces); grp = 2; - pending_map.update_gw_nonce("GW1", grp, new_nonces); + pending_map.update_gw_nonce("GW1.g1.p1", grp, new_nonces); std::stringstream ss; pending_map._dump_created_gws(ss); dout(4) << ss.str() << dendl; @@ -141,7 +141,7 @@ void NVMeofGwMon::tick(){ } bool _propose_pending = false; - //inject1(); + inject1(); const auto now = ceph::coarse_mono_clock::now(); const auto nvmegw_beacon_grace = g_conf().get_val("mon_nvmeofgw_beacon_grace"); dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; @@ -172,12 +172,12 @@ void NVMeofGwMon::tick(){ const auto cutoff = now - nvmegw_beacon_grace; for(auto &itr : last_beacon){// Pass over all the stored beacons auto last_beacon_time = itr.second; - GW_ID_T gw_id; + GW_ID_T gw_name; std::string nqn; if(last_beacon_time < cutoff){ - get_gw_and_nqn_from_key(itr.first, gw_id, nqn); - dout(4) << "beacon timeout for GW " << gw_id << " nqn " << nqn << dendl; - pending_map.process_gw_map_gw_down( gw_id, nqn, propose); + get_gw_and_nqn_from_key(itr.first, gw_name, nqn); + dout(4) << "beacon timeout for GW " << gw_name << " nqn " << nqn << dendl; + pending_map.process_gw_map_gw_down( gw_name, nqn, propose); _propose_pending |= propose; last_beacon.erase(itr.first); } @@ -432,11 +432,12 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) cmd_getval(cmdmap, "group", group); if(prefix == "nvme-gw create"){ - rc = pending_map.cfg_add_gw( id ); + rc = pending_map.cfg_add_gw(id ,pool , group); ceph_assert(rc!= -EINVAL); + map_modified = true; } else{ - rc = pending_map.cfg_delete_gw( id, map_modified); + rc = pending_map.cfg_delete_gw(id, pool, group, map_modified);// TODO add params ceph_assert(rc!= -EINVAL); } if(map_modified){ @@ -483,11 +484,11 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ #define GW_DELIM ',' -void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_id , std::string& nqn) +void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_name , std::string& nqn) { std::stringstream s1(key); - std::getline(s1, gw_id, GW_DELIM); + std::getline(s1, gw_name, GW_DELIM); std::getline(s1, nqn, GW_DELIM); } @@ -504,23 +505,28 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << out.str() <get_gw_id(); + GW_ID_T pool = m->get_gw_pool(); + GW_ID_T group = m->get_gw_group(); GW_AVAILABILITY_E avail = m->get_availability(); const GwSubsystems& subsystems = m->get_subsystems(); bool propose = false; int ana_grp_id = 0; std::vector configured_subsystems; + std::string gw_name; + pending_map.gw_name_from_id_pool_group(gw_name , gw_id , pool, group ); + if (avail == GW_AVAILABILITY_E::GW_CREATED){ // in this special state GWs receive map with just "created_gws" vector - if(pending_map.find_created_gw(gw_id, ana_grp_id) == 0) {// GW is created administratively - dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP" << dendl; + if(pending_map.find_created_gw(gw_name, ana_grp_id) == 0) {// GW is created administratively + dout(4) << "GW " << gw_name << " sent beacon being in state GW_WAIT_INITIAL_MAP" << dendl; propose = true; } else{ - dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP but it is not created yet!!! "<< dendl; + dout(4) << "GW " << gw_name << " sent beacon being in state GW_WAIT_INITIAL_MAP but it is not created yet!!! "<< dendl; #ifdef BYPASS_GW_CREATE_CLI - pending_map.cfg_add_gw(gw_id); - dout(4) << "GW " << gw_id << " created since mode is bypass-create-cli "<< dendl; + pending_map.cfg_add_gw(gw_name); + dout(4) << "GW " << gw_name << " created since mode is bypass-create-cli "<< dendl; propose= true; #endif } @@ -530,14 +536,14 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ // Validation gw is in the database for (const NqnState &st : subsystems) { - gw_state = pending_map.find_gw_map( gw_id, st.nqn ); + gw_state = pending_map.find_gw_map( gw_name, st.nqn ); if (gw_state == NULL) { - dout(4) << "GW + NQN pair is not in the database: " << gw_id << " " << st.nqn << dendl; + dout(4) << "GW + NQN pair is not in the database: " << gw_name << " " << st.nqn << dendl; // if GW is created - if(pending_map.find_created_gw(gw_id, ana_grp_id) == 0) {// GW is created administratively - pending_map.insert_gw_to_map(gw_id, st.nqn, ana_grp_id); - dout(4) << "GW + NQN pair " << gw_id << " " << st.nqn << " inserted to map, ANA grp-id " << ana_grp_id << dendl; + if(pending_map.find_created_gw(gw_name, ana_grp_id) == 0) {// GW is created administratively + pending_map.insert_gw_to_map(gw_name, st.nqn, ana_grp_id); + dout(4) << "GW + NQN pair " << gw_name << " " << st.nqn << " inserted to map, ANA grp-id " << ana_grp_id << dendl; } else { //drop beacon on the floor silently discard @@ -554,8 +560,8 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ // check pending_map.epoch vs m->get_version() - if different - drop the beacon for (const NqnState& st: subsystems) { - last_beacon[(gw_id + GW_DELIM + st.nqn)] = now; - pending_map.process_gw_map_ka( gw_id, st.nqn, propose ); + last_beacon[(gw_name + GW_DELIM + st.nqn)] = now; + pending_map.process_gw_map_ka( gw_name, st.nqn, propose ); } } else if(avail == GW_AVAILABILITY_E::GW_UNAVAILABLE){ // state set by GW client application @@ -563,10 +569,10 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ for (const NqnState& st: subsystems) { - auto it = last_beacon.find(gw_id + GW_DELIM + st.nqn); + auto it = last_beacon.find(gw_name + GW_DELIM + st.nqn); if (it != last_beacon.end()){ - last_beacon.erase(gw_id + GW_DELIM + st.nqn); - pending_map.process_gw_map_gw_down( gw_id, st.nqn, propose ); + last_beacon.erase(gw_name + GW_DELIM + st.nqn); + pending_map.process_gw_map_gw_down( gw_name, st.nqn, propose ); } } } diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index a3e858d08f5f..3f2ac6385d6a 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -272,11 +272,14 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) mp._dump_gwmap(ss); dout(0) << ss.str() << dendl; ana_info ai; + std::string gw_name; + NVMeofGwMap::gw_name_from_id_pool_group(gw_name , name , pool, group); if (map.epoch == 0){ // initial map int ana_grp_id = -1; - if(mp.find_created_gw(name ,ana_grp_id) !=0 ) + + if(mp.find_created_gw(gw_name ,ana_grp_id) !=0) { - dout(0) << "Failed to find created gw for " << name << dendl; + dout(0) << "Failed to find created gw for " << gw_name << dendl; return; } std::stringstream ss1; @@ -303,13 +306,13 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) nas.set_nqn(nqn); // This gateway state for the current subsystem / nqn - const auto& new_gateway_state = idStateMap.find(name); + const auto& new_gateway_state = idStateMap.find(gw_name); // There is no subsystem update for this gateway if (new_gateway_state == idStateMap.end()) continue; // Previously monitor distributed state - GW_STATE_T* old_gw_state = map.find_gw_map(name, nqn); + GW_STATE_T* old_gw_state = map.find_gw_map(gw_name, nqn); // Iterate over possible ANA Groups for (uint32_t ana_grp_index = 0; ana_grp_index < MAX_SUPPORTED_ANA_GROUPS; ana_grp_index++) { From 753e6d38b1e030bb0ea0ffeb281ade1bb0a878ae Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Tue, 26 Dec 2023 15:23:55 +0200 Subject: [PATCH 63/65] Multi gateway group refactoring Cleanup, extract - src/mon/NVMeofGwSerialize.h - src/mon/NVMeofGwTypes.h Signed-off-by: Alexander Indenbaum --- src/messages/MNVMeofGwBeacon.h | 61 --- src/mon/NVMeofGwMap.cc | 754 ++++++++++++++------------------- src/mon/NVMeofGwMap.h | 481 +++------------------ src/mon/NVMeofGwMon.cc | 219 +++------- src/mon/NVMeofGwMon.h | 50 +-- src/mon/NVMeofGwSerialize.h | 311 ++++++++++++++ src/mon/NVMeofGwTypes.h | 108 +++++ src/nvmeof/NVMeofGw.cc | 120 +++--- 8 files changed, 919 insertions(+), 1185 deletions(-) create mode 100755 src/mon/NVMeofGwSerialize.h create mode 100755 src/mon/NVMeofGwTypes.h diff --git a/src/messages/MNVMeofGwBeacon.h b/src/messages/MNVMeofGwBeacon.h index 114c0cbd6c01..0735e57fa149 100644 --- a/src/messages/MNVMeofGwBeacon.h +++ b/src/messages/MNVMeofGwBeacon.h @@ -22,55 +22,6 @@ #include "mon/NVMeofGwMap.h" #include "include/types.h" - -typedef GW_STATES_PER_AGROUP_E SM_STATE[MAX_SUPPORTED_ANA_GROUPS]; -struct NqnState { - std::string nqn; // subsystem NQN - SM_STATE sm_state; // susbsystem's state machine state - uint16_t opt_ana_gid; // optimized ANA group index - - // Default constructor - NqnState(const std::string& _nqn) : nqn(_nqn), opt_ana_gid(0) { - for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) - sm_state[i] = GW_STATES_PER_AGROUP_E::GW_IDLE_STATE; - } -}; - -typedef std::vector GwSubsystems; - -std::ostream& operator<<(std::ostream& os, const SM_STATE value) { - os << "SM_STATE [ "; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { - switch (value[i]) { - case GW_STATES_PER_AGROUP_E::GW_IDLE_STATE: os << "IDLE "; break; - case GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE: os << "STANDBY "; break; - case GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE: os << "ACTIVE "; break; - case GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER: os << "BLOCKED_AGROUP_OWNER "; break; - case GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED: os << "WAIT_FAILBACK_PREPARED "; break; - default: os << "Invalid " << (int)value[i] << " "; - } - } - os << "]"; - return os; -} - -std::ostream& operator<<(std::ostream& os, const NqnState value) { - os << "Subsystem( nqn: " << value.nqn << ", ANAGrpId: " << value.opt_ana_gid << ", " << value.sm_state << " )"; - return os; -} - -std::ostream& operator<<(std::ostream& os, const GW_AVAILABILITY_E value) { - switch (value) { - - case GW_AVAILABILITY_E::GW_CREATED: os << "CREATED"; break; - case GW_AVAILABILITY_E::GW_AVAILABLE: os << "AVAILABLE"; break; - case GW_AVAILABILITY_E::GW_UNAVAILABLE: os << "UNAVAILABLE"; break; - - default: os << "Invalid " << (int)value << " "; - } - return os; -} - class MNVMeofGwBeacon final : public PaxosServiceMessage { private: static constexpr int HEAD_VERSION = 1; @@ -115,18 +66,6 @@ class MNVMeofGwBeacon final : public PaxosServiceMessage { std::string_view get_type_name() const override { return "nvmeofgwbeacon"; } - void print(std::ostream& out) const override { - out << get_type_name() << - " nvmeofgw id: " << gw_id << - ", pool:" << gw_pool << - ", group:" << gw_group << - ", susbsystems: [ "; - for (const NqnState& st: subsystems) { - out << st << " "; - } - out << "], availability: " << availability << ", version:" << version; - } - void encode_payload(uint64_t features) override { header.version = HEAD_VERSION; header.compat_version = COMPAT_VERSION; diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index b3e3c6db8ca3..fe2c3dcca36d 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -1,7 +1,7 @@ - #include #include "include/stringify.h" #include "NVMeofGwMon.h" +#include "NVMeofGwMap.h" using std::map; using std::make_pair; @@ -32,360 +32,247 @@ static std::string G_gw_ana_states[] = { "WAIT_FLBACK_RDY" }; -int NVMeofGwMap::cfg_add_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group) { - GW_CREATED_T gw_created; - bool allocated[MAX_SUPPORTED_ANA_GROUPS] = {false}; - gw_created.ana_grp_id = 0xff; - std::string gw_name; - std::string gw_preffix; - NVMeofGwMap::gw_preffix_from_id_pool_group (gw_preffix, pool, group ); - NVMeofGwMap::gw_name_from_id_pool_group (gw_name, gw_id , pool, group ); - - for (auto& itr : Created_gws){ - // Allocate ana_grp_ids per pool + group pair - if((itr.first.find(gw_preffix) != std::string::npos)) // gw_name contains ".pool.group" string - allocated[itr.second.ana_grp_id ] = true; - if(itr.first == gw_name){ - dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_name << dendl; - return -EEXIST ; - } - } - - for(int i=0; i<=MAX_SUPPORTED_ANA_GROUPS; i++){ - if (allocated[i] == false){ - gw_created.ana_grp_id = i; - break; - } - } - if(gw_created.ana_grp_id == 0xff){ - dout(4) << __func__ << " ERROR create GW: " << gw_name << " ANA groupId was not allocated " << dendl; - return -EINVAL; - } - - Created_gws.insert({gw_name, gw_created}); - dout(4) << __func__ << "Created GW: " << gw_name << " grpid " << gw_created.ana_grp_id << dendl; - std::stringstream ss; - _dump_created_gws(ss); - dout(4) << ss.str() << dendl; - return 0; -} - - -int NVMeofGwMap::cfg_delete_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group, bool & map_modified){ - - GW_STATE_T * state; - int ana_grp_id = 0; - std::string gw_name; - - NVMeofGwMap::gw_name_from_id_pool_group (gw_name, gw_id , pool, group ); - - if(find_created_gw(gw_name, ana_grp_id) != 0) - { - dout(4) << __func__ << " ERROR :GW was not created " << gw_name << dendl; - return -ENODEV ; - } - // traverse the GMap , find gw in the map for all nqns - - map_modified = false; - for (auto& itr : Gmap) - for (auto& ptr : itr.second) { - GW_ID_T found_gw_id = ptr.first; - const std::string& nqn = itr.first; - state = &ptr.second; - if (gw_name == found_gw_id) { // GW was created - bool modified = false; - for(int i=0; ism_state[i], i, modified); - map_modified |= modified; - } - dout(4) << " Delete GW :"<< gw_name << "nqn " << nqn << " ANA grpid: " << state->optimized_ana_group_id << dendl; - Gmap[itr.first].erase(gw_name); - delete_metadata(gw_name, nqn); - } - } - Created_gws.erase(gw_name);//TODO check whether ana map with nonce vector is destroyed properly - probably not. to handle! - - return 0; -} - - -GW_METADATA_T* NVMeofGwMap::find_gw_metadata(const GW_ID_T &gw_name, const std::string& nqn) -{ - auto it = Gmetadata.find(nqn); - if (it != Gmetadata.end() ) { - auto it2 = it->second.find(gw_name); - if (it2 != it->second.end() ) { - return &it2->second; - } - else{ - dout(4) << __func__ << " not found by gw id " << gw_name << dendl; +int NVMeofGwMap::cfg_add_gw(const GW_ID_T &gw_id, const GROUP_KEY& group_key) { + // Calculate allocated group bitmask + bool allocated[MAX_SUPPORTED_ANA_GROUPS] = {false}; + for (auto& itr: Created_gws[group_key]) { + allocated[itr.second.ana_grp_id] = true; + if(itr.first == gw_id) { + dout(4) << __func__ << " ERROR create GW: already exists in map " << gw_id << dendl; + return -EEXIST ; } } - else{ - dout(4) << __func__ << " not found by nqn " << nqn << dendl; - } - return NULL; -} - -int NVMeofGwMap::_dump_gwmap(GWMAP & Gmap)const { + // Allocate the new group id + for(int i=0; i<=MAX_SUPPORTED_ANA_GROUPS; i++) { + if (allocated[i] == false) { + GW_CREATED_T gw_created(i); + Created_gws[group_key][gw_id] = gw_created; - dout(0) << __func__ << " called " << mon << dendl; - std::ostringstream ss; - ss << std::endl; - for (auto& itr : Gmap) { - for (auto& ptr : itr.second) { - - ss << "(gw-mon) NQN " << itr.first << " GW_ID " << ptr.first << " ANA gr " << std::setw(5) << (int)ptr.second.optimized_ana_group_id+1 << - " available :" << G_gw_avail[(int)ptr.second.availability] << " States: "; - int num_groups = Created_gws.size(); - for (int i = 0; i < num_groups; i++) { - ss << G_gw_ana_states[(int)ptr.second.sm_state[i]] << " " ; - } - ss << " Failover peers: " << std::endl << " "; - - for (int i = 0; i < num_groups; i++) { - ss << ptr.second.failover_peer[i] << " " ; - } - ss << std::endl; + dout(4) << __func__ << "Created GW: " << gw_id << " pool " << group_key.first << "group" << group_key.second + << " grpid " << gw_created.ana_grp_id << dendl; + return 0; } } - dout(0) << ss.str() <second; + for(int i=0; ianagrp_sm_tstamps[i] != INVALID_GW_TIMER){ - metadata->anagrp_sm_tstamps[i] ++; - dout(4) << "timer for GW " << ptr.first << " ANA GRP " << i<<" :" << metadata->anagrp_sm_tstamps[i] <anagrp_sm_tstamps[i] >= 2){//TODO define - fsm_handle_to_expired (ptr.first, itr.first, i, propose_pending); + for (auto& group_md: Gmetadata) { + auto& group_key = group_md.first; + auto& pool = group_key.first; + auto& group = group_key.second; + + for (auto& nqn_md: group_md.second) { + auto& nqn = nqn_md.first; + for (auto& gw_md: nqn_md.second) { + auto& gw_id = gw_md.first; + auto& md = gw_md.second; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { + if (md.anagrp_sm_tstamps[i] == INVALID_GW_TIMER) continue; + + md.anagrp_sm_tstamps[i]++; + dout(4) << "timer for GW " << gw_id << " ANA GRP " << i<<" :" << md.anagrp_sm_tstamps[i] <= 2){//TODO define + fsm_handle_to_expired (gw_id, std::make_pair(pool, group), nqn, i, propose_pending); } } } } } - return 0; } -int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_name, const std::string& nqn, bool &propose_pending) -{ +int NVMeofGwMap::process_gw_map_gw_down(const GW_ID_T &gw_id, const GROUP_KEY& group_key, + const NQN_ID_T& nqn, bool &propose_pending) { int rc = 0; - int i; - GW_STATE_T* gw_state = find_gw_map(gw_name, nqn); - if (gw_state) { - dout(4) << "GW down " << gw_name << " nqn " <availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; - for (i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i ++) { - bool map_modified; - fsm_handle_gw_down (gw_name, nqn, gw_state->sm_state[i], i, map_modified); - if(map_modified) propose_pending = true; - set_gw_standby_state(gw_state, i); + auto& nqn_gws_states = Gmap[group_key][nqn]; + auto gw_state = nqn_gws_states.find(gw_id); + if (gw_state != nqn_gws_states.end()) { + dout(4) << "GW down " << gw_id << " nqn " <second; + st.availability = GW_AVAILABILITY_E::GW_UNAVAILABLE; + for (ANA_GRP_ID_T i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i ++) { + fsm_handle_gw_down (gw_id, group_key, nqn, st.sm_state[i], i, propose_pending); + st.standby_state(i); } } else { - dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_name << dendl; - rc = 1; + dout(4) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl; + rc = -EINVAL; } return rc; } -int NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_name, const std::string& nqn , bool &propose_pending) +void NVMeofGwMap::process_gw_map_ka(const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn , bool &propose_pending) { - int rc = 0; + #define FAILBACK_PERSISTENCY_INT_SEC 8 - GW_STATE_T* gw_state = find_gw_map(gw_name, nqn); - if (gw_state) { - dout(4) << "KA beacon from the GW " << gw_name << " in state " << (int)gw_state->availability << dendl; - - if (gw_state->availability == GW_AVAILABILITY_E::GW_CREATED) { - // first time appears - allow IO traffic for this GW - gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) gw_state->sm_state[i] = GW_STANDBY_STATE; - if (gw_state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { // not a redundand GW - gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; - } - propose_pending = true; + auto& nqn_gws_states = Gmap[group_key][nqn]; + auto gw_state = nqn_gws_states.find(gw_id); + ceph_assert (gw_state != nqn_gws_states.end()); + auto& st = gw_state->second; + dout(4) << "KA beacon from the GW " << gw_id << " in state " << (int)st.availability << dendl; + + if (st.availability == GW_AVAILABILITY_E::GW_CREATED) { + // first time appears - allow IO traffic for this GW + st.availability = GW_AVAILABILITY_E::GW_AVAILABLE; + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) st.sm_state[i] = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE; + if (st.optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { // not a redundand GW + st.sm_state[st.optimized_ana_group_id] = GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE; } - - else if (gw_state->availability == GW_AVAILABILITY_E::GW_UNAVAILABLE) { - gw_state->availability = GW_AVAILABILITY_E::GW_AVAILABLE; - if (gw_state->optimized_ana_group_id == REDUNDANT_GW_ANA_GROUP_ID) { - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) gw_state->sm_state[i] = GW_STANDBY_STATE; - propose_pending = true; //TODO try to find the 1st GW overloaded by ANA groups and start failback for ANA group that it is not an owner of - } - else { - //========= prepare to Failback to this GW ========= - // find the GW that took over on the group gw_state->optimized_ana_group_id - bool some_found = false; - propose_pending = true; - find_failback_gw(gw_name, nqn, gw_state, some_found); - if (!some_found ) { // There is start of single GW so immediately turn its group to GW_ACTIVE_STATE - dout(4) << "Warning - not found the GW responsible for" << gw_state->optimized_ana_group_id << " that took over the GW " << gw_name << "when it was fallen" << dendl; - gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_ACTIVE_STATE; - } + propose_pending = true; + } + else if (st.availability == GW_AVAILABILITY_E::GW_UNAVAILABLE) { + st.availability = GW_AVAILABILITY_E::GW_AVAILABLE; + if (st.optimized_ana_group_id == REDUNDANT_GW_ANA_GROUP_ID) { + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) st.sm_state[i] = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE; + propose_pending = true; //TODO try to find the 1st GW overloaded by ANA groups and start failback for ANA group that it is not an owner of + } + else { + //========= prepare to Failback to this GW ========= + // find the GW that took over on the group st.optimized_ana_group_id + bool some_found = false; + propose_pending = true; + find_failback_gw(gw_id, group_key, nqn, some_found); + if (!some_found ) { // There is start of single GW so immediately turn its group to GW_ACTIVE_STATE + dout(4) << "Warning - not found the GW responsible for" << st.optimized_ana_group_id << " that took over the GW " << gw_id << "when it was fallen" << dendl; + st.sm_state[st.optimized_ana_group_id] = GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE; } } - // if GW remains AVAILABLE need to handle failback Timers , this is handled separately - } - else{ - dout(4) << __func__ << "ERROR GW-id was not found in the map " << gw_name << dendl; - rc = 1; - ceph_assert(false); } - return rc; } -int NVMeofGwMap::handle_abandoned_ana_groups(bool & propose) +void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) { propose = false; - for (auto& nqn_itr : Gmap) { - dout(4) << "NQN " << nqn_itr.first << dendl; - - for (auto& ptr : nqn_itr.second) { // loop for GWs inside nqn group - auto gw_id = ptr.first; - GW_STATE_T* state = &ptr.second; - - //1. Failover missed : is there is a GW in unavailable state? if yes, is its ANA group handled by some other GW? - if (state->availability == GW_AVAILABILITY_E::GW_UNAVAILABLE && state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { - auto found_gw_for_ana_group = false; - for (auto& ptr2 : nqn_itr.second) { - if (ptr2.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE && ptr2.second.sm_state[state->optimized_ana_group_id] == GW_ACTIVE_STATE) { - found_gw_for_ana_group = true; // dout(4) << "Found GW " << ptr2.first << " that handles ANA grp " << (int)state->optimized_ana_group_id << dendl; - break; + for (auto& group_state: Gmap) { + auto& group_key = group_state.first; + auto& nqn_gws_states = group_state.second; + + for (auto& nqn_gws_state: nqn_gws_states) { + auto& nqn = nqn_gws_state.first; + auto& gws_states = nqn_gws_state.second; + dout(4) << "NQN " << nqn << dendl; + + for (auto& gw_state : gws_states) { // loop for GWs inside nqn group + auto& gw_id = gw_state.first; + GW_STATE_T& state = gw_state.second; + + //1. Failover missed : is there is a GW in unavailable state? if yes, is its ANA group handled by some other GW? + if (state.availability == GW_AVAILABILITY_E::GW_UNAVAILABLE && state.optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID) { + auto found_gw_for_ana_group = false; + for (auto& gw_state2 : gws_states) { + GW_STATE_T& state2 = gw_state2.second; + if (state2.availability == GW_AVAILABILITY_E::GW_AVAILABLE && state2.sm_state[state.optimized_ana_group_id] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { + found_gw_for_ana_group = true; // dout(4) << "Found GW " << ptr2.first << " that handles ANA grp " << (int)state->optimized_ana_group_id << dendl; + break; + } } - } - if (found_gw_for_ana_group == false) { //choose the GW for handle ana group - dout(4)<< "Was not found the GW " << " that handles ANA grp " << (int)state->optimized_ana_group_id << " find candidate "<< dendl; + if (found_gw_for_ana_group == false) { //choose the GW for handle ana group + dout(4)<< "Was not found the GW " << " that handles ANA grp " << (int)state.optimized_ana_group_id << " find candidate "<< dendl; - GW_STATE_T* gw_state = find_gw_map(gw_id, nqn_itr.first); - for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) - find_failover_candidate( gw_id, nqn_itr.first , gw_state, i, propose ); + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + find_failover_candidate( gw_id, group_key, nqn, i, propose ); + } } - } - //2. Failback missed: Check this GW is Available and Standby and no other GW is doing Failback to it - else if (state->availability == GW_AVAILABILITY_E::GW_AVAILABLE && state->optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID && - state->sm_state[state->optimized_ana_group_id] == GW_STANDBY_STATE - ) - { - bool found = false; - for (auto& ptr2 : nqn_itr.second) { - if ( ptr2.second.sm_state[state->optimized_ana_group_id] == GW_WAIT_FAILBACK_PREPARED){ - found = true; - break; - } - } - if(!found){ - dout(4) << __func__ << " GW " <optimized_ana_group_id << dendl; - state->sm_state[state->optimized_ana_group_id] = GW_ACTIVE_STATE; - propose = true; + //2. Failback missed: Check this GW is Available and Standby and no other GW is doing Failback to it + else if (state.availability == GW_AVAILABILITY_E::GW_AVAILABLE + && state.optimized_ana_group_id != REDUNDANT_GW_ANA_GROUP_ID && + state.sm_state[state.optimized_ana_group_id] == GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE) + { + bool found = false; + for (auto& gw_state2 : gws_states) { + auto& state2 = gw_state2.second; + if (state2.sm_state[state.optimized_ana_group_id] == GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED){ + found = true; + break; + } + } + if (!found) { + dout(4) << __func__ << " GW " << gw_id << " turns to be Active for ANA group " << state.optimized_ana_group_id << dendl; + state.sm_state[state.optimized_ana_group_id] = GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE; + propose = true; + } } } } } - return 0; } - -int NVMeofGwMap::handle_removed_subsystems (const std::vector &created_subsystems, bool &propose_pending) +/* + sync our sybsystems from the beacon. systems subsystems not in beacon are removed. +*/ +void NVMeofGwMap::handle_removed_subsystems (const std::vector ¤t_subsystems, const GROUP_KEY& group_key, bool &propose_pending) { - bool found = false;; - for (auto& m_itr : Gmap) { - //if not found in the vector of configured subsystems, need to remove the nqn from the map - found = false; - for(auto v_itr : created_subsystems){ - if (m_itr.first == v_itr){ - found = true; - break; - } - } - if(!found){ - // remove m_itr.first from the map - dout(4) << "seems subsystem nqn was removed - to remove nqn from the map " << m_itr.first <first) == current_subsystems.end()) { + // Erase the susbsystem nqn if the nqn is not in the current subsystems + it = nqn_gws_states.erase(it); + } else { + // Move to the next pair + ++it; } } - return 0; } -int NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &failed_gw_id, const GW_ID_T &gw_name, const std::string& nqn, uint8_t ANA_groupid) +void NVMeofGwMap::set_failover_gw_for_ANA_group(const GW_ID_T &failed_gw_id, const GROUP_KEY& group_key, const GW_ID_T &gw_id, const NQN_ID_T& nqn, ANA_GRP_ID_T ANA_groupid) { - GW_STATE_T* gw_state = find_gw_map(gw_name, nqn); - gw_state->sm_state[ANA_groupid] = GW_ACTIVE_STATE; - gw_state->failover_peer[ANA_groupid] = failed_gw_id; - //publish_map_to_gws(nqn); - dout(4) << "Set failower GW " << gw_name << " for ANA group " << (int)ANA_groupid << dendl; - return 0; + GW_STATE_T& gw_state = Gmap[group_key][nqn][gw_id]; + gw_state.sm_state[ANA_groupid] = GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE; + gw_state.failover_peer[ANA_groupid] = failed_gw_id; + + dout(4) << "Set failower GW " << gw_id << " for ANA group " << (int)ANA_groupid << dendl; } -int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_name, const std::string& nqn, GW_STATE_T* gw_state, bool &some_found) +void NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, bool &some_found) { - auto subsyst_it = find_subsystem_map(nqn); bool found_some_gw = false; bool found_candidate = false; - for (auto& itr : *subsyst_it) { - //cout << "Found GW " << itr.second.gw_id << endl; - if (itr.second.sm_state[gw_state->optimized_ana_group_id] == GW_ACTIVE_STATE) { - ceph_assert(itr.second.failover_peer[gw_state->optimized_ana_group_id] == gw_name); - - dout(4) << "Found GW " << itr.first << ", nqn " << nqn << " that took over the ANAGRP " << (int)gw_state->optimized_ana_group_id << " of the available GW " << gw_name << dendl; - itr.second.sm_state[gw_state->optimized_ana_group_id] = GW_WAIT_FAILBACK_PREPARED; - start_timer(itr.first, nqn, gw_state->optimized_ana_group_id);// Add timestamp of start Failback preparation - gw_state->sm_state[gw_state->optimized_ana_group_id] = GW_BLOCKED_AGROUP_OWNER; + auto& nqn_gws_states = Gmap[group_key][nqn]; + auto& gw_state = Gmap[group_key][nqn][gw_id]; + for (auto& nqn_gw_state: nqn_gws_states) { + auto& found_gw_id = nqn_gw_state.first; + auto& st = nqn_gw_state.second; + if (st.sm_state[gw_state.optimized_ana_group_id] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { + ceph_assert(st.failover_peer[gw_state.optimized_ana_group_id] == gw_id); + + dout(4) << "Found GW " << found_gw_id << ", nqn " << nqn << " that took over the ANAGRP " << gw_state.optimized_ana_group_id << " of the available GW " << gw_id << dendl; + st.sm_state[gw_state.optimized_ana_group_id] = GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED; + start_timer(found_gw_id, group_key, nqn, gw_state.optimized_ana_group_id);// Add timestamp of start Failback preparation + gw_state.sm_state[gw_state.optimized_ana_group_id] = GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER; found_candidate = true; break; @@ -394,98 +281,95 @@ int NVMeofGwMap::find_failback_gw(const GW_ID_T &gw_name, const std::string& nq } some_found = found_candidate |found_some_gw; //TODO cleanup myself (gw_id) from the Block-List - return 0; } // TODO When decision to change ANA state of group is prepared, need to consider that last seen FSM state is "approved" - means it was returned in beacon alone with map version -int NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, int grpid, bool &propose_pending) +void NVMeofGwMap::find_failover_candidate(const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, ANA_GRP_ID_T grpid, bool &propose_pending) { - // dout(4) <<__func__<< " process GW down " << gw_id << dendl; -#define ILLEGAL_GW_ID " " -#define MIN_NUM_ANA_GROUPS 0xFFF - int min_num_ana_groups_in_gw = 0; - int current_ana_groups_in_gw = 0; - GW_ID_T min_loaded_gw_id = ILLEGAL_GW_ID; - auto subsyst_it = find_subsystem_map(nqn); - - // this GW may handle several ANA groups and for each of them need to found the candidate GW - if (gw_state->sm_state[grpid] == GW_ACTIVE_STATE || gw_state->optimized_ana_group_id == grpid) { - // Find a GW that takes over the ANA group(s) - min_num_ana_groups_in_gw = MIN_NUM_ANA_GROUPS; - min_loaded_gw_id = ILLEGAL_GW_ID; - for (auto& itr : *subsyst_it) { // for all the gateways of the subsystem - if (itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - - current_ana_groups_in_gw = 0; - for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { - if (itr.second.sm_state[j] == GW_BLOCKED_AGROUP_OWNER || itr.second.sm_state[j] == GW_WAIT_FAILBACK_PREPARED) { - current_ana_groups_in_gw = 0xFFFF; - break; // dont take into account GWs in the transitive state - } - else if (itr.second.sm_state[j] == GW_ACTIVE_STATE) - //dout(4) << " process GW down " << current_ana_groups_in_gw << dendl; - current_ana_groups_in_gw++; // how many ANA groups are handled by this GW + // dout(4) <<__func__<< " process GW down " << gw_id << dendl; + #define ILLEGAL_GW_ID " " + #define MIN_NUM_ANA_GROUPS 0xFFF + int min_num_ana_groups_in_gw = 0; + int current_ana_groups_in_gw = 0; + GW_ID_T min_loaded_gw_id = ILLEGAL_GW_ID; + + auto& nqn_gws_states = Gmap[group_key][nqn]; + + auto gw_state = nqn_gws_states.find(gw_id); + ceph_assert(gw_state != nqn_gws_states.end()); + + // this GW may handle several ANA groups and for each of them need to found the candidate GW + if (gw_state->second.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE || gw_state->second.optimized_ana_group_id == grpid) { + // Find a GW that takes over the ANA group(s) + min_num_ana_groups_in_gw = MIN_NUM_ANA_GROUPS; + min_loaded_gw_id = ILLEGAL_GW_ID; + for (auto& found_gw_state: nqn_gws_states) { // for all the gateways of the subsystem + auto st = found_gw_state.second; + if (st.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + current_ana_groups_in_gw = 0; + for (int j = 0; j < MAX_SUPPORTED_ANA_GROUPS; j++) { + if (st.sm_state[j] == GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER || st.sm_state[j] == GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED) { + current_ana_groups_in_gw = 0xFFFF; + break; // dont take into account GWs in the transitive state + } + else if (st.sm_state[j] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) + //dout(4) << " process GW down " << current_ana_groups_in_gw << dendl; + current_ana_groups_in_gw++; // how many ANA groups are handled by this GW } if (min_num_ana_groups_in_gw > current_ana_groups_in_gw) { min_num_ana_groups_in_gw = current_ana_groups_in_gw; - min_loaded_gw_id = itr.first; - dout(4) << "choose: gw-id min_ana_groups " << itr.first << current_ana_groups_in_gw << " min " << min_num_ana_groups_in_gw << dendl; + min_loaded_gw_id = found_gw_state.first; + dout(4) << "choose: gw-id min_ana_groups " << min_loaded_gw_id << current_ana_groups_in_gw << " min " << min_num_ana_groups_in_gw << dendl; } } } if (min_loaded_gw_id != ILLEGAL_GW_ID) { propose_pending = true; - set_failover_gw_for_ANA_group(gw_id, min_loaded_gw_id, nqn, grpid); + set_failover_gw_for_ANA_group(gw_id, group_key, min_loaded_gw_id, nqn, grpid); } else { - if (gw_state->sm_state[grpid] == GW_ACTIVE_STATE){// not found candidate but map changed. + if (gw_state->second.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE){// not found candidate but map changed. propose_pending = true; dout(4) << "gw down no candidate found " << dendl; - _dump_gwmap(Gmap); } } - gw_state->sm_state[grpid] = GW_STANDBY_STATE; + gw_state->second.sm_state[grpid] = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE; } - return 0; } - int NVMeofGwMap::fsm_handle_gw_down (const GW_ID_T &gw_name, const std::string& nqn, GW_STATES_PER_AGROUP_E state , int grpid, bool &map_modified) + void NVMeofGwMap::fsm_handle_gw_down(const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, GW_STATES_PER_AGROUP_E state, ANA_GRP_ID_T grpid, bool &map_modified) { switch (state) { - case GW_STANDBY_STATE: - case GW_IDLE_STATE: - // nothing to do - break; + case GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE: + case GW_STATES_PER_AGROUP_E::GW_IDLE_STATE: + // nothing to do + break; - case GW_WAIT_FAILBACK_PREPARED: - { - cancel_timer(gw_name, nqn, grpid); - auto subsyst_it = find_subsystem_map(nqn); - for (auto& itr : *subsyst_it){ - if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER) // found GW that was intended for Failback for this ana grp - { - dout(4) << "Warning: Outgoing Failback when GW is down back - to rollback it" << nqn <<" GW " <optimized_ana_group_id) {// Try to find GW that temporary owns my group - if found, this GW should pass to standby for this group - auto subsyst_it = find_subsystem_map(nqn); - for (auto& itr : *subsyst_it){ - if (itr.second.sm_state[grpid] == GW_ACTIVE_STATE || itr.second.sm_state[grpid] == GW_WAIT_FAILBACK_PREPARED){ - set_gw_standby_state(&itr.second, grpid); - map_modified = true; - if (itr.second.sm_state[grpid] == GW_WAIT_FAILBACK_PREPARED) - cancel_timer(itr.first, nqn, grpid); - break; - } - } +void NVMeofGwMap::fsm_handle_gw_delete (const GW_ID_T &gw_id, const GROUP_KEY& group_key, + const NQN_ID_T& nqn, GW_STATES_PER_AGROUP_E state , ANA_GRP_ID_T grpid, bool &map_modified) { + switch (state) + { + case GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE: + case GW_STATES_PER_AGROUP_E::GW_IDLE_STATE: + case GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER: + { + GW_STATE_T& gw_state = Gmap[group_key][nqn][gw_id]; + + if (grpid == gw_state.optimized_ana_group_id) {// Try to find GW that temporary owns my group - if found, this GW should pass to standby for this group + auto& gateway_states = Gmap[group_key][nqn]; + for (auto& gs: gateway_states) { + if (gs.second.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE || gs.second.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED){ + gs.second.standby_state(grpid); + if (gs.second.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED) + cancel_timer(gs.first, group_key, nqn, grpid); + break; + } + } + } + } + break; + + case GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED: + { + cancel_timer(gw_id, group_key, nqn, grpid); + for (auto& nqn_gws_state: Gmap[group_key][nqn]) { + auto& st = nqn_gws_state.second; + + if (st.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER) { // found GW that was intended for Failback for this ana grp + dout(4) << "Warning: Outgoing Failback when GW is deleted - to rollback it" << nqn <<" GW " <sm_state[grpid] == GW_WAIT_FAILBACK_PREPARED) { - - dout(4) << "Expired Failback timer from GW " << gw_name << " ANA groupId "<< grpid << dendl; - - cancel_timer(gw_name, nqn, grpid); - for (auto& itr : *subsyst_it) { - if (itr.second.sm_state[grpid] == GW_BLOCKED_AGROUP_OWNER && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - set_gw_standby_state(gw_state, grpid); - itr.second.sm_state[grpid] = GW_ACTIVE_STATE; - dout(4) << "Failback from GW " << gw_name << " to " << itr.first << dendl; + auto& gw_state = Gmap[group_key][nqn][gw_id]; + + if (gw_state.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_WAIT_FAILBACK_PREPARED) { + + dout(4) << "Expired Failback timer from GW " << gw_id << " ANA groupId "<< grpid << dendl; + + cancel_timer(gw_id, group_key, nqn, grpid); + for (auto& gw_state: Gmap[group_key][nqn]) { + auto& st = gw_state.second; + if (st.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_BLOCKED_AGROUP_OWNER && + st.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + st.standby_state(grpid); + st.sm_state[grpid] = GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE; + dout(4) << "Failback from GW " << gw_id << " to " << gw_state.first << dendl; map_modified = true; break; } - else if (itr.second.optimized_ana_group_id == grpid ){ - if(itr.second.sm_state[grpid] == GW_STANDBY_STATE && itr.second.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { - itr.second.sm_state[grpid] = GW_ACTIVE_STATE; // GW failed and started during the persistency interval - dout(4) << "Failback unsuccessfull. GW: " << itr.first << "becomes Active for the ana group " << grpid << dendl; + else if (st.optimized_ana_group_id == grpid ){ + if(st.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE && st.availability == GW_AVAILABILITY_E::GW_AVAILABLE) { + st.sm_state[grpid] = GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE; // GW failed and started during the persistency interval + dout(4) << "Failback unsuccessfull. GW: " << gw_state.first << "becomes Active for the ana group " << grpid << dendl; } - set_gw_standby_state(gw_state, grpid); - dout(4) << "Failback unsuccessfull GW: " << gw_name << "becomes standby for the ana group " << grpid << dendl; + st.standby_state(grpid); + dout(4) << "Failback unsuccessfull GW: " << gw_id << "becomes standby for the ana group " << grpid << dendl; map_modified = true; break; } } } - return 0; } +void NVMeofGwMap::start_timer(const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, ANA_GRP_ID_T anagrpid) { + Gmetadata[group_key][nqn][gw_id].anagrp_sm_tstamps[anagrpid] = 0; +} + +int NVMeofGwMap::get_timer(const GW_ID_T &gw_id, GROUP_KEY& group_key, const NQN_ID_T& nqn, ANA_GRP_ID_T anagrpid) { + auto timer = Gmetadata[group_key][nqn][gw_id].anagrp_sm_tstamps[anagrpid]; + ceph_assert(timer != INVALID_GW_TIMER); + return timer; +} -int NVMeofGwMap::set_gw_standby_state(GW_STATE_T* gw_state, uint8_t ANA_groupid) -{ - gw_state->sm_state[ANA_groupid] = GW_STANDBY_STATE; - gw_state->failover_peer[ANA_groupid] = "NULL"; - return 0; +void NVMeofGwMap::cancel_timer(const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, ANA_GRP_ID_T anagrpid) { + Gmetadata[group_key][nqn][gw_id].anagrp_sm_tstamps[anagrpid] = INVALID_GW_TIMER; } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 348bd3bee237..43aba52cd355 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -7,12 +7,8 @@ #ifndef MON_NVMEOFGWMAP_H_ #define MON_NVMEOFGWMAP_H_ -#include "string" -#include -#include "map" +#include #include -#include -#include #include "include/encoding.h" #include "include/utime.h" #include "common/Formatter.h" @@ -23,455 +19,74 @@ #include "PaxosService.h" #include "msg/Message.h" #include "common/ceph_time.h" -/*#include "NVMeofGwMon.h" - -using std::ostream; - -#define dout_subsys ceph_subsys_mon -#undef dout_prefix -#define dout_prefix _prefix(_dout, mon, this) -using namespace TOPNSPC::common; - -class NVMeofGwMap; - -inline ostream& _prefix(std::ostream *_dout, const Monitor &mon, - const NVMeofGwMap *map) { - return *_dout << "mon." << mon.name << "@" << mon.rank; -} - */ +#include "NVMeofGwTypes.h" using ceph::coarse_mono_clock; - -using GW_ID_T = std::string; -using ANA_GRP_ID_T = uint16_t; -typedef enum { - GW_IDLE_STATE = 0, //invalid state - GW_STANDBY_STATE, - GW_ACTIVE_STATE, - GW_BLOCKED_AGROUP_OWNER, - GW_WAIT_FAILBACK_PREPARED -}GW_STATES_PER_AGROUP_E; - -enum class GW_AVAILABILITY_E { - GW_CREATED = 0, - GW_AVAILABLE, - GW_UNAVAILABLE, - GW_DELETED -}; - -#define MAX_SUPPORTED_ANA_GROUPS 16 -#define INVALID_GW_TIMER 0xffff -#define REDUNDANT_GW_ANA_GROUP_ID 0xFF -typedef struct GW_STATE_T { - GW_STATES_PER_AGROUP_E sm_state [MAX_SUPPORTED_ANA_GROUPS]; // state machine states per ANA group - GW_ID_T failover_peer[MAX_SUPPORTED_ANA_GROUPS]; - ANA_GRP_ID_T optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF - GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable - uint64_t version; // version per all GWs of the same subsystem. subsystem version -}GW_STATE_T; - -typedef struct GW_METADATA_T { - int anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state -}GW_METADATA_T; - - -using GWMAP = std::map >; -using GWMETADATA = std::map >; -using SUBSYST_GWMAP = std::map; -using SUBSYST_GWMETA = std::map; - -using NONCE_VECTOR_T = std::vector; -using GW_ANA_NONCE_MAP = std::map ; - - -typedef struct { - ANA_GRP_ID_T ana_grp_id; // ana-group-id allocated for this GW, GW owns this group-id - GW_ANA_NONCE_MAP nonce_map; -} GW_CREATED_T; - -using GW_CREATED_MAP = std::map; - - - -inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { - for(int i = 0; i Gmap; + std::map Created_gws; + std::map Gmetadata; + + int cfg_add_gw (const GW_ID_T &gw_id, const GROUP_KEY& group_key); + int cfg_delete_gw (const GW_ID_T &gw_id, const GROUP_KEY& group_key); + void process_gw_map_ka (const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, bool &propose_pending); + int process_gw_map_gw_down (const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, bool &propose_pending); + void update_active_timers (bool &propose_pending); + void handle_abandoned_ana_groups (bool &propose_pending); + void handle_removed_subsystems (const std::vector ¤t_subsystems, const GROUP_KEY& group_key, bool &propose_pending); +private: + void fsm_handle_gw_down (const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, GW_STATES_PER_AGROUP_E state, ANA_GRP_ID_T grpid, bool &map_modified); + void fsm_handle_gw_delete (const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, GW_STATES_PER_AGROUP_E state, ANA_GRP_ID_T grpid, bool &map_modified); + void fsm_handle_to_expired (const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, ANA_GRP_ID_T grpid, bool &map_modified); - void encode_nonces(const GW_ANA_NONCE_MAP & nonce_map, ceph::buffer::list &bl, bool full_encode ) const - { - ENCODE_START(2, 1, bl); - - encode((int)nonce_map.size(), bl); - for (auto& itr : nonce_map) { - encode((ANA_GRP_ID_T)itr.first, bl); - // now encode the nonces - const NONCE_VECTOR_T &nonce_vector = itr.second; - encode ((int)nonce_vector.size(), bl); // encode the vector size - for(auto &list_it : nonce_vector ){ - encode(list_it, bl); - } - } - ENCODE_FINISH(bl); - } - - void decode_nonces(GW_ANA_NONCE_MAP & nonce_map, ceph::buffer::list::const_iterator &bl, bool full_decode = true) { - DECODE_START(1, bl); - int map_size; - ANA_GRP_ID_T ana_grp_id; - int vector_size; - std::string nonce; - - decode(map_size, bl); - for(int i = 0; iana_grp_id, bl); - encode_nonces(gw_created->nonce_map, bl, full_encode);//TODO "if not full_encode" to prevent sending full nonce map to the clients - } - encode ((int)Gmap.size(),bl); // number nqn - for (auto& itr : Gmap) { - encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : std::map - } + using ceph::encode; + __u8 struct_v = 0; + encode(struct_v, bl); // version + encode(epoch, bl);// global map epoch - if(full_encode) { - // Encode Gmetadata - encode ((int)Gmetadata.size(),bl); - for (auto& itr : Gmetadata) { - encode((const std::string &)itr.first, bl);// nqn - encode( itr.second, bl);// encode the full map of this nqn : - } + encode(Created_gws, bl); //Encode created GWs + encode(Gmap, bl); + if (full_encode) { + encode(Gmetadata, bl); } - ENCODE_FINISH(bl); } void decode(ceph::buffer::list::const_iterator &bl, bool full_decode = true) { - DECODE_START(1, bl); - int num_subsystems; - std::string nqn; + using ceph::decode; + __u8 struct_v; + decode(struct_v, bl); + ceph_assert(struct_v == 0); decode(epoch, bl); - //Decode created GWs - int num_created_gws; - decode(num_created_gws, bl); - Created_gws.clear(); - for(int i = 0; i())); - //decode the map - gw_map.clear(); - decode(gw_map, bl); - //insert the qw_map to Gmap - for(auto &itr: gw_map ){ - Gmap[nqn].insert({itr.first, itr.second}); - } - } - - if(full_decode){ - // decode Gmetadata - decode(num_subsystems, bl); - SUBSYST_GWMETA gw_meta; - Gmetadata.clear(); - //_dump_gwmap(Gmap); - for(int i = 0; i < num_subsystems; i++){ - decode(nqn, bl); - Gmetadata.insert(make_pair(nqn, std::map())); - //decode the map - gw_meta.clear(); - decode(gw_meta, bl); - //insert the gw_meta to Gmap - for(auto &itr: gw_meta ){ - Gmetadata[nqn].insert({itr.first, itr.second}); - } - } - } - DECODE_FINISH(bl); } - - int find_created_gw(const GW_ID_T &gw_id , int &ana_grp_id) const - { - auto it = Created_gws.find(gw_id); - if (it != Created_gws.end()) { - ana_grp_id = it->second.ana_grp_id; - return 0; - } - return -1; - } - - GW_CREATED_T* find_created_gw(const GW_ID_T &gw_id ) - { - auto it = Created_gws.find(gw_id); - if (it != Created_gws.end()) { - return &it->second; - } - return NULL; - } - - int update_gw_nonce(const GW_ID_T &gw_id, ANA_GRP_ID_T &ana_grp_id, NONCE_VECTOR_T &new_nonces) - { - GW_CREATED_T* gw_created = find_created_gw(gw_id); - if(!gw_created) - return 1; - if (new_nonces.size() >0){ - GW_ANA_NONCE_MAP & nonce_map = gw_created->nonce_map; - if(nonce_map[ana_grp_id].size() == 0){ - nonce_map.insert({ana_grp_id, NONCE_VECTOR_T()}) ; - } - nonce_map[ana_grp_id].clear(); - nonce_map[ana_grp_id].reserve(new_nonces.size()); //gw_created->nonces.clear();// gw_created->nonces.reserve(new_nonces.size()); - for( auto &it : new_nonces){ - nonce_map[ana_grp_id].push_back(it); //gw_created->nonces.push_back(it); - } - } - return 0; - } - - int destroy_gw(const GW_ID_T &gw_id) - { - GW_CREATED_T* gw_created = find_created_gw(gw_id); - if( gw_created ) - { - GW_ANA_NONCE_MAP & nonce_map = gw_created->nonce_map; - for(auto &it : nonce_map){ - it.second.clear();// clear the nonce contexts - } - nonce_map.clear(); - return 0; - } - else - return 1; - - } - - - GW_STATE_T * find_gw_map(const GW_ID_T &gw_id, const std::string& nqn ) const - { - auto it = Gmap.find(nqn); - if (it != Gmap.end() /* && it->first == nqn*/) { - auto it2 = it->second.find(gw_id); - if (it2 != it->second.end() /* && it2->first == gw_id*/ ){ // cout << "AAAA " << gw_id << " " << it2->first << endl; - return (GW_STATE_T *) &it2->second; - } - } - return NULL; - } - - int insert_gw_to_map(const GW_ID_T &gw_id, const std::string& nqn, int ana_grp_id ){ - if(Gmap[nqn].size() == 0) - Gmap.insert(make_pair(nqn, SUBSYST_GWMAP())); - - GW_STATE_T state{ {GW_IDLE_STATE,}, {""}, (uint16_t)ana_grp_id, GW_AVAILABILITY_E::GW_CREATED, 0 }; - for(int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) state.failover_peer[i] = "NULL"; - - Gmap[nqn].insert({gw_id, state}); - create_metadata(gw_id, nqn); - return 0; - } - - int update_active_timers( bool &propose_pending); - epoch_t get_epoch() const { return epoch; } - int _dump_gwmap(GWMAP & Gmap)const; - int _dump_gwmap(std::stringstream &ss)const ; - int _dump_created_gws(std::stringstream &ss)const ; - int cfg_add_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group); - int cfg_delete_gw (const GW_ID_T &gw_id, const std::string& pool, const std::string& group, bool &propose_pending); - int process_gw_map_ka (const GW_ID_T &gw_id, const std::string& nqn , bool &propose_pending); - int process_gw_map_gw_down (const GW_ID_T &gw_id, const std::string& nqn, bool &propose_pending); - int handle_abandoned_ana_groups (bool &propose_pending); - int handle_removed_subsystems (const std::vector &created_subsystems, bool &propose_pending); - - //make these functions static - static void gw_name_from_id_pool_group (std::string &gw_name , const std::string &gw_id ,const std::string &gw_pool, const std::string &gw_group ){ - gw_name = gw_id + "." + gw_pool + "." + gw_group; - } - static void gw_preffix_from_id_pool_group (std::string &gw_preffix ,const std::string &gw_pool, const std::string &gw_group ){ - gw_preffix = "." + gw_pool + "." + gw_group; - } - - void debug_encode_decode(){ - ceph::buffer::list bl; - encode(bl); - auto p = bl.cbegin(); - decode(p); - } - -private: - int fsm_handle_gw_down (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); - int fsm_handle_gw_delete (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); - int fsm_handle_gw_up (const GW_ID_T &gw_id, const std::string& nqn, GW_STATES_PER_AGROUP_E state, int grpid, bool &map_modified); - int fsm_handle_to_expired (const GW_ID_T &gw_id, const std::string& nqn, int grpid, bool &map_modified); - - int find_failover_candidate(const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, int grpid, bool &propose_pending); - int find_failback_gw (const GW_ID_T &gw_id, const std::string& nqn, GW_STATE_T* gw_state, bool &found); - int set_failover_gw_for_ANA_group (const GW_ID_T &failed_gw_id, const GW_ID_T &gw_id, const std::string& nqn, uint8_t ANA_groupid); - int set_gw_standby_state(GW_STATE_T* gw_state, uint8_t ANA_groupid); - - SUBSYST_GWMAP * find_subsystem_map(const std::string& nqn) - { - auto it = Gmap.find(nqn); - if (it != Gmap.end() ){ - return &it->second; - } - return NULL; - } - - int create_metadata(const GW_ID_T& gw_id, const std::string & nqn) - { - - if(Gmetadata[nqn].size() == 0) - Gmetadata.insert(make_pair(nqn, std::map())); - //Gmetadata[nqn].insert({ gw_id, new_metadata }); - return 0; - } - - int delete_metadata(const GW_ID_T& gw_id, const std::string & nqn) - { - if(Gmetadata[nqn].size() != 0) - Gmetadata[nqn].erase(gw_id); - return 0; - } - - int start_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) - { - GW_METADATA_T* metadata; - //const auto now = ceph::coarse_mono_clock::now(); - if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { - metadata->anagrp_sm_tstamps[anagrpid] = 0;// set timer - } - else { - GW_METADATA_T new_metadata = {INVALID_GW_TIMER,}; - for (int i=0; ianagrp_sm_tstamps[anagrpid] != INVALID_GW_TIMER); - return metadata->anagrp_sm_tstamps[anagrpid]; - } - else{ - ceph_assert(false); - } - } - - int cancel_timer(const GW_ID_T &gw_id, const std::string& nqn, uint16_t anagrpid) - { - GW_METADATA_T* metadata; - int i; - if ((metadata = find_gw_metadata(gw_id, nqn)) != NULL) { - metadata->anagrp_sm_tstamps[anagrpid] = INVALID_GW_TIMER; - for(i=0; ianagrp_sm_tstamps[i] != INVALID_GW_TIMER) - break; - if(i==MAX_SUPPORTED_ANA_GROUPS){ - Gmetadata[nqn].erase(gw_id); // remove all gw_id timers from the map - } - } - else { - ceph_assert(false); - } - return 0; - } - - GW_METADATA_T* find_gw_metadata(const GW_ID_T &gw_id, const std::string& nqn); }; +#include "NVMeofGwSerialize.h" + #endif /* SRC_MON_NVMEOFGWMAP_H_ */ diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 9acdb1bf3a46..6c5a434d9783 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -12,25 +12,18 @@ #include "messages/MNVMeofGwBeacon.h" #include "messages/MNVMeofGwMap.h" -using std::map; -using std::make_pair; -using std::ostream; -using std::ostringstream; using std::string; -using std::vector; - #define dout_subsys ceph_subsys_mon #undef dout_prefix #define dout_prefix _prefix(_dout, this, this) using namespace TOPNSPC::common; -static ostream& _prefix(std::ostream *_dout, const NVMeofGwMon *h,//const Monitor &mon, +static std::ostream& _prefix(std::ostream *_dout, const NVMeofGwMon *h,//const Monitor &mon, const NVMeofGwMon *hmon) { return *_dout << "gw-mon." << hmon->mon.name << "@" << hmon->mon.rank; } #define MY_MON_PREFFIX " NVMeGW " - void NVMeofGwMon::init(){ dout(4) << MY_MON_PREFFIX << __func__ << "called " << dendl; } @@ -41,92 +34,7 @@ void NVMeofGwMon::on_restart(){ last_tick = ceph::coarse_mono_clock::now(); } - -void NVMeofGwMon::on_shutdown() { - -} - -static int cnt ; -#define start_cnt 6 -void NVMeofGwMon::inject1(){ - //bool propose = false; - if( ++cnt == 4 ){// simulation that new configuration was added - pending_map.cfg_add_gw("GW1" ,"g1","p1"); - pending_map.cfg_add_gw("GW2" ,"g1","p1"); - pending_map.cfg_add_gw("GW3" ,"g1","p1"); - NONCE_VECTOR_T new_nonces = {"abc", "def","hij"}; - ANA_GRP_ID_T grp = 1; - pending_map.update_gw_nonce("GW1.g1.p1", grp, new_nonces); - grp = 2; - pending_map.update_gw_nonce("GW1.g1.p1", grp, new_nonces); - std::stringstream ss; - pending_map._dump_created_gws(ss); - dout(4) << ss.str() << dendl; - - //pending_map._dump_gwmap(pending_map.Gmap); - pending_map.debug_encode_decode(); - dout(4) << "Dump map after decode encode:" <("mon_nvmeofgw_beacon_grace"); dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; @@ -171,18 +79,16 @@ void NVMeofGwMon::tick(){ const auto cutoff = now - nvmegw_beacon_grace; for(auto &itr : last_beacon){// Pass over all the stored beacons + auto& lb = itr.first; auto last_beacon_time = itr.second; - GW_ID_T gw_name; - std::string nqn; if(last_beacon_time < cutoff){ - get_gw_and_nqn_from_key(itr.first, gw_name, nqn); - dout(4) << "beacon timeout for GW " << gw_name << " nqn " << nqn << dendl; - pending_map.process_gw_map_gw_down( gw_name, nqn, propose); + dout(4) << "beacon timeout for GW " << lb.gw_id << " nqn " << lb.nqn << dendl; + pending_map.process_gw_map_gw_down( lb.gw_id, lb.group_key, lb.nqn, propose); _propose_pending |= propose; - last_beacon.erase(itr.first); + last_beacon.erase(lb); } - else{ - dout(4) << "beacon live for GW key: " << itr.first << dendl; + else { + dout(4) << "beacon live for GW key: " << lb.gw_id << " nqn " << lb.nqn << dendl; } } @@ -205,7 +111,6 @@ const char **NVMeofGwMon::get_tracked_conf_keys() const static const char* KEYS[] = { "nvmf_mon_mapdump", "nvmf_mon_log_level", - //"rocksdb_cache_size", NULL }; return KEYS; @@ -217,31 +122,19 @@ void NVMeofGwMon::handle_conf_change(const ConfigProxy& conf, dout(4) << __func__ << " " << changed << dendl; if (changed.count("nvmef_gw_mapdump")) { - //_set_cache_autotuning(); - std::stringstream ss1; - pending_map._dump_gwmap(ss1); - - std::stringstream ss2; - pending_map._dump_created_gws(ss2); - + dout(4) << "pending_map " << pending_map << dendl; } if (changed.count("nvmf_mon_log_level")){ dout(4) << "TODO SET LOG LEVEL >= " << g_conf()->nvmf_mon_log_level << dendl; } } - void NVMeofGwMon::create_pending(){ pending_map = map;// deep copy of the object // TODO since "pending_map" can be reset each time during paxos re-election even in the middle of the changes ... pending_map.epoch++; - //map.epoch ++; - dout(4) << MY_MON_PREFFIX << __func__ << " pending epoch " << pending_map.epoch << dendl; - - dout(5) << MY_MON_PREFFIX << "dump_pending" << dendl; - - pending_map._dump_gwmap(pending_map.Gmap); + dout(4) << MY_MON_PREFFIX << __func__ << " pending " << pending_map << dendl; } void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t){ @@ -268,10 +161,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap){ auto p = bl.cbegin(); map.decode(p); if(!mon.is_leader()) { - std::stringstream ss; - map._dump_created_gws(ss); - dout(4) << ss.str() << dendl; - map._dump_gwmap(map.Gmap); + dout(4) << "leader map: " << map << dendl; } check_subs(true); @@ -282,8 +172,8 @@ void NVMeofGwMon::check_sub(Subscription *sub) { /* MgrMonitor::check_sub*/ //if (sub->type == "NVMeofGw") { - dout(4) << "sub->next , map-epoch " << sub->next << " " << map.get_epoch() << dendl; - if (sub->next <= map.get_epoch()) + dout(4) << "sub->next , map-epoch " << sub->next << " " << map.epoch << dendl; + if (sub->next <= map.epoch) { dout(4) << "Sending map to subscriber " << sub->session->con << " " << sub->session->con->get_peer_addr() << dendl; sub->session->con->send_message2(make_message(map)); @@ -291,7 +181,7 @@ void NVMeofGwMon::check_sub(Subscription *sub) if (sub->onetime) { mon.session_map.remove_sub(sub); } else { - sub->next = map.get_epoch() + 1; + sub->next = map.epoch + 1; } } } @@ -423,24 +313,23 @@ bool NVMeofGwMon::prepare_command(MonOpRequestRef op) const auto prefix = cmd_getval_or(cmdmap, "prefix", string{}); dout(4) << "MonCommand : "<< prefix << dendl; - bool map_modified = false; if( prefix == "nvme-gw create" || prefix == "nvme-gw delete" ) { std::string id, pool, group; cmd_getval(cmdmap, "id", id); cmd_getval(cmdmap, "pool", pool); cmd_getval(cmdmap, "group", group); + auto group_key = std::make_pair(pool, group); if(prefix == "nvme-gw create"){ - rc = pending_map.cfg_add_gw(id ,pool , group); + rc = pending_map.cfg_add_gw(id, group_key); ceph_assert(rc!= -EINVAL); - map_modified = true; } else{ - rc = pending_map.cfg_delete_gw(id, pool, group, map_modified);// TODO add params + rc = pending_map.cfg_delete_gw(id, group_key); ceph_assert(rc!= -EINVAL); } - if(map_modified){ + if(rc != -EEXIST){ propose_pending(); goto update; } @@ -482,51 +371,34 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ } -#define GW_DELIM ',' - -void NVMeofGwMon::get_gw_and_nqn_from_key(std::string key, GW_ID_T &gw_name , std::string& nqn) -{ - std::stringstream s1(key); - - std::getline(s1, gw_name, GW_DELIM); - std::getline(s1, nqn, GW_DELIM); -} - //#define BYPASS_GW_CREATE_CLI bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; - GW_STATE_T* gw_state = NULL; auto m = op->get_req(); - // dout(4) << "availability " << m->get_availability() << " GW : " <get_gw_id() << " subsystems " << m->get_subsystems() << " epoch " << m->get_version() << dendl; - std::stringstream out; - m->print(out); - dout(4) << out.str() <get_availability() << " GW : " <get_gw_id() << " subsystems " << m->get_subsystems() << " epoch " << m->get_version() << dendl; GW_ID_T gw_id = m->get_gw_id(); - GW_ID_T pool = m->get_gw_pool(); - GW_ID_T group = m->get_gw_group(); + GROUP_KEY group_key = std::make_pair(m->get_gw_pool(), m->get_gw_group()); GW_AVAILABILITY_E avail = m->get_availability(); const GwSubsystems& subsystems = m->get_subsystems(); bool propose = false; - int ana_grp_id = 0; - std::vector configured_subsystems; - - std::string gw_name; - pending_map.gw_name_from_id_pool_group(gw_name , gw_id , pool, group ); + ANA_GRP_ID_T ana_grp_id = 0; + std::vector configured_subsystems; if (avail == GW_AVAILABILITY_E::GW_CREATED){ // in this special state GWs receive map with just "created_gws" vector - if(pending_map.find_created_gw(gw_name, ana_grp_id) == 0) {// GW is created administratively - dout(4) << "GW " << gw_name << " sent beacon being in state GW_WAIT_INITIAL_MAP" << dendl; + auto& created_gw = pending_map.Created_gws[group_key][gw_id]; + if(created_gw.ana_grp_id == ana_grp_id) {// GW is created administratively + dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP" << dendl; propose = true; } else{ - dout(4) << "GW " << gw_name << " sent beacon being in state GW_WAIT_INITIAL_MAP but it is not created yet!!! "<< dendl; + dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP but it is not created yet!!! "<< dendl; #ifdef BYPASS_GW_CREATE_CLI - pending_map.cfg_add_gw(gw_name); - dout(4) << "GW " << gw_name << " created since mode is bypass-create-cli "<< dendl; + pending_map.cfg_add_gw(gw_id); + dout(4) << "GW " << gw_id << " created since mode is bypass-create-cli "<< dendl; propose= true; #endif } @@ -536,14 +408,19 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ // Validation gw is in the database for (const NqnState &st : subsystems) { - gw_state = pending_map.find_gw_map( gw_name, st.nqn ); - if (gw_state == NULL) + auto& nqn_gws_states = pending_map.Gmap[group_key][st.nqn]; + auto gw_state = nqn_gws_states.find(gw_id); + if (gw_state == nqn_gws_states.end()) { - dout(4) << "GW + NQN pair is not in the database: " << gw_name << " " << st.nqn << dendl; - // if GW is created - if(pending_map.find_created_gw(gw_name, ana_grp_id) == 0) {// GW is created administratively - pending_map.insert_gw_to_map(gw_name, st.nqn, ana_grp_id); - dout(4) << "GW + NQN pair " << gw_name << " " << st.nqn << " inserted to map, ANA grp-id " << ana_grp_id << dendl; + dout(4) << "GW + NQN pair is not in the database: " << gw_id << " " << st.nqn << dendl; + // if GW is created + auto& group_gws = pending_map.Created_gws[group_key]; + auto gw_state = group_gws.find(gw_id); + if (gw_state != group_gws.end()) { + GW_STATE_T gst(ana_grp_id); + pending_map.Gmap[group_key][st.nqn][gw_id] = gst; + GW_METADATA_T md; + pending_map.Gmetadata[group_key][st.nqn][gw_id] = md; } else { //drop beacon on the floor silently discard @@ -552,7 +429,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ } configured_subsystems.push_back(st.nqn); } - pending_map.handle_removed_subsystems( configured_subsystems, propose ); + pending_map.handle_removed_subsystems( configured_subsystems, group_key, propose ); if(avail == GW_AVAILABILITY_E::GW_AVAILABLE) { @@ -560,19 +437,21 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ // check pending_map.epoch vs m->get_version() - if different - drop the beacon for (const NqnState& st: subsystems) { - last_beacon[(gw_name + GW_DELIM + st.nqn)] = now; - pending_map.process_gw_map_ka( gw_name, st.nqn, propose ); + LastBeacon lb = { gw_id, group_key, st.nqn }; + last_beacon[lb] = now; + pending_map.process_gw_map_ka( gw_id, group_key, st.nqn, propose ); } } else if(avail == GW_AVAILABILITY_E::GW_UNAVAILABLE){ // state set by GW client application // TODO: remove from last_beacon if found . if gw was found in last_beacon call process_gw_map_gw_down for (const NqnState& st: subsystems) { + LastBeacon lb = { gw_id, group_key, st.nqn }; - auto it = last_beacon.find(gw_name + GW_DELIM + st.nqn); + auto it = last_beacon.find(lb); if (it != last_beacon.end()){ - last_beacon.erase(gw_name + GW_DELIM + st.nqn); - pending_map.process_gw_map_gw_down( gw_name, st.nqn, propose ); + last_beacon.erase(lb); + pending_map.process_gw_map_gw_down( gw_id, group_key, st.nqn, propose ); } } } diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index 97aad7e03433..a5d83e570ff4 100755 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -4,27 +4,38 @@ * Created on: Oct 17, 2023 * Author: 227870756 */ - #ifndef MON_NVMEGWMONITOR_H_ #define MON_NVMEGWMONITOR_H_ -#include -#include -#include "include/Context.h" -//#include "MgrMap.h" -#include "PaxosService.h" -#include "MonCommand.h" #include "NVMeofGwMap.h" +struct LastBeacon { + GW_ID_T gw_id; + GROUP_KEY group_key; + NQN_ID_T nqn; + + // Comparison operators to allow usage as a map key + bool operator<(const LastBeacon& other) const { + if (gw_id != other.gw_id) return gw_id < other.gw_id; + if (group_key != other.group_key) return group_key < other.group_key; + return nqn < other.nqn; + } + + bool operator==(const LastBeacon& other) const { + return gw_id == other.gw_id && + group_key == other.group_key && + nqn == other.nqn; + } +}; + class NVMeofGwMon: public PaxosService, public md_config_obs_t { - NVMeofGwMap map; //NVMeGWMap + NVMeofGwMap map; //NVMeGWMap NVMeofGwMap pending_map; - //utime_t first_seen_inactive; //TODO the key of the beacon is a unique gw-id; for example string consisting from gw_num + subsystem_nqn - std::map< std::string, ceph::coarse_mono_clock::time_point> last_beacon; + std::map last_beacon; // when the mon was not updating us for some period (e.g. during slow @@ -44,21 +55,11 @@ class NVMeofGwMon: public PaxosService, const char** get_tracked_conf_keys() const override; void handle_conf_change(const ConfigProxy& conf, const std::set &changed) override; - //const MgrMap &get_map() const { return map; } - - // bool in_use() const { return map.epoch > 0; } - - //void prime_mgr_client(); - - - // void get_store_prefixes(std::set& s) const override; - // 3 pure virtual methods of the paxosService void create_initial()override{}; void create_pending()override ; void encode_pending(MonitorDBStore::TransactionRef t)override ; - void init() override; void on_shutdown() override; void on_restart() override; @@ -76,17 +77,10 @@ class NVMeofGwMon: public PaxosService, bool preprocess_beacon(MonOpRequestRef op); bool prepare_beacon(MonOpRequestRef op); - //void check_sub(Subscription *sub); - //void check_subs() - void tick() override; void print_summary(ceph::Formatter *f, std::ostream *ss) const; - //const std::vector &get_command_descs() const; - - - //void get_versions(std::map> &versions); void check_subs(bool type); void check_sub(Subscription *sub); private: @@ -95,6 +89,4 @@ class NVMeofGwMon: public PaxosService, }; - - #endif /* SRC_MON_NVMEGWMONITOR_H_ */ diff --git a/src/mon/NVMeofGwSerialize.h b/src/mon/NVMeofGwSerialize.h new file mode 100755 index 000000000000..e99cb4c9b311 --- /dev/null +++ b/src/mon/NVMeofGwSerialize.h @@ -0,0 +1,311 @@ +/* + * NVMeofGwSerialize.h + * + * Created on: Dec 29, 2023 + */ + +#ifndef MON_NVMEOFGWSERIALIZE_H_ +#define MON_NVMEOFGWSERIALIZE_H_ + +inline void encode(const GW_STATE_T& state, ceph::bufferlist &bl) { + for(int i = 0; i & created_gws, ceph::bufferlist &bl) { + encode (created_gws.size(), bl); // number of groups + for (auto& group_gws: created_gws) { + auto& group_key = group_gws.first; + encode(group_key.first, bl); // pool + encode(group_key.second, bl); // group + + auto& gws = group_gws.second; + encode (gws, bl); // encode group gws + } +} + +inline void decode(std::map& created_gws, ceph::buffer::list::const_iterator &bl) { + created_gws.clear(); + size_t ngroups; + decode(ngroups, bl); + for(size_t i = 0; i + } +} + +inline void decode(GWMAP& nqn_gws_states, ceph::buffer::list::const_iterator &bl) { + size_t num_subsystems; + + decode(num_subsystems, bl); + SUBSYST_GWMAP gw_map; + nqn_gws_states.clear(); + + for (size_t i = 0; i < num_subsystems; i++) { + std::string nqn; + decode(nqn, bl); + SUBSYST_GWMAP gw_map; + decode(gw_map, bl); + nqn_gws_states[nqn] = gw_map; + } +} + + +inline void encode(const std::map& gmap, ceph::bufferlist &bl) { + encode (gmap.size(), bl); // number of groups + for (auto& group_state: gmap) { + auto& group_key = group_state.first; + encode(group_key.first, bl); // pool + encode(group_key.second, bl); // group + + encode(group_state.second, bl); + } +} + +inline void decode(std::map& gmap, ceph::buffer::list::const_iterator &bl) { + gmap.clear(); + size_t ngroups; + decode(ngroups, bl); + for(size_t i = 0; i& gmetadata, ceph::bufferlist &bl) { + encode (gmetadata.size(), bl); // number of groups + for (auto& group_md: gmetadata) { + auto& group_key = group_md.first; + encode(group_key.first, bl); // pool + encode(group_key.second, bl); // group + + encode(group_md.second, bl); + } +} + +inline void decode(std::map& gmetadata, ceph::buffer::list::const_iterator &bl) { + gmetadata.clear(); + size_t ngroups; + decode(ngroups, bl); + for(size_t i = 0; i "; + for (auto& gw_state: nqn_state.second) { + os << " { gw_id: " << gw_state.first << " -> " << gw_state.second << "}"; + } + os << "}"; + } + return os; +}; + +inline std::ostream& operator<<(std::ostream& os, const NVMeofGwMap value) { + os << "NVMeofGwMap [ Gmap: "; + for (auto& group_state: value.Gmap) { + os << " { " << group_state.first << " } -> { " << group_state.second << " }"; + } + os << " ] [ Created_gws: "; + for (auto& group_gws: value.Created_gws) { + os << " { " << group_gws.first << " } -> { "; + for (auto& gw: group_gws.second) { + os << " { gw_id " << gw.first << " } -> { " << gw.second.ana_grp_id << " }"; + } + os << " }"; + } + os << "]"; + return os; +} +#endif /* SRC_MON_NVMEOFGWSERIALIZEP_H_ */ diff --git a/src/mon/NVMeofGwTypes.h b/src/mon/NVMeofGwTypes.h new file mode 100755 index 000000000000..4a759eeacb77 --- /dev/null +++ b/src/mon/NVMeofGwTypes.h @@ -0,0 +1,108 @@ +/* + * NVMeofGwTypes.h + * + * Created on: Dec 29, 2023 + */ + +#ifndef MON_NVMEOFGWTYPES_H_ +#define MON_NVMEOFGWTYPES_H_ +#include +#include +#include +#include + +using GW_ID_T = std::string; +using GROUP_KEY = std::pair; +using NQN_ID_T = std::string; +using ANA_GRP_ID_T = uint32_t; + + +enum class GW_STATES_PER_AGROUP_E { + GW_IDLE_STATE = 0, //invalid state + GW_STANDBY_STATE, + GW_ACTIVE_STATE, + GW_BLOCKED_AGROUP_OWNER, + GW_WAIT_FAILBACK_PREPARED +}; + +enum class GW_AVAILABILITY_E { + GW_CREATED = 0, + GW_AVAILABLE, + GW_UNAVAILABLE, + GW_DELETED +}; + +#define MAX_SUPPORTED_ANA_GROUPS 16 +#define INVALID_GW_TIMER 0xffff +#define REDUNDANT_GW_ANA_GROUP_ID 0xFF + +typedef GW_STATES_PER_AGROUP_E SM_STATE[MAX_SUPPORTED_ANA_GROUPS]; + +struct NqnState { + std::string nqn; // subsystem NQN + SM_STATE sm_state; // susbsystem's state machine state + uint16_t opt_ana_gid; // optimized ANA group index + + // Default constructor + NqnState(const std::string& _nqn) : nqn(_nqn), opt_ana_gid(0) { + for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + sm_state[i] = GW_STATES_PER_AGROUP_E::GW_IDLE_STATE; + } +}; + +typedef std::vector GwSubsystems; + +struct GW_STATE_T { + SM_STATE sm_state; // state machine states per ANA group + GW_ID_T failover_peer[MAX_SUPPORTED_ANA_GROUPS]; + ANA_GRP_ID_T optimized_ana_group_id; // optimized ANA group index as configured by Conf upon network entry, note for redundant GW it is FF + GW_AVAILABILITY_E availability; // in absence of beacon heartbeat messages it becomes inavailable + uint64_t version; // version per all GWs of the same subsystem. subsystem version + + GW_STATE_T(ANA_GRP_ID_T id): + optimized_ana_group_id(id), + availability(GW_AVAILABILITY_E::GW_CREATED), + version(0) + { + for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + sm_state[i] = GW_STATES_PER_AGROUP_E::GW_IDLE_STATE; + }; + + GW_STATE_T() : GW_STATE_T(REDUNDANT_GW_ANA_GROUP_ID) {}; + + void standby_state(ANA_GRP_ID_T grpid) { + sm_state[grpid] = GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE; + failover_peer[grpid] = ""; + }; +}; + +struct GW_METADATA_T { + int anagrp_sm_tstamps[MAX_SUPPORTED_ANA_GROUPS]; // statemachine timer(timestamp) set in some state + + GW_METADATA_T() { + for (int i=0; i >; +using GWMETADATA = std::map >; +using SUBSYST_GWMAP = std::map; +using SUBSYST_GWMETA = std::map; + +using NONCE_VECTOR_T = std::vector; +using GW_ANA_NONCE_MAP = std::map ; + + +struct GW_CREATED_T { + ANA_GRP_ID_T ana_grp_id; // ana-group-id allocated for this GW, GW owns this group-id + GW_ANA_NONCE_MAP nonce_map; + + GW_CREATED_T(): ana_grp_id(REDUNDANT_GW_ANA_GROUP_ID) {}; + GW_CREATED_T(ANA_GRP_ID_T id): ana_grp_id(id) {}; +}; + +using GW_CREATED_MAP = std::map; + +#endif /* SRC_MON_NVMEOFGWTYPES_H_ */ diff --git a/src/nvmeof/NVMeofGw.cc b/src/nvmeof/NVMeofGw.cc index 3f2ac6385d6a..0b97a45e92a5 100644 --- a/src/nvmeof/NVMeofGw.cc +++ b/src/nvmeof/NVMeofGw.cc @@ -197,12 +197,11 @@ void NVMeofGw::send_beacon() for (int i = 0; i < gw_subsystems.subsystems_size(); i++) { const subsystem& sub = gw_subsystems.subsystems(i); struct NqnState nqn_state(sub.nqn()); - GW_STATE_T* gw_state = map.find_gw_map(name, nqn_state.nqn); - if (gw_state) { - nqn_state.opt_ana_gid = gw_state->optimized_ana_group_id; - for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) - nqn_state.sm_state[i] = gw_state->sm_state[i]; - } + auto group_key = std::make_pair(pool, group); + GW_STATE_T& gw_state = map.Gmap[group_key][nqn_state.nqn][name]; + nqn_state.opt_ana_gid = gw_state.optimized_ana_group_id; + for (int i=0; i < MAX_SUPPORTED_ANA_GROUPS; i++) + nqn_state.sm_state[i] = gw_state.sm_state[i]; subs.push_back(nqn_state); } } @@ -267,30 +266,28 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) { dout(0) << "handle nvmeof gw map" << dendl; auto &mp = mmap->get_map(); - dout(0) << "received map epoch " << mp.get_epoch() << dendl; - std::stringstream ss; - mp._dump_gwmap(ss); - dout(0) << ss.str() << dendl; + dout(0) << "received map epoch " << mp.epoch << dendl; + dout(0) << "mp " << mp << dendl; ana_info ai; - std::string gw_name; - NVMeofGwMap::gw_name_from_id_pool_group(gw_name , name , pool, group); + auto group_key = std::make_pair(pool, group); if (map.epoch == 0){ // initial map - int ana_grp_id = -1; - - if(mp.find_created_gw(gw_name ,ana_grp_id) !=0) + auto group_gws = mp.Created_gws.find(group_key); + if (group_gws == mp.Created_gws.end()) { + dout(0) << "Failed to find group key " << group_key << "created gw for " << name << dendl; + return; + } + auto gw = group_gws->second.find(name); + if(gw == group_gws->second.end()) { - dout(0) << "Failed to find created gw for " << gw_name << dendl; + dout(0) << "Failed to find created gw for " << name << dendl; return; } - std::stringstream ss1; - mp._dump_created_gws(ss1); - dout(0) << ss1.str() << dendl; bool set_group_id = false; while (!set_group_id) { NVMeofGwMonitorGroupClient monitor_group_client( grpc::CreateChannel(monitor_address, grpc::InsecureChannelCredentials())); - dout(0) << "GRPC set_group_id: " << ana_grp_id << dendl; - set_group_id = monitor_group_client.set_group_id( ana_grp_id); + dout(0) << "GRPC set_group_id: " << gw->second.ana_grp_id << dendl; + set_group_id = monitor_group_client.set_group_id( gw->second.ana_grp_id); if (!set_group_id) { dout(0) << "GRPC set_group_id failed" << dendl; usleep(1000); // TODO: conf options @@ -299,45 +296,50 @@ void NVMeofGw::handle_nvmeof_gw_map(ceph::ref_t mmap) } // Interate over NQNs - for (const auto& subsystemPair : mp.Gmap) { - const std::string& nqn = subsystemPair.first; - const auto& idStateMap = subsystemPair.second; - nqn_ana_states nas; - nas.set_nqn(nqn); - - // This gateway state for the current subsystem / nqn - const auto& new_gateway_state = idStateMap.find(gw_name); - - // There is no subsystem update for this gateway - if (new_gateway_state == idStateMap.end()) continue; - - // Previously monitor distributed state - GW_STATE_T* old_gw_state = map.find_gw_map(gw_name, nqn); - - // Iterate over possible ANA Groups - for (uint32_t ana_grp_index = 0; ana_grp_index < MAX_SUPPORTED_ANA_GROUPS; ana_grp_index++) { - ana_group_state gs; - gs.set_grp_id(ana_grp_index + 1); // offset by 1, index 0 is ANAGRP1 - - // There is no state change for this ANA Group - auto old_state = old_gw_state ? old_gw_state->sm_state[ana_grp_index] : GW_STATES_PER_AGROUP_E::GW_IDLE_STATE; - if (old_state == new_gateway_state->second.sm_state[ana_grp_index]) continue; - - // detect was active, but not any more transition - if ((old_state == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE || old_state == GW_STATES_PER_AGROUP_E::GW_IDLE_STATE ) && - new_gateway_state->second.sm_state[ana_grp_index] != GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { - gs.set_state(INACCESSIBLE); // Set the ANA state - nas.mutable_states()->Add(std::move(gs)); - dout(0) << "nqn: " << nqn << " grpid " << (ana_grp_index + 1) << " INACCESSIBLE" <second.sm_state[ana_grp_index] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { - gs.set_state(OPTIMIZED); // Set the ANA state - nas.mutable_states()->Add(std::move(gs)); - dout(0) << "nqn: " << nqn << " grpid " << (ana_grp_index + 1) << " OPTIMIZED" <second) { + const auto& nqn = subsystemPair.first; + const auto& idStateMap = subsystemPair.second; + nqn_ana_states nas; + nas.set_nqn(nqn); + + // This gateway state for the current subsystem / nqn + const auto& new_gateway_state = idStateMap.find(name); + + // There is no subsystem update for this gateway + if (new_gateway_state == idStateMap.end()) continue; + + // Previously monitor distributed state + GW_STATE_T& old_gw_state = map.Gmap[group_key][nqn][name]; + + // Iterate over possible ANA Groups + for (ANA_GRP_ID_T ana_grp_index = 0; ana_grp_index < MAX_SUPPORTED_ANA_GROUPS; ana_grp_index++) { + ana_group_state gs; + gs.set_grp_id(ana_grp_index + 1); // offset by 1, index 0 is ANAGRP1 + + // There is no state change for this ANA Group + auto old_state = old_gw_state.sm_state[ana_grp_index]; + if (old_state == new_gateway_state->second.sm_state[ana_grp_index]) continue; + + // detect was active, but not any more transition + if ((old_state == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE || old_state == GW_STATES_PER_AGROUP_E::GW_IDLE_STATE ) && + new_gateway_state->second.sm_state[ana_grp_index] != GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { + gs.set_state(INACCESSIBLE); // Set the ANA state + nas.mutable_states()->Add(std::move(gs)); + dout(0) << "nqn: " << nqn << " grpid " << (ana_grp_index + 1) << " INACCESSIBLE" <second.sm_state[ana_grp_index] == GW_STATES_PER_AGROUP_E::GW_ACTIVE_STATE) { + gs.set_state(OPTIMIZED); // Set the ANA state + nas.mutable_states()->Add(std::move(gs)); + dout(0) << "nqn: " << nqn << " grpid " << (ana_grp_index + 1) << " OPTIMIZED" <Add(std::move(nas)); } - if (nas.states_size()) ai.mutable_states()->Add(std::move(nas)); } if (ai.states_size()) { bool set_ana_state = false; From b2e2dd0c375de2cf3cc5922d67a7dc4f09b79a37 Mon Sep 17 00:00:00 2001 From: Leonid Chernin Date: Sun, 31 Dec 2023 06:44:19 +0000 Subject: [PATCH 64/65] my additions --- src/mon/NVMeofGwMap.cc | 2 +- src/mon/NVMeofGwMap.h | 7 +++ src/mon/NVMeofGwMon.cc | 108 +++++++++++++++++++++++++++++++++++- src/mon/NVMeofGwSerialize.h | 20 +++++-- 4 files changed, 128 insertions(+), 9 deletions(-) diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index fe2c3dcca36d..034c1cc835da 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -49,7 +49,7 @@ int NVMeofGwMap::cfg_add_gw(const GW_ID_T &gw_id, const GROUP_KEY& group_key) { GW_CREATED_T gw_created(i); Created_gws[group_key][gw_id] = gw_created; - dout(4) << __func__ << "Created GW: " << gw_id << " pool " << group_key.first << "group" << group_key.second + dout(4) << __func__ << "Created GW: " << gw_id << " pool " << group_key.first << " group " << group_key.second << " grpid " << gw_created.ana_grp_id << dendl; return 0; } diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 43aba52cd355..1391e2b9bb88 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -59,6 +59,13 @@ class NVMeofGwMap void cancel_timer(const GW_ID_T &gw_id, const GROUP_KEY& group_key, const NQN_ID_T& nqn, ANA_GRP_ID_T anagrpid); public: + void debug_encode_decode(){ + ceph::buffer::list bl; + encode(bl); + auto p = bl.cbegin(); + decode(p); + } + void encode(ceph::buffer::list &bl, bool full_encode = true) const { using ceph::encode; __u8 struct_v = 0; diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 6c5a434d9783..fb28844c4e11 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -36,6 +36,108 @@ void NVMeofGwMon::on_restart(){ void NVMeofGwMon::on_shutdown() {} +static int cnt ; +#define start_cnt 6 +void NVMeofGwMon::inject1(){ + //bool propose = false; + if( ++cnt == 4 ){// simulation that new configuration was added + std::string pool = "pool1"; + std::string group = "grp1"; + auto group_key = std::make_pair(pool, group); + pending_map.cfg_add_gw("GW1" ,group_key); + pending_map.cfg_add_gw("GW2" ,group_key); + pending_map.cfg_add_gw("GW3" ,group_key); + NONCE_VECTOR_T new_nonces = {"abc", "def","hij"}; + //ANA_GRP_ID_T grp = 1; + //pending_map.update_gw_nonce("GW1.g1.p1", grp, new_nonces); + pending_map.Created_gws[group_key]["GW1"].nonce_map[1] = new_nonces; + + // pending_map.update_gw_nonce("GW1.g1.p1", grp, new_nonces); + pending_map.Created_gws[group_key]["GW2"].nonce_map[2] = new_nonces; + GW_STATE_T gst1(1); + std::string nqn1 = "nqn2008.node1"; + pending_map.Gmap[group_key][nqn1]["GW2"] = gst1; + + GW_STATE_T gst2(2); + pending_map.Gmap[group_key][nqn1]["GW3"] = gst2; + dout(4) << pending_map << dendl; + + + pending_map.debug_encode_decode(); + dout(4) << "Dump map after decode encode:" <("mon_nvmeofgw_beacon_grace"); dout(4) << MY_MON_PREFFIX << __func__ << "NVMeofGwMon leader got a real tick, pending epoch "<< pending_map.epoch << dendl; @@ -371,7 +473,7 @@ bool NVMeofGwMon::preprocess_beacon(MonOpRequestRef op){ } -//#define BYPASS_GW_CREATE_CLI +#define BYPASS_GW_CREATE_CLI bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ dout(4) << MY_MON_PREFFIX <<__func__ << dendl; @@ -397,7 +499,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){ else{ dout(4) << "GW " << gw_id << " sent beacon being in state GW_WAIT_INITIAL_MAP but it is not created yet!!! "<< dendl; #ifdef BYPASS_GW_CREATE_CLI - pending_map.cfg_add_gw(gw_id); + pending_map.cfg_add_gw(gw_id ,group_key); dout(4) << "GW " << gw_id << " created since mode is bypass-create-cli "<< dendl; propose= true; #endif diff --git a/src/mon/NVMeofGwSerialize.h b/src/mon/NVMeofGwSerialize.h index e99cb4c9b311..bb90b733dfe0 100755 --- a/src/mon/NVMeofGwSerialize.h +++ b/src/mon/NVMeofGwSerialize.h @@ -268,11 +268,11 @@ inline std::ostream& operator<<(std::ostream& os, const GROUP_KEY value) { }; inline std::ostream& operator<<(std::ostream& os, const GW_STATE_T value) { - os << "GW_STATE_T [ sm_state "; + os << "GW_STATE_T [ \n (gw-mon) sm_state "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { os << value.sm_state[i] << ","; } - os << "] failover peers "; + os << "] \n (gw-mon) failover peers "; for (int i = 0; i < MAX_SUPPORTED_ANA_GROUPS; i++) { os << value.failover_peer[i] << ","; } @@ -285,7 +285,7 @@ inline std::ostream& operator<<(std::ostream& os, const GWMAP value) { for (auto& nqn_state: value) { os << " { nqn: " << nqn_state.first << " -> "; for (auto& gw_state: nqn_state.second) { - os << " { gw_id: " << gw_state.first << " -> " << gw_state.second << "}"; + os << "\n (gw-mon) { gw_id: " << gw_state.first << " -> " << gw_state.second << "}"; } os << "}"; } @@ -297,11 +297,21 @@ inline std::ostream& operator<<(std::ostream& os, const NVMeofGwMap value) { for (auto& group_state: value.Gmap) { os << " { " << group_state.first << " } -> { " << group_state.second << " }"; } - os << " ] [ Created_gws: "; + os << " ] \n (gw-mon)[ Created_gws: "; for (auto& group_gws: value.Created_gws) { os << " { " << group_gws.first << " } -> { "; for (auto& gw: group_gws.second) { - os << " { gw_id " << gw.first << " } -> { " << gw.second.ana_grp_id << " }"; + os << " \n (gw-mon) { gw_id " << gw.first << " } -> { ana-grp-id:" << gw.second.ana_grp_id << " nonces : " ; // << " }"; + // dump nonces map + if(gw.second.nonce_map.size()) + for( auto &nonce_map : gw.second.nonce_map){ + os << "ana_grp: " << nonce_map.first << " [ " ; + for (auto & nonces : nonce_map.second){ + os << nonces << " "; + } + os << "]" ; + } + } os << " }"; } From ecaa4a038ce32e558c1c1c950e50e09a561c4cc8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 31 Dec 2023 08:20:07 +0000 Subject: [PATCH 65/65] .github/workflows: Bump actions/stale from 5 to 9 Bumps [actions/stale](https://github.com/actions/stale) from 5 to 9. - [Release notes](https://github.com/actions/stale/releases) - [Changelog](https://github.com/actions/stale/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/stale/compare/v5...v9) --- updated-dependencies: - dependency-name: actions/stale dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 31478e8e8f65..510a6bebd4e2 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -9,7 +9,7 @@ jobs: stale: runs-on: ubuntu-latest steps: - - uses: actions/stale@v8 + - uses: actions/stale@v9 with: # PAT for GitHub API authentication repo-token: "${{ secrets.GITHUB_TOKEN }}"