Skip to content

Commit

Permalink
ceph-nvmeof-mon: fixes
Browse files Browse the repository at this point in the history
- gw subsystems update
  Propose a state change when the subsystems reported by the gateway change.
  Otherwise, only the initially reported subsystems are handled, and any
  new subsystem listener ANA groups could remain "unreachable".

  example of the bug: https://github.com/ceph/ceph-nvmeof/actions/runs/8603572675/job/23576286745?pr=560

  Signed-off-by: Alexander Indenbaum <[email protected]>
  (cherry picked from commit 7ad25a3)

- fixing issue: same group set to Active on 2 gws

  Signed-off-by: Leonid Chernin <[email protected]>
  (cherry picked from commit f2d3d29)

Signed-off-by: Alexander Indenbaum <[email protected]>
  • Loading branch information
Alexander Indenbaum committed Apr 14, 2024
1 parent e50ed54 commit 1c8eaae
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 5 deletions.
7 changes: 4 additions & 3 deletions src/mon/NVMeofGwMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ int NVMeofGwMap::process_gw_map_gw_down(const NvmeGwId &gw_id, const NvmeGroupKe
fsm_handle_gw_down (gw_id, group_key, st.sm_state[i], i, propose_pending);
st.standby_state(i);
}
propose_pending = true; // map should reflect that gw becames unavailable
}
else {
dout(1) << __FUNCTION__ << "ERROR GW-id was not found in the map " << gw_id << dendl;
Expand Down Expand Up @@ -478,11 +479,11 @@ void NVMeofGwMap::fsm_handle_to_expired(const NvmeGwId &gw_id, const NvmeGroupKe
break;
}
else if(st.sm_state[grpid] == GW_STATES_PER_AGROUP_E::GW_STANDBY_STATE && st.availability == GW_AVAILABILITY_E::GW_AVAILABLE) {
st.active_state(grpid);// GW failed and started during the persistency interval
dout(4) << "Failback unsuccessfull. GW: " << gw_state.first << "becomes Active for the ana group " << grpid << dendl;
st.standby_state(grpid);// GW failed during the persistency interval
dout(4) << "Failback unsuccessfull. GW: " << gw_state.first << " becomes Standby for the ANA groupId " << grpid << dendl;
}
fbp_gw_state.standby_state(grpid);
dout(4) << "Failback unsuccessfull GW: " << gw_id << "becomes standby for the ana group " << grpid << dendl;
dout(4) << "Failback unsuccessfull GW: " << gw_id << " becomes Standby for the ANA groupId " << grpid << dendl;
map_modified = true;
break;
}
Expand Down
8 changes: 7 additions & 1 deletion src/mon/NVMeofGwMon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,13 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
if(sub.size() == 0) {
avail = GW_AVAILABILITY_E::GW_UNAVAILABLE;
}
pending_map.Created_gws[group_key][gw_id].subsystems = sub;
if(pending_map.Created_gws[group_key][gw_id].subsystems != sub)
{
dout(10) << "subsystems of GW changed, propose pending " << gw_id << dendl;
pending_map.Created_gws[group_key][gw_id].subsystems = sub;
dout(10) << "subsystems of GW " << gw_id << " "<< pending_map.Created_gws[group_key][gw_id].subsystems << dendl;
nonce_propose = true;
}
pending_map.Created_gws[group_key][gw_id].last_gw_map_epoch_valid = ( map.epoch == m->get_last_gwmap_epoch() );
if( pending_map.Created_gws[group_key][gw_id].last_gw_map_epoch_valid == false ){
dout(1) << "map epoch of gw is not up-to-date " << gw_id << " epoch " << map.epoch << " beacon_epoch " << m->get_last_gwmap_epoch() << dendl;
Expand Down
20 changes: 20 additions & 0 deletions src/mon/NVMeofGwTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,38 @@ using ANA_STATE = std::vector<std::pair<GW_EXPORTED_STATES_PER_AGROUP_E, epoch_t
// A namespace entry: an ANA group id paired with a nonce string.
struct BeaconNamespace {
  NvmeAnaGrpId anagrpid;
  std::string  nonce;

  // Member-wise equality: both the ANA group id and the nonce must match.
  bool operator==(const BeaconNamespace& other) const {
    if (!(anagrpid == other.anagrpid)) {
      return false;
    }
    return nonce == other.nonce;
  }
};

// A listener endpoint described by address family, address and service id.
struct BeaconListener {
  std::string address_family;  // IPv4 or IPv6
  std::string address;
  std::string svcid;           // port

  // Member-wise equality: listeners match only if all three fields match.
  bool operator==(const BeaconListener& other) const {
    if (!(address_family == other.address_family)) {
      return false;
    }
    if (!(address == other.address)) {
      return false;
    }
    return svcid == other.svcid;
  }
};

// A subsystem entry: its NQN plus the lists of listeners and namespaces.
struct BeaconSubsystem {
  NvmeNqnId                  nqn;
  std::list<BeaconListener>  listeners;
  std::list<BeaconNamespace> namespaces;

  // Member-wise equality. The list comparisons are element-by-element in
  // order, so the same entries in a different order compare as not equal.
  bool operator==(const BeaconSubsystem& other) const {
    if (!(nqn == other.nqn)) {
      return false;
    }
    if (!(listeners == other.listeners)) {
      return false;
    }
    return namespaces == other.namespaces;
  }
};

using BeaconSubsystems = std::list<BeaconSubsystem>;
Expand Down
1 change: 0 additions & 1 deletion src/nvmeof/NVMeofGwMonitorClient.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ static bool get_gw_state(const char* desc, const std::map<NvmeGroupKey, NvmeGwMa
void NVMeofGwMonitorClient::send_beacon()
{
ceph_assert(ceph_mutex_is_locked_by_me(lock));
//dout(0) << "sending beacon as gid " << monc.get_global_id() << dendl;
GW_AVAILABILITY_E gw_availability = GW_AVAILABILITY_E::GW_CREATED;
BeaconSubsystems subs;
NVMeofGwClient gw_client(
Expand Down

0 comments on commit 1c8eaae

Please sign in to comment.