From 41272e71cde64d409c15841a7075b5568a94a723 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 30 Oct 2013 11:21:56 -0700 Subject: [PATCH] ReplicatedPG/PGBackend: block all ops other than Pull prior to active Previously, it was guarranteed that prior to activation, flushed would be false on a replica. Now, there may be a period where flushed is true due to the flush in Stray completing prior to activation and flushed being false again. This is necessary since shortly it won't be possible to determine from the osdmap whether a stray will be activated in a particular interval. Signed-off-by: Samuel Just --- src/osd/PG.cc | 9 ++++----- src/osd/PGBackend.h | 7 +++++++ src/osd/ReplicatedBackend.cc | 25 +++++++++++++++++++++++++ src/osd/ReplicatedBackend.h | 3 +++ src/osd/ReplicatedPG.cc | 15 ++++++++++++++- 5 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 4f76a5190cd66..ef529c189f849 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -6203,11 +6203,10 @@ PG::RecoveryState::Stray::Stray(my_context ctx) assert(!pg->is_active()); assert(!pg->is_peering()); assert(!pg->is_primary()); - if (!pg->is_replica()) // stray, need to flush for pulls - pg->start_flush( - context< RecoveryMachine >().get_cur_transaction(), - context< RecoveryMachine >().get_on_applied_context_list(), - context< RecoveryMachine >().get_on_safe_context_list()); + pg->start_flush( + context< RecoveryMachine >().get_cur_transaction(), + context< RecoveryMachine >().get_on_applied_context_list(), + context< RecoveryMachine >().get_on_safe_context_list()); } boost::statechart::result PG::RecoveryState::Stray::react(const MLogRec& logevt) diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 408c589a08a06..42959664ea89b 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -173,6 +173,13 @@ RecoveryHandle *h ///< [in,out] handle to attach recovery op to ) = 0; + /** + * true if PGBackend can handle this message while inactive + * + * If it returns true, handle_message *must* also return true + */ + virtual bool can_handle_while_inactive(OpRequestRef op) = 0; + /// gives PGBackend a crack at an incoming message virtual bool handle_message( OpRequestRef op ///< [in] message received diff --git a/src/osd/ReplicatedBackend.cc b/src/osd/ReplicatedBackend.cc index 9529e15ae7728..74b12ad4614c7 100644 --- a/src/osd/ReplicatedBackend.cc +++ b/src/osd/ReplicatedBackend.cc @@ -91,6 +91,31 @@ void ReplicatedBackend::check_recovery_sources(const OSDMapRef osdmap) } } +bool ReplicatedBackend::can_handle_while_inactive(OpRequestRef op) +{ + dout(10) << __func__ << ": " << op << dendl; + switch (op->get_req()->get_type()) { + case MSG_OSD_PG_PULL: + return true; + case MSG_OSD_SUBOP: { + MOSDSubOp *m = static_cast(op->get_req()); + if (m->ops.size() >= 1) { + OSDOp *first = &m->ops[0]; + switch (first->op.op) { + case CEPH_OSD_OP_PULL: + return true; + default: + return false; + } + } else { + return false; + } + } + default: + return false; + } +} + bool ReplicatedBackend::handle_message( OpRequestRef op ) diff --git a/src/osd/ReplicatedBackend.h b/src/osd/ReplicatedBackend.h index a95a77b1a1dcd..0c03a7abb3e91 100644 --- a/src/osd/ReplicatedBackend.h +++ b/src/osd/ReplicatedBackend.h @@ -66,6 +66,9 @@ class ReplicatedBackend : public PGBackend { void check_recovery_sources(const OSDMapRef osdmap); + /// @see PGBackend::delay_message_until_active + bool can_handle_while_inactive(OpRequestRef op); + /// @see PGBackend::handle_message bool handle_message( OpRequestRef op diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 5bcc82cdd0d64..c91ed68505ac3 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -841,12 +841,25 @@ void ReplicatedPG::do_request( return; } + if (!is_active()) { + // Delay unless PGBackend says it's ok + if (pgbackend->can_handle_while_inactive(op)) { + bool handled = pgbackend->handle_message(op); + assert(handled); + return; + } else { + waiting_for_active.push_back(op); + return; + } + } + + assert(is_active() && flushes_in_progress == 0); if (pgbackend->handle_message(op)) return; switch (op->get_req()->get_type()) { case CEPH_MSG_OSD_OP: - if (is_replay() || !is_active()) { + if (is_replay()) { dout(20) << " replay, waiting for active on " << op << dendl; waiting_for_active.push_back(op); return;