Commit f80937d

updates
* include GBUFFER in the accepted client caps for a quiesced node
* only use rdlock on the filelock
* change dir gather completion to a lambda
* only use non-projected inodes for the quiesce operation
leonid-s-usov committed Feb 19, 2024
1 parent a810e7f commit f80937d
Showing 3 changed files with 83 additions and 76 deletions.
5 changes: 0 additions & 5 deletions qa/tasks/cephfs/test_quiesce.py
@@ -207,11 +207,6 @@ def _verify_quiesce(self, rank=0, root=None, splitauth=False):
self.assertEqual(lock['flags'], 4)
self.assertEqual(lock['lock']['state'], 'lock')
self.assertEqual(lock['lock']['num_xlocks'], 1)
-    elif lock_type == "iversion":
-        # only regular files acquire xlock (on ifile)
-        self.assertEqual((mode & S_IFMT), S_IFREG)
-        self.assertEqual(lock['flags'], 2)
-        self.assertEqual(lock['lock']['state'][:4], 'lock')
else:
# no iflock
self.assertFalse(lock_type.startswith("i"))
7 changes: 4 additions & 3 deletions src/mds/CInode.cc
@@ -3538,17 +3538,18 @@ int CInode::get_caps_quiesce_mask() const
if (quiescelock.can_wrlock()) {
return CEPH_CAP_ANY;
} else {
-    return CEPH_CAP_ANY_SHARED | CEPH_CAP_PIN /*?*/;
+    // what we allow for a quiesced node
+    return CEPH_CAP_ANY_SHARED | CEPH_CAP_GRD | CEPH_CAP_GBUFFER | CEPH_CAP_PIN;
}
}

// caps allowed
int CInode::get_caps_liked() const
{
if (is_dir())
-    return CEPH_CAP_PIN | CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_SHARED; // but not, say, FILE_RD|WR|WRBUFFER
+    return get_caps_quiesce_mask() & (CEPH_CAP_PIN | CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_SHARED); // but not, say, FILE_RD|WR|WRBUFFER
else
-    return CEPH_CAP_ANY & ~CEPH_CAP_FILE_LAZYIO;
+    return get_caps_quiesce_mask() & (CEPH_CAP_ANY & ~CEPH_CAP_FILE_LAZYIO);
}

int CInode::get_caps_allowed_ever() const
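The net effect of the two hunks above: once an inode is quiesced, every caps computation is intersected with the quiesce mask, so clients keep shared caps (now including GRD and GBUFFER) while exclusive and write caps are masked out. A minimal sketch of that gating, using simplified, hypothetical cap bits rather than the real Ceph definitions:

    #include <cstdio>

    // Hypothetical, simplified cap bits -- not the real Ceph values.
    enum : unsigned {
      CAP_PIN     = 1u << 0, // pin only
      CAP_SHARED  = 1u << 1, // any shared/read caps
      CAP_EXCL    = 1u << 2, // any exclusive caps
      CAP_FILE_WR = 1u << 3, // file write
      CAP_GRD     = 1u << 4, // cached reads, kept for quiesced nodes
      CAP_GBUFFER = 1u << 5, // buffered writes, newly kept by this commit
      CAP_ANY     = 0x3f,
    };

    // Mirrors the shape of CInode::get_caps_quiesce_mask(): everything while
    // unquiesced, only shared caps plus GRD/GBUFFER/PIN while quiesced.
    unsigned quiesce_mask(bool quiesced) {
      return quiesced ? (CAP_SHARED | CAP_GRD | CAP_GBUFFER | CAP_PIN) : CAP_ANY;
    }

    int main() {
      unsigned liked = CAP_PIN | CAP_SHARED | CAP_EXCL | CAP_FILE_WR | CAP_GBUFFER;
      std::printf("unquiesced: %#x\n", liked & quiesce_mask(false)); // all liked caps
      std::printf("quiesced:   %#x\n", liked & quiesce_mask(true));  // EXCL and WR gone
    }

Because get_caps_liked() now applies the mask itself, buffered writers (GBUFFER) no longer get their caps recalled just because the node is quiesced.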
147 changes: 79 additions & 68 deletions src/mds/MDCache.cc
@@ -16,6 +16,8 @@
#include <ostream>
#include <string>
#include <string_view>
+#include <ranges>
+#include <array>
#include <map>

#include "MDCache.h"
@@ -13526,7 +13528,6 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
mds->server->respond_to_request(mdr, -CEPHFS_ENOENT);
return;
}
-  const bool is_root = (mdr->get_filepath().get_ino() == mdr->get_filepath2().get_ino());

dout(20) << __func__ << " " << *mdr << " quiescing " << *in << dendl;

@@ -13565,7 +13566,7 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
}

// now we can test if this inode should be skipped
-  if (in->get_projected_inode()->get_quiesce_block()) {
+  if (in->get_inode()->get_quiesce_block()) {
dout(10) << __func__ << " quiesce is blocked for this inode; dropping locks!" << dendl;
mdr->mark_event("quiesce blocked");
mds->locker->drop_locks(mdr.get());
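The hunk above switches the quiesce-block check from the projected inode to the committed one, matching the commit message's "only use non-projected inodes for the quiesce operation". For background, a rough sketch of the projection pattern; the Inode type is hypothetical and only the two accessor names are taken from CInode:

    #include <memory>
    #include <vector>

    // Hypothetical stand-in for the inode payload; only the flag we care about.
    struct Inode {
      bool quiesce_block = false;
      bool get_quiesce_block() const { return quiesce_block; }
    };

    // Rough shape of the projection pattern (simplified): committed state plus
    // a queue of projected, not-yet-journaled versions.
    class Node {
      std::shared_ptr<Inode> committed = std::make_shared<Inode>();
      std::vector<std::shared_ptr<Inode>> projected;

    public:
      std::shared_ptr<const Inode> get_inode() const {           // committed state
        return committed;
      }
      std::shared_ptr<const Inode> get_projected_inode() const { // newest pending, if any
        return projected.empty() ? committed : projected.back();
      }
      void project(std::shared_ptr<Inode> in) { projected.push_back(std::move(in)); }
    };

    int main() {
      Node n;
      auto pending = std::make_shared<Inode>();
      pending->quiesce_block = true;
      n.project(pending);
      // After this commit the check reads the committed state, so an
      // uncommitted quiesce_block no longer causes the inode to be skipped.
      return n.get_inode()->get_quiesce_block() ? 1 : 0;
    }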
@@ -13576,17 +13577,14 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
return;
}

-  if (!in->is_dir()) {
-    /*
-     * Because files are treated specially allowing multiple reader/writers, we
-     * need an xlock here to recall all write caps. This unfortunately means
-     * there can be no readers.
-     */
-    lov.add_xlock(&in->filelock);
-  } else {
-    lov.add_rdlock(&in->filelock);
-  }
+  // We are adding the rest of the rdlocks.
+  // This is safe because we only take rdlocks on all four locks:
+  // if anyone attempts a wr or x lock, the quiesce lock would be injected
+  // automatically by `acquire_locks`, and since we already hold that as
+  // xlock, nobody else can be holding any of the write locks below.

+  // rdlock on the filelock is sufficient, as it cannot be write-locked
+  // while we hold the quiesce xlock
+  lov.add_rdlock(&in->filelock);
lov.add_rdlock(&in->authlock);
lov.add_rdlock(&in->linklock);
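The reasoning in the comment above can be reproduced with standard-library primitives: writers must pass an exclusive gate (the quiesce lock) before taking any write lock, so while the quiescer holds the gate, a plain rdlock on the data lock is already race-free. A loose analogy, not the MDS Locker API:

    #include <mutex>
    #include <shared_mutex>

    struct QuiescableLock {
      std::shared_mutex quiesce; // the gate every writer must pass first
      std::shared_mutex data;    // stands in for filelock/authlock/linklock/xattrlock

      void write(int& value, int v) {
        std::shared_lock gate(quiesce); // writers take the gate (non-exclusively)
        std::unique_lock lk(data);      // ...then write-lock the data
        value = v;
      }

      template <typename F>
      void quiesce_and(F f) {
        std::unique_lock gate(quiesce); // the quiescer xlocks the gate:
        f();                            // no writer can be past it now
      }

      int quiesced_read(const int& value) {
        // With the gate held exclusively by the quiescer, no writer can hold
        // `data`, so a plain rdlock is sufficient -- no xlock needed.
        std::shared_lock lk(data);
        return value;
      }
    };

    int main() {
      QuiescableLock l;
      int value = 0;
      l.write(value, 42);
      int seen = 0;
      l.quiesce_and([&] { seen = l.quiesced_read(value); });
      return seen == 42 ? 0 : 1;
    }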

@@ -13600,16 +13598,49 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
return;
}


// since we're holding onto the quiesce lock,
// we don't need the other capabilities related locks anymore
// as the quiesce mask will prevent any non-shared capabilities from being issued
// see CInode::get_caps_quiesce_mask()
+  // we actually don't need to hold any other lock but the quiesce lock

+  MutationImpl::LockOpVec drop_ops;
+  std::ranges::copy_if(
+    mdr->locks,
+    std::back_inserter(drop_ops),
+    [&in](auto &&op) { return op.lock != &in->quiescelock; }
+  );
+
+  dout(20) << "dropping locks: " << drop_ops << dendl;
+
+  for (auto op: drop_ops) {
+    mds->locker->drop_lock(mdr.get(), op.lock);
+  }

+  // use a lambda for completion so that we don't have to repeat
+  // the whole lock/drop dance above
+  auto did_quiesce = new LambdaContext([in, mdr=mdr, &qs, mds=mds, func=__func__](int rc) {
+    /* do not respond/complete so locks are not lost, parent request will complete */
+    if (mdr->killed) {
+      dout(20) << func << " " << *mdr << " not dispatching killed " << *mdr << dendl;
+      return;
+    } else if (mdr->internal_op_finish == nullptr) {
+      dout(20) << func << " " << *mdr << " already finished quiesce" << dendl;
+      return;
+    }
+
+    if (in->is_auth()) {
+      dout(10) << func << " " << *mdr << " quiesce complete of " << *in << dendl;
+      mdr->mark_event("quiesce complete");
+    } else {
+      dout(10) << func << " " << *mdr << " non-auth quiesce complete of " << *in << dendl;
+      mdr->mark_event("quiesce complete for non-auth inode");
+    }
+
-  mds->locker->drop_lock(mdr.get(), &in->filelock);
-  mds->locker->drop_lock(mdr.get(), &in->authlock);
-  mds->locker->drop_lock(mdr.get(), &in->linklock);
-  mds->locker->drop_lock(mdr.get(), &in->xattrlock);
+    qs.inc_inodes_quiesced();
+    mdr->internal_op_finish->complete(rc);
+    mdr->internal_op_finish = nullptr;
+  });
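Two generic idioms are at work above: std::ranges::copy_if selects every held lock except the quiesce lock for dropping, and the completion logic is packaged in a callback that can run either inline (a leaf inode) or once child operations finish. A standalone sketch with hypothetical Lock/LockOp types in place of the MDS classes:

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <vector>

    struct Lock { std::string name; };
    struct LockOp { Lock* lock; };

    int main() {
      Lock quiescelock{"quiesce"}, filelock{"file"}, authlock{"auth"};
      std::vector<LockOp> held{{&quiescelock}, {&filelock}, {&authlock}};

      // Keep the quiesce lock, collect everything else for dropping.
      std::vector<LockOp> drop_ops;
      std::ranges::copy_if(held, std::back_inserter(drop_ops),
                           [&](const LockOp& op) { return op.lock != &quiescelock; });

      for (const auto& op : drop_ops)
        std::cout << "dropping " << op.lock->name << "\n";

      // Completion packaged as a callback: run now, or hand to a gather.
      std::function<void(int)> did_quiesce = [](int rc) {
        std::cout << "quiesce complete, rc=" << rc << "\n";
      };
      did_quiesce(0);
    }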

if (in->is_dir()) {
for (auto& dir : in->get_dirfrags()) {
@@ -13621,48 +13652,41 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
}
}
uint64_t count = 0;
-  MDSGatherBuilder gather(g_ceph_context, new C_MDS_RetryRequest(this, mdr));
+  MDSGatherBuilder gather(g_ceph_context, new MDSInternalContextWrapper(mds, did_quiesce));
auto& qops = qrmdr->more()->quiesce_ops;
for (auto& dir : in->get_dirfrags()) {
for (auto& [dnk, dn] : *dir) {
-      auto recurse = [this, mds=mds, &qops, &count, delay, &qfinisher, &qs, &gather, func=__func__](CInode* in) {
-        if (auto it = qops.find(in); it != qops.end()) {
-          dout(25) << func << ": existing quiesce metareqid: " << it->second << dendl;
-          if (auto reqit = active_requests.find(it->second); reqit != active_requests.end()) {
-            auto& qimdr = reqit->second;
-            dout(25) << func << ": found in-progress " << qimdr << dendl;
-            return;
-          }
-        }
-        dout(10) << func << ": scheduling op to quiesce " << *in << dendl;
-
-        MDRequestRef qimdr = request_start_internal(CEPH_MDS_OP_QUIESCE_INODE);
-        qimdr->set_filepath(filepath(in->ino()));
-        qimdr->internal_op_finish = gather.new_sub();
-        qimdr->internal_op_private = qfinisher;
-        qops[in] = qimdr->reqid;
-        qs.inc_inodes();
-        if (delay > 0ms) {
-          mds->timer.add_event_after(delay, new LambdaContext([cache=this,qimdr](int r) {
-            cache->dispatch_request(qimdr);
-          }));
-        } else {
-          dispatch_request(qimdr);
-        }
-        if (!(++count % mds->heartbeat_reset_grace())) {
-          mds->heartbeat_reset();
+      // don't take the projected linkage because
+      // we should only care about the committed state
+      auto in = dn->get_linkage()->inode;
+      if (!in) {
+        continue;
+      }
+      if (auto it = qops.find(in); it != qops.end()) {
+        dout(25) << __func__ << ": existing quiesce metareqid: " << it->second << dendl;
+        if (auto reqit = active_requests.find(it->second); reqit != active_requests.end()) {
+          auto& qimdr = reqit->second;
+          dout(25) << __func__ << ": found in-progress " << qimdr << dendl;
+          continue;
+        }
+      }
-      };


-      auto inode = dn->get_linkage()->inode;
-      if (inode) {
-        recurse(inode);
-      }
-      auto pinode = dn->get_projected_inode();
-      if (pinode && pinode != inode) {
-        dout(10) << __func__ << " descending into a projected dentry" << dendl;
-        recurse(inode);
-      }
+      dout(10) << __func__ << ": scheduling op to quiesce " << *in << dendl;
+
+      MDRequestRef qimdr = request_start_internal(CEPH_MDS_OP_QUIESCE_INODE);
+      qimdr->set_filepath(filepath(in->ino()));
+      qimdr->internal_op_finish = gather.new_sub();
+      qimdr->internal_op_private = qfinisher;
+      qops[in] = qimdr->reqid;
+      qs.inc_inodes();
+      if (delay > 0ms) {
+        mds->timer.add_event_after(delay, new LambdaContext([cache=this,qimdr](int r) {
+          cache->dispatch_request(qimdr);
+        }));
+      } else {
+        dispatch_request(qimdr);
+      }
+      if (!(++count % mds->heartbeat_reset_grace())) {
+        mds->heartbeat_reset();
+      }
}
}
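The gather above fires did_quiesce only after every child QUIESCE_INODE op has completed. A minimal sketch of that gather/finisher pattern, with a hypothetical class in place of the real MDSGatherBuilder (whose interface is richer):

    #include <functional>
    #include <iostream>
    #include <memory>

    // Minimal gather: new_sub() hands out completion callbacks; when the
    // last one fires, the finisher runs -- the shape of MDSGatherBuilder.
    class Gather {
      struct State {
        int pending = 0;
        std::function<void(int)> finisher;
      };
      std::shared_ptr<State> st = std::make_shared<State>();

    public:
      explicit Gather(std::function<void(int)> fin) { st->finisher = std::move(fin); }

      // Hand out one completion callback per child operation.
      std::function<void(int)> new_sub() {
        ++st->pending;
        return [st = st](int rc) {
          if (--st->pending == 0 && st->finisher)
            st->finisher(rc); // last child completion fires the finisher
        };
      }
    };

    int main() {
      Gather gather([](int rc) { std::cout << "all children quiesced, rc=" << rc << "\n"; });
      auto a = gather.new_sub(); // e.g. one per child QUIESCE_INODE op
      auto b = gather.new_sub();
      a(0);
      b(0); // finisher runs here
    }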
@@ -13673,19 +13697,7 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
}
}

-  if (in->is_auth()) {
-    dout(10) << __func__ << " " << *mdr << " quiesce complete of " << *in << dendl;
-    mdr->mark_event("quiesce complete");
-  } else {
-    dout(10) << __func__ << " " << *mdr << " non-auth quiesce complete of " << *in << dendl;
-    mdr->mark_event("quiesce complete for non-auth inode");
-  }
-
-  qs.inc_inodes_quiesced();
-  mdr->internal_op_finish->complete(0);
-  mdr->internal_op_finish = nullptr;
-
-  /* do not respond/complete so locks are not lost, parent request will complete */
+  did_quiesce->complete(0);
}

void MDCache::dispatch_quiesce_path(const MDRequestRef& mdr)
@@ -13755,7 +13767,6 @@ void MDCache::dispatch_quiesce_path(const MDRequestRef& mdr)
} else if (auto& qops = mdr->more()->quiesce_ops; qops.count(diri) == 0) {
MDRequestRef qimdr = request_start_internal(CEPH_MDS_OP_QUIESCE_INODE);
qimdr->set_filepath(filepath(diri->ino()));
-    qimdr->set_filepath2(filepath(diri->ino())); /* is_root! */
qimdr->internal_op_finish = new C_MDS_RetryRequest(this, mdr);
qimdr->internal_op_private = qfinisher;
qops[diri] = qimdr->reqid;
