From f808c205c503f7d32518c91619f249466f84c4cf Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 27 Aug 2013 08:49:14 -0700 Subject: [PATCH] PGLog: maintain writeout_from and trimmed This way, we can avoid omap_rmkeyrange in the common append and trim cases. Fixes: #6040 Backport: Dumpling Signed-off-by: Samuel Just --- src/osd/PGLog.cc | 40 +++++++++++++++++++++++++++++----------- src/osd/PGLog.h | 22 +++++++++++++++++----- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 0e18710ad18ee..a6e538c75e75e 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -64,7 +64,7 @@ void PGLog::IndexedLog::split_into( index(); } -void PGLog::IndexedLog::trim(eversion_t s) +void PGLog::IndexedLog::trim(eversion_t s, set<eversion_t> *trimmed) { if (complete_to != log.end() && complete_to->version <= s) { @@ -77,6 +77,8 @@ void PGLog::IndexedLog::trim(eversion_t s) if (e.version > s) break; generic_dout(20) << "trim " << e << dendl; + if (trimmed) + trimmed->insert(e.version); unindex(e); // remove from index, log.pop_front(); // from log } @@ -142,14 +144,8 @@ void PGLog::trim(eversion_t trim_to, pg_info_t &info) assert(trim_to <= info.last_complete); dout(10) << "trim " << log << " to " << trim_to << dendl; - log.trim(trim_to); + log.trim(trim_to, &trimmed); info.log_tail = log.tail; - - if (log.log.empty()) { - mark_dirty_to(eversion_t::max()); - } else { - mark_dirty_to(log.log.front().version); - } } } @@ -541,11 +537,15 @@ void PGLog::write_log( << "dirty_to: " << dirty_to << ", dirty_from: " << dirty_from << ", dirty_divergent_priors: " << dirty_divergent_priors + << ", writeout_from: " << writeout_from + << ", trimmed: " << trimmed << dendl; _write_log( t, log, log_oid, divergent_priors, dirty_to, dirty_from, + writeout_from, + trimmed, dirty_divergent_priors, !touched_log, (pg_log_debug ? 
&log_keys_debug : 0)); @@ -558,8 +558,11 @@ void PGLog::write_log( void PGLog::write_log(ObjectStore::Transaction& t, pg_log_t &log, const hobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors) { - _write_log(t, log, log_oid, divergent_priors, eversion_t::max(), eversion_t(), - true, true, 0); + _write_log( + t, log, log_oid, + divergent_priors, eversion_t::max(), eversion_t(), eversion_t(), + set<eversion_t>(), + true, true, 0); } void PGLog::_write_log( @@ -567,11 +570,24 @@ void PGLog::_write_log( const hobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors, eversion_t dirty_to, eversion_t dirty_from, + eversion_t writeout_from, + const set<eversion_t> &trimmed, bool dirty_divergent_priors, bool touch_log, set<string> *log_keys_debug ) { + set<string> to_remove; + for (set<eversion_t>::const_iterator i = trimmed.begin(); + i != trimmed.end(); + ++i) { + to_remove.insert(i->get_key_name()); + if (log_keys_debug) { + assert(log_keys_debug->count(i->get_key_name())); + log_keys_debug->erase(i->get_key_name()); + } + } + //dout(10) << "write_log, clearing up to " << dirty_to << dendl; if (touch_log) t.touch(coll_t(), log_oid); @@ -599,7 +615,8 @@ void PGLog::_write_log( } for (list<pg_log_entry_t>::reverse_iterator p = log.log.rbegin(); - p != log.log.rend() && p->version >= dirty_from && + p != log.log.rend() && + (p->version >= dirty_from || p->version >= writeout_from) && p->version >= dirty_to; ++p) { bufferlist bl(sizeof(*p) * 2); @@ -621,6 +638,7 @@ void PGLog::_write_log( ::encode(divergent_priors, keys["divergent_priors"]); } + t.omap_rmkeys(coll_t::META_COLL, log_oid, to_remove); t.omap_setkeys(coll_t::META_COLL, log_oid, keys); } diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index b524c60c00a47..871aa1ab7dd8b 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -142,7 +142,7 @@ struct PGLog { caller_ops[e.reqid] = &(log.back()); } - void trim(eversion_t s); + void trim(eversion_t s, set<eversion_t> *trimmed); ostream& print(ostream& out) const; }; @@ -158,8 +158,10 @@ struct PGLog { /// Log is clean on [dirty_to, dirty_from) bool touched_log; - eversion_t 
dirty_to; - eversion_t dirty_from; + eversion_t dirty_to; ///< must clear/writeout all keys up to dirty_to + eversion_t dirty_from; ///< must clear/writeout all keys past dirty_from + eversion_t writeout_from; ///< must writeout keys past writeout_from + set<eversion_t> trimmed; ///< must clear keys in trimmed bool dirty_divergent_priors; CephContext *cct; @@ -167,7 +169,9 @@ struct PGLog { return !touched_log || (dirty_to != eversion_t()) || (dirty_from != eversion_t::max()) || - dirty_divergent_priors; + dirty_divergent_priors || + (writeout_from != eversion_t::max()) || + !(trimmed.empty()); } void mark_dirty_to(eversion_t to) { if (to > dirty_to) @@ -177,6 +181,10 @@ struct PGLog { if (from < dirty_from) dirty_from = from; } + void mark_writeout_from(eversion_t from) { + if (from < writeout_from) + writeout_from = from; + } void add_divergent_prior(eversion_t version, hobject_t obj) { divergent_priors.insert(make_pair(version, obj)); dirty_divergent_priors = true; @@ -221,6 +229,8 @@ struct PGLog { dirty_from = eversion_t::max(); dirty_divergent_priors = false; touched_log = true; + trimmed.clear(); + writeout_from = eversion_t::max(); check(); } public: @@ -281,7 +291,7 @@ struct PGLog { void unindex() { log.unindex(); } void add(pg_log_entry_t& e) { - mark_dirty_from(e.version); + mark_writeout_from(e.version); log.add(e); } @@ -374,6 +384,8 @@ struct PGLog { const hobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors, eversion_t dirty_to, eversion_t dirty_from, + eversion_t writeout_from, + const set<eversion_t> &trimmed, bool dirty_divergent_priors, bool touch_log, set<string> *log_keys_debug