From 306ec7142d699c26ce874b11dc02ccdb3cf296c7 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Fri, 1 Nov 2013 16:12:52 -0700 Subject: [PATCH 01/37] rgw: don't turn 404 into 400 for the replicalog api 404 is not actually a problem to clients like radosgw-agent, but 400 implies something about the request was incorrect. Backport: dumpling Signed-off-by: Josh Durgin --- src/rgw/rgw_rest_replica_log.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/rgw/rgw_rest_replica_log.cc b/src/rgw/rgw_rest_replica_log.cc index 600a8edb78c5b..2543f32fba6e8 100644 --- a/src/rgw/rgw_rest_replica_log.cc +++ b/src/rgw/rgw_rest_replica_log.cc @@ -171,7 +171,9 @@ static int bucket_instance_to_bucket(RGWRados *store, string& bucket_instance, r int r = store->get_bucket_instance_info(NULL, bucket_instance, bucket_info, &mtime, NULL); if (r < 0) { - dout(5) << "could not get bucket instance info for bucket=" << bucket_instance << dendl; + dout(5) << "could not get bucket instance info for bucket=" << bucket_instance << ": " << cpp_strerror(r) << dendl; + if (r == -ENOENT) + return r; return -EINVAL; } From 9834ab9ab838d5164d518a96dbf4638950efe68f Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Fri, 1 Nov 2013 15:45:02 -0700 Subject: [PATCH 02/37] OSDMonitor: be a little nicer about letting users do pg splitting We were previously blocking pg splits whenever pg creations were in- progress, but we only really need to avoid splitting any pgs which are currently being created. Let the user set a different pg_num if there are no creating PGs on the pool in question. 
Fixes: #6673, take two Signed-off-by: Greg Farnum --- src/mon/OSDMonitor.cc | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index d56953ba06991..07775fce2bf9a 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2781,10 +2781,15 @@ int OSDMonitor::prepare_command_pool_set(map &cmdmap, } if (n <= (int)p.get_pg_num()) { ss << "specified pg_num " << n << " <= current " << p.get_pg_num(); - } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) { - ss << "currently creating pgs, wait"; - return -EAGAIN; } else { + for(set::iterator i = mon->pgmon()->pg_map.creating_pgs.begin(); + i != mon->pgmon()->pg_map.creating_pgs.end(); + ++i) { + if (i->m_pool == static_cast(pool)) { + ss << "currently creating pgs, wait"; + return -EAGAIN; + } + } p.set_pg_num(n); ss << "set pool " << pool << " pg_num to " << n; } @@ -2797,10 +2802,15 @@ int OSDMonitor::prepare_command_pool_set(map &cmdmap, ss << "specified pgp_num must > 0, but you set to " << n; } else if (n > (int)p.get_pg_num()) { ss << "specified pgp_num " << n << " > pg_num " << p.get_pg_num(); - } else if (!mon->pgmon()->pg_map.creating_pgs.empty()) { - ss << "still creating pgs, wait"; - return -EAGAIN; } else { + for(set::iterator i = mon->pgmon()->pg_map.creating_pgs.begin(); + i != mon->pgmon()->pg_map.creating_pgs.end(); + ++i) { + if (i->m_pool == static_cast(pool)) { + ss << "currently creating pgs, wait"; + return -EAGAIN; + } + } p.set_pgp_num(n); ss << "set pool " << pool << " pgp_num to " << n; } From 2db20d972125032d52c345d4fd16cdd3947471e0 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Fri, 1 Nov 2013 18:41:02 -0700 Subject: [PATCH 03/37] qa: don't run racy xfstest 008 This test attempts to generate a random number of holes within a particular range, but may fail because hole placement is random. 
Signed-off-by: Josh Durgin --- qa/run_xfstests.sh | 5 ++++- qa/run_xfstests_qemu.sh | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/qa/run_xfstests.sh b/qa/run_xfstests.sh index f9c3e55a79d60..3f5e2eca9f507 100644 --- a/qa/run_xfstests.sh +++ b/qa/run_xfstests.sh @@ -48,7 +48,7 @@ XFS_MKFS_OPTIONS="-l su=32k" # Override the default test list with a list of tests known to pass # until we can work through getting them all passing reliably. -TESTS="1-9 11-15 17 19-21 26-29 31-34 41 46-48 50-54 56 61 63-67 69-70 74-76" +TESTS="1-7 9 11-15 17 19-21 26-29 31-34 41 46-48 50-54 56 61 63-67 69-70 74-76" TESTS="${TESTS} 78 79 84-89 91-92 100 103 105 108 110 116-121 124 126" TESTS="${TESTS} 129-135 137-141 164-167 182 184 187-190 192 194" TESTS="${TESTS} 196 199 201 203 214-216 220-227 234 236-238 241 243-249" @@ -59,6 +59,9 @@ TESTS="${TESTS} 253 257-259 261 262 269 273 275 277 278 280 285 286" ###### # Some explanation of why tests have been excluded above: # +# Test 008 was pulled because it contained a race condition leading to +# spurious failures. +# # Test 049 was pulled because it caused a kernel fault. 
# http://tracker.newdream.net/issues/2260 # Test 232 was pulled because it caused an XFS error diff --git a/qa/run_xfstests_qemu.sh b/qa/run_xfstests_qemu.sh index 919e46a4d59fc..9dcced7edab38 100644 --- a/qa/run_xfstests_qemu.sh +++ b/qa/run_xfstests_qemu.sh @@ -7,4 +7,4 @@ chmod +x run_xfstests.sh # tests excluded fail in the current testing vm regardless of whether # rbd is used -./run_xfstests.sh -c 1 -f xfs -t /dev/vdb -s /dev/vdc 1-17 19-26 28-49 51-61 63 66-67 69-79 83 85-105 108-110 112-135 137-170 174-191 193-204 206-217 220-227 230-231 233 235-241 243-249 251-262 264-278 281-286 288-289 +./run_xfstests.sh -c 1 -f xfs -t /dev/vdb -s /dev/vdc 1-7 9-17 19-26 28-49 51-61 63 66-67 69-79 83 85-105 108-110 112-135 137-170 174-191 193-204 206-217 220-227 230-231 233 235-241 243-249 251-262 264-278 281-286 288-289 From fb0f1986449b53908a6d17d4398270da64aa8f07 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Fri, 1 Nov 2013 19:02:29 -0700 Subject: [PATCH 04/37] rbd: omit 'rw' option during map The ro and rw options were added in linux 3.7. To be compatible with older kernels, don't specify rw. The default will probably always be rw, so this should not present any problems in the future. 
Reported-by: nicolasc Signed-off-by: Josh Durgin --- src/rbd.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rbd.cc b/src/rbd.cc index 147eb2c5138e9..41cd243735520 100644 --- a/src/rbd.cc +++ b/src/rbd.cc @@ -1643,12 +1643,12 @@ static int do_kernel_add(const char *poolname, const char *imgname, } if (read_only) - oss << " ro"; + oss << " ro,"; else - oss << " rw"; + oss << " "; const char *user = g_conf->name.get_id().c_str(); - oss << ",name=" << user; + oss << "name=" << user; char key_name[strlen(user) + strlen("client.") + 1]; snprintf(key_name, sizeof(key_name), "client.%s", user); From 09e159719b3c8867266d73cc83bdb4b6b52c1f38 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 1 Nov 2013 23:56:45 -0700 Subject: [PATCH 05/37] mon/PGMap: use const ref, not pass-by-value Signed-off-by: Sage Weil --- src/mon/PGMap.cc | 6 +++--- src/mon/PGMap.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc index ab4c885df4c02..2ba8402b34f60 100644 --- a/src/mon/PGMap.cc +++ b/src/mon/PGMap.cc @@ -787,7 +787,7 @@ void PGMap::print_osd_perf_stats(std::ostream *ss) const } void PGMap::recovery_summary(Formatter *f, ostream *out, - pool_stat_t delta_sum) const + const pool_stat_t& delta_sum) const { bool first = true; if (delta_sum.stats.sum.num_objects_degraded) { @@ -825,7 +825,7 @@ void PGMap::recovery_summary(Formatter *f, ostream *out, } void PGMap::recovery_rate_summary(Formatter *f, ostream *out, - pool_stat_t delta_sum, + const pool_stat_t& delta_sum, utime_t delta_stamp) const { // make non-negative; we can get negative values if osds send @@ -886,7 +886,7 @@ void PGMap::pool_recovery_summary(Formatter *f, ostream *out, } void PGMap::client_io_rate_summary(Formatter *f, ostream *out, - pool_stat_t delta_sum, + const pool_stat_t& delta_sum, utime_t delta_stamp) const { pool_stat_t pos_delta = delta_sum; diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h index c8ce7fd973eba..8a931ecbcca67 
100644 --- a/src/mon/PGMap.h +++ b/src/mon/PGMap.h @@ -244,12 +244,12 @@ class PGMap { void print_osd_perf_stats(std::ostream *ss) const; void recovery_summary(Formatter *f, ostream *out, - pool_stat_t delta_sum) const; + const pool_stat_t& delta_sum) const; void overall_recovery_summary(Formatter *f, ostream *out) const; void pool_recovery_summary(Formatter *f, ostream *out, uint64_t poolid) const; void recovery_rate_summary(Formatter *f, ostream *out, - pool_stat_t delta_sum, + const pool_stat_t& delta_sum, utime_t delta_stamp) const; void overall_recovery_rate_summary(Formatter *f, ostream *out) const; void pool_recovery_rate_summary(Formatter *f, ostream *out, @@ -259,7 +259,7 @@ class PGMap { * given @p delta_sum pool over a given @p delta_stamp period of time. */ void client_io_rate_summary(Formatter *f, ostream *out, - pool_stat_t delta_sum, + const pool_stat_t& delta_sum, utime_t delta_stamp) const; /** * Obtain a formatted/plain output for the overall client I/O, which is From c0bcdc37028d2582ef1a475ec957db509e4c2786 Mon Sep 17 00:00:00 2001 From: Xing Lin Date: Sat, 2 Nov 2013 19:24:22 -0600 Subject: [PATCH 06/37] osd/erasurecode: correct one variable name in jerasure_matrix_to_bitmatrix() When bitmatrix is NULL, this function returns NULL. 
Signed-off-by: Xing Lin Reviewed-by: Sage Weil --- src/osd/ErasureCodePluginJerasure/jerasure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osd/ErasureCodePluginJerasure/jerasure.c b/src/osd/ErasureCodePluginJerasure/jerasure.c index 9efae02e5fb21..d5752a8ee517b 100755 --- a/src/osd/ErasureCodePluginJerasure/jerasure.c +++ b/src/osd/ErasureCodePluginJerasure/jerasure.c @@ -276,7 +276,7 @@ int *jerasure_matrix_to_bitmatrix(int k, int m, int w, int *matrix) int rowelts, rowindex, colindex, elt, i, j, l, x; bitmatrix = talloc(int, k*m*w*w); - if (matrix == NULL) { return NULL; } + if (bitmatrix == NULL) { return NULL; } rowelts = k * w; rowindex = 0; From 545135f3e1c75318940caa2c82ac32a53fc4f957 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Sat, 2 Nov 2013 13:54:51 -0700 Subject: [PATCH 07/37] ReplicatedPG::recover_backfill: adjust last_backfill to HEAD if snapdir Otherwise, if last_backfill_started is a snapdir, we will fail to send a transaction for a client IO creating the head object and removing the snapdir object. The result will be that head will eventually be backfilled, but the snapdir object will erroneously not be removed. 
Fixes: #6685 Signed-off-by: Samuel Just Reviewed-by: Greg Farnum --- src/common/hobject.h | 5 +++++ src/osd/ReplicatedPG.cc | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/common/hobject.h b/src/common/hobject.h index edaf04a301fd6..87a2b1e8f6fe4 100644 --- a/src/common/hobject.h +++ b/src/common/hobject.h @@ -86,6 +86,11 @@ struct hobject_t { return ret; } + /// @return true if object is snapdir + bool is_snapdir() const { + return snap == CEPH_SNAPDIR; + } + /// @return snapdir version of this hobject_t hobject_t get_snapdir() const { hobject_t ret(*this); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index ba6b7ecca7175..48040b68a882d 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -8151,6 +8151,17 @@ int ReplicatedPG::recover_backfill( assert(i->first > new_last_backfill); new_last_backfill = i->first; } + + /* If last_backfill is snapdir, we know that head necessarily cannot exist, + * therefore it's safe to bump the snap up to NOSNAP. This is necessary + * since we need avoid having SNAPDIR backfilled and HEAD not backfilled + * since a transaction on HEAD might change SNAPDIR + */ + if (new_last_backfill.is_snapdir()) + new_last_backfill = new_last_backfill.get_head(); + if (last_backfill_started.is_snapdir()) + last_backfill_started = last_backfill_started.get_head(); + assert(!pending_backfill_updates.empty() || new_last_backfill == last_backfill_started); if (pending_backfill_updates.empty() && From 9ab513334c7ff9544bac07bd420c6d5d200cf535 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Sun, 3 Nov 2013 11:06:10 -0800 Subject: [PATCH 08/37] OSD: don't clear peering_wait_for_split in advance_map() I really don't know why I added this... Ops can be discarded from the waiting_for_pg queue if we aren't primary simply because there must have been an exchange of peering events before subops will be sent within a particular epoch. 
Thus, any events in the waiting_for_pg queue must be client ops which should only be seen by the primary. Peering events, on the other hand, should only be discarded if we are in a new interval, and that check might as well be performed in the peering wq. Fixes: #6681 Signed-off-by: Samuel Just Reviewed-by: Greg Farnum --- src/osd/OSD.cc | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index eb5191f770e15..0a2d057df169a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5441,22 +5441,6 @@ void OSD::advance_map(ObjectStore::Transaction& t, C_Contexts *tfin) waiting_for_pg.erase(p++); } } - map >::iterator q = - peering_wait_for_split.begin(); - while (q != peering_wait_for_split.end()) { - pg_t pgid = q->first; - - // am i still primary? - vector acting; - int nrep = osdmap->pg_to_acting_osds(pgid, acting); - int role = osdmap->calc_pg_role(whoami, acting, nrep); - if (role >= 0) { - ++q; // still me - } else { - dout(10) << " discarding waiting ops for " << pgid << dendl; - peering_wait_for_split.erase(q++); - } - } } void OSD::consume_map() From cd0d612e1abdf5c87082eeeccd4ca09dd14fd737 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Sun, 3 Nov 2013 21:02:36 -0800 Subject: [PATCH 09/37] OSD: allow project_pg_history to handle a missing map If we get a peering message for an old map we don't have, we can throw it out: the sending OSD will learn about the newer maps and update itself accordingly, and we don't have the information to know if the message is valid. 
This situation can only happen if the sender was down for a long enough time to create a map gap and its PGs have not yet advanced from their boot-up maps to the current ones, so we can rely on it Fixes: #6712 Signed-off-by: Samuel Just Reviewed-by: Greg Farnum --- src/osd/OSD.cc | 35 +++++++++++++++++++++++++---------- src/osd/OSD.h | 8 ++++++-- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 0a2d057df169a..1a60de6bdfe03 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2201,9 +2201,10 @@ void OSD::handle_pg_peering_evt( int role = osdmap->calc_pg_role(whoami, acting, acting.size()); pg_history_t history = info.history; - project_pg_history(info.pgid, history, epoch, up, acting); + bool valid_history = project_pg_history( + info.pgid, history, epoch, up, acting); - if (epoch < history.same_interval_since) { + if (!valid_history || epoch < history.same_interval_since) { dout(10) << "get_or_create_pg " << info.pgid << " acting changed in " << history.same_interval_since << " (msg from " << epoch << ")" << dendl; return; @@ -2388,7 +2389,7 @@ void OSD::calc_priors_during(pg_t pgid, epoch_t start, epoch_t end, set& ps * Fill in the passed history so you know same_interval_since, same_up_since, * and same_primary_since. 
*/ -void OSD::project_pg_history(pg_t pgid, pg_history_t& h, epoch_t from, +bool OSD::project_pg_history(pg_t pgid, pg_history_t& h, epoch_t from, const vector& currentup, const vector& currentacting) { @@ -2402,7 +2403,11 @@ void OSD::project_pg_history(pg_t pgid, pg_history_t& h, epoch_t from, e > from; e--) { // verify during intermediate epoch (e-1) - OSDMapRef oldmap = get_map(e-1); + OSDMapRef oldmap = service.try_get_map(e-1); + if (!oldmap) { + dout(15) << __func__ << ": found map gap, returning false" << dendl; + return false; + } assert(oldmap->have_pg_pool(pgid.pool())); vector up, acting; @@ -2452,6 +2457,7 @@ void OSD::project_pg_history(pg_t pgid, pg_history_t& h, epoch_t from, } dout(15) << "project_pg_history end " << h << dendl; + return true; } // ------------------------------------- @@ -5919,7 +5925,12 @@ void OSD::handle_pg_create(OpRequestRef op) utime_t now = ceph_clock_now(NULL); history.last_scrub_stamp = now; history.last_deep_scrub_stamp = now; - project_pg_history(pgid, history, created, up, acting); + bool valid_history = + project_pg_history(pgid, history, created, up, acting); + /* the pg creation message must have come from a mon and therefore + * cannot be on the other side of a map gap + */ + assert(valid_history); // register. creating_pgs[pgid].history = history; @@ -6531,9 +6542,11 @@ void OSD::handle_pg_query(OpRequestRef op) // same primary? 
pg_history_t history = it->second.history; - project_pg_history(pgid, history, it->second.epoch_sent, up, acting); + bool valid_history = + project_pg_history(pgid, history, it->second.epoch_sent, up, acting); - if (it->second.epoch_sent < history.same_interval_since) { + if (!valid_history || + it->second.epoch_sent < history.same_interval_since) { dout(10) << " pg " << pgid << " dne, and pg has changed in " << history.same_interval_since << " (msg from " << it->second.epoch_sent << ")" << dendl; @@ -6597,9 +6610,11 @@ void OSD::handle_pg_remove(OpRequestRef op) pg_history_t history = pg->info.history; vector up, acting; osdmap->pg_to_up_acting_osds(pgid, up, acting); - project_pg_history(pg->info.pgid, history, pg->get_osdmap()->get_epoch(), - up, acting); - if (history.same_interval_since <= m->get_epoch()) { + bool valid_history = + project_pg_history(pg->info.pgid, history, pg->get_osdmap()->get_epoch(), + up, acting); + if (valid_history && + history.same_interval_since <= m->get_epoch()) { assert(pg->get_primary() == m->get_source().num()); PGRef _pg(pg); _remove_pg(pg); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index f7559da3be53a..b9fb6153b1ad0 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1191,8 +1191,12 @@ class OSD : public Dispatcher, void build_past_intervals_parallel(); void calc_priors_during(pg_t pgid, epoch_t start, epoch_t end, set& pset); - void project_pg_history(pg_t pgid, pg_history_t& h, epoch_t from, - const vector& lastup, const vector& lastacting); + + /// project pg history from from to now + bool project_pg_history( + pg_t pgid, pg_history_t& h, epoch_t from, + const vector& lastup, const vector& lastacting + ); ///< @return false if there was a map gap between from and now void wake_pg_waiters(pg_t pgid) { if (waiting_for_pg.count(pgid)) { From d03924ca609becf6959022fdbbad7420edeb26c1 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Mon, 4 Nov 2013 23:30:47 +0100 Subject: [PATCH 10/37] galois.c: fix compiler warning 
galois_create_split_w8_tables() takes no parameter, remove '8' passed to the function in one case. osd/ErasureCodePluginJerasure/galois.c: In function 'galois_w32_region_multiply': osd/ErasureCodePluginJerasure/galois.c:696:5: warning: call to function 'galois_create_split_w8_tables' without a real prototype [-Wunprototyped-calls] In file included from osd/ErasureCodePluginJerasure/galois.c:53:0: osd/ErasureCodePluginJerasure/galois.h:71:12: note: 'galois_create_split_w8_tables' was declared here Signed-off-by: Danny Al-Gaaf --- src/osd/ErasureCodePluginJerasure/galois.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osd/ErasureCodePluginJerasure/galois.c b/src/osd/ErasureCodePluginJerasure/galois.c index be8be59affa0c..0de6fbd334cfe 100755 --- a/src/osd/ErasureCodePluginJerasure/galois.c +++ b/src/osd/ErasureCodePluginJerasure/galois.c @@ -693,7 +693,7 @@ void galois_w32_region_multiply(char *region, /* Region to multiply */ nbytes /= sizeof(int); if (galois_split_w8[0]== NULL) { - if (galois_create_split_w8_tables(8) < 0) { + if (galois_create_split_w8_tables() < 0) { fprintf(stderr, "galois_32_region_multiply -- couldn't make split multiplication tables\n"); exit(1); } From 154ee0b0345482bdfdeb84b7d3b00dba71577d97 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 4 Nov 2013 11:25:31 -0800 Subject: [PATCH 11/37] FileStore::_collection_move_rename: handle missing dst dir on replay In case of a replay, a missing destination directory indicates that the destination object and directory have been removed by a later transaction. Thus, we need to remove the src object and return 0. 
Fixes: #6714 Signed-off-by: Samuel Just Reviewed-by: Greg Farnum --- src/os/FileStore.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/os/FileStore.cc b/src/os/FileStore.cc index ffac501aaf20d..89a55b393db7c 100644 --- a/src/os/FileStore.cc +++ b/src/os/FileStore.cc @@ -4273,6 +4273,14 @@ int FileStore::_collection_move_rename(coll_t oldcid, const ghobject_t& oldoid, int r = 0; int dstcmp, srccmp; + if (replaying) { + /* If the destination collection doesn't exist during replay, + * we need to delete the src object and continue on + */ + if (!collection_exists(c)) + goto out_rm_src; + } + dstcmp = _check_replay_guard(c, o, spos); if (dstcmp < 0) goto out_rm_src; From c22c84a88c22688b6044ab37f65a3fe40dfe1983 Mon Sep 17 00:00:00 2001 From: Dan Mick Date: Tue, 5 Nov 2013 16:11:10 -0800 Subject: [PATCH 12/37] osdmaptool: don't put progress on stdout If one requests JSON output, the progress message pollutes the output; don't do that, send it to stderr instead Signed-off-by: Dan Mick --- src/tools/osdmaptool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/osdmaptool.cc b/src/tools/osdmaptool.cc index 2e55026076c32..edd31284c4d4f 100644 --- a/src/tools/osdmaptool.cc +++ b/src/tools/osdmaptool.cc @@ -156,7 +156,7 @@ int main(int argc, const char **argv) OSDMap osdmap; bufferlist bl; - cout << me << ": osdmap file '" << fn << "'" << std::endl; + cerr << me << ": osdmap file '" << fn << "'" << std::endl; int r = 0; struct stat st; From a3ccd29716af900be265b6f995eb4069b334c516 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 5 Nov 2013 15:40:29 -0800 Subject: [PATCH 13/37] RadosModel: use sharedptr_registry for snaps_in_use There might be two concurrent rollback ops each of which adds snap x to snaps_in_use. Between when the first completes and the second completes, snap x may be removed since the first would have removed snap x from snaps_in_use. 
Using sharedptr_registry here avoids this by ensuring that the snap won't be removed from snaps_in_use until all refs are gone. This patch also adds size() to sharedptr_registry. Fixes: #6719 Signed-off-by: Samuel Just Reviewed-by: David Zafman --- src/common/sharedptr_registry.hpp | 5 +++++ src/test/osd/RadosModel.h | 10 +++++++--- src/test/osd/TestRados.cc | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/common/sharedptr_registry.hpp b/src/common/sharedptr_registry.hpp index 9fe2fe6be1a10..83396b8cc5f67 100644 --- a/src/common/sharedptr_registry.hpp +++ b/src/common/sharedptr_registry.hpp @@ -149,6 +149,11 @@ class SharedPtrRegistry { return retval; } + unsigned size() { + Mutex::Locker l(lock); + return contents.size(); + } + void remove(const K &key) { Mutex::Locker l(lock); contents.erase(key); diff --git a/src/test/osd/RadosModel.h b/src/test/osd/RadosModel.h index 902d4b970c3df..80bcf00a6d785 100644 --- a/src/test/osd/RadosModel.h +++ b/src/test/osd/RadosModel.h @@ -18,6 +18,7 @@ #include "Object.h" #include "TestOpStat.h" #include "test/librados/test.h" +#include "common/sharedptr_registry.hpp" #ifndef RADOSMODEL_H #define RADOSMODEL_H @@ -143,7 +144,7 @@ class RadosTestContext { map > pool_obj_cont; set oid_in_use; set oid_not_in_use; - set snaps_in_use; + SharedPtrRegistry snaps_in_use; int current_snap; string pool_name; librados::IoCtx io_ctx; @@ -1321,6 +1322,7 @@ class RollbackOp : public TestOp { bool done; librados::ObjectWriteOperation op; librados::AioCompletion *comp; + std::tr1::shared_ptr in_use; RollbackOp(int n, RadosTestContext *context, @@ -1351,7 +1353,9 @@ class RollbackOp : public TestOp { context->oid_not_in_use.erase(oid); roll_back_to = rand_choose(context->snaps)->first; - context->snaps_in_use.insert(roll_back_to); + in_use = context->snaps_in_use.lookup_or_create( + roll_back_to, + roll_back_to); cout << "rollback oid " << oid << " to " << roll_back_to << std::endl; @@ -1382,7 +1386,7 @@ class RollbackOp : 
public TestOp { context->update_object_version(oid, comp->get_version64()); context->oid_in_use.erase(oid); context->oid_not_in_use.insert(oid); - context->snaps_in_use.erase(roll_back_to); + in_use = std::tr1::shared_ptr(); context->kick(); } diff --git a/src/test/osd/TestRados.cc b/src/test/osd/TestRados.cc index 0c1d55c7777e6..20a4f8209cc55 100644 --- a/src/test/osd/TestRados.cc +++ b/src/test/osd/TestRados.cc @@ -116,7 +116,7 @@ class WeightedTestGenerator : public TestOpGenerator } while (true) { int snap = rand_choose(context.snaps)->first; - if (context.snaps_in_use.count(snap)) + if (context.snaps_in_use.lookup(snap)) continue; // in use; try again! cout << "snap_remove snap " << snap << std::endl; return new SnapRemoveOp(m_op, &context, snap, m_stats); From f4648bc6fec89c870e0c47b38b2f13496742b10f Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 5 Nov 2013 17:47:48 -0800 Subject: [PATCH 14/37] PGLog::rewind_divergent_log: log may not contain newhead Due to split, there may be a hole at newhead. Fixes: #6722 Signed-off-by: Samuel Just Reviewed-by: David Zafman --- src/osd/PGLog.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 526baecf1284a..9f6ca1f70c3bc 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -360,7 +360,7 @@ void PGLog::rewind_divergent_log(ObjectStore::Transaction& t, eversion_t newhead } --p; mark_dirty_from(p->version); - if (p->version == newhead) { + if (p->version <= newhead) { ++p; divergent.splice(divergent.begin(), log.log, p, log.log.end()); break; From c6826c1e8a301b2306530c6e5d0f4a3160c4e691 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 5 Nov 2013 21:48:53 -0800 Subject: [PATCH 15/37] PG: fix operator<<,log_weirdness log bound warning Split may cause holes such that head != tail and yet log.empty(). 
Fixes: #6722 Signed-off-by: Samuel Just Reviewed-by: David Zafman --- src/osd/PG.cc | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index e92013abdc733..8207a675bce4a 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2399,13 +2399,7 @@ void PG::log_weirdness() << " != info.last_update " << info.last_update << "\n"; - if (pg_log.get_log().empty()) { - // shoudl it be? - if (pg_log.get_head() != pg_log.get_tail()) - osd->clog.error() << info.pgid - << " log bound mismatch, empty but (" << pg_log.get_tail() << "," - << pg_log.get_head() << "]\n"; - } else { + if (!pg_log.get_log().empty()) { // sloppy check if ((pg_log.get_log().log.begin()->version <= pg_log.get_tail())) osd->clog.error() << info.pgid @@ -4679,19 +4673,11 @@ ostream& operator<<(ostream& out, const PG& pg) pg.pg_log.get_head() != pg.info.last_update) out << " (info mismatch, " << pg.pg_log.get_log() << ")"; - if (pg.pg_log.get_log().empty()) { - // shoudl it be? - if (pg.pg_log.get_head().version - pg.pg_log.get_tail().version != 0) { - out << " (log bound mismatch, empty)"; - } - } else { - if ((pg.pg_log.get_log().log.begin()->version <= pg.pg_log.get_tail()) || // sloppy check - (pg.pg_log.get_log().log.rbegin()->version != pg.pg_log.get_head() && - !(pg.pg_log.get_head() == pg.pg_log.get_tail()))) { + if (!pg.pg_log.get_log().empty()) { + if ((pg.pg_log.get_log().log.begin()->version <= pg.pg_log.get_tail())) { out << " (log bound mismatch, actual=[" << pg.pg_log.get_log().log.begin()->version << "," << pg.pg_log.get_log().log.rbegin()->version << "]"; - //out << "len=" << pg.log.log.size(); out << ")"; } } From c7a30b881151e08b37339bb025789921e7115288 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 6 Nov 2013 14:33:03 -0800 Subject: [PATCH 16/37] ReplicatedPG: don't skip missing if sentries is empty on pgls Formerly, if sentries is empty, we skip missing. 
In general, we need to continue adding items from missing until we get to next (returned from collection_list_partial) to avoid missing any objects. Fixes: #6633 Signed-off-by: Samuel Just Reviewed-by: David Zafman --- src/osd/ReplicatedPG.cc | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 48040b68a882d..19592a64cea71 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -655,19 +655,38 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) map::const_iterator missing_iter = pg_log.get_missing().missing.lower_bound(current); vector::iterator ls_iter = sentries.begin(); + hobject_t _max = hobject_t::get_max(); while (1) { - if (ls_iter == sentries.end()) { - break; - } + const hobject_t &mcand = + missing_iter == pg_log.get_missing().missing.end() ? + _max : + missing_iter->first; + const hobject_t &lcand = + ls_iter == sentries.end() ? + _max : + *ls_iter; hobject_t candidate; - if (missing_iter == pg_log.get_missing().missing.end() || - *ls_iter < missing_iter->first) { - candidate = *(ls_iter++); + if (mcand == lcand) { + candidate = mcand; + if (!mcand.is_max()) { + ls_iter++; + missing_iter++; + } + } else if (mcand < lcand) { + candidate = mcand; + assert(!mcand.is_max()); + ++missing_iter; } else { - candidate = (missing_iter++)->first; + candidate = lcand; + assert(!lcand.is_max()); + ++ls_iter; } + if (candidate >= next) { + break; + } + if (response.entries.size() == list_size) { next = candidate; break; From 082e7c9eedeeb9a588b4ae9ec4d9cf1343e9704d Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 7 Nov 2013 10:44:30 +0800 Subject: [PATCH 17/37] Ceph: Fix memory leak in chain_flistxattr() Free allocated memory before return. 
Signed-off-by: Li Wang Reviewed-by: Sage Weil --- src/os/chain_xattr.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/os/chain_xattr.cc b/src/os/chain_xattr.cc index 8ca815689ed36..c020c9db84398 100644 --- a/src/os/chain_xattr.cc +++ b/src/os/chain_xattr.cc @@ -388,6 +388,10 @@ int chain_listxattr(const char *fn, char *names, size_t len) { int chain_flistxattr(int fd, char *names, size_t len) { int r; + char *p; + const char * end; + char *dest; + char *dest_end; if (!len) return sys_flistxattr(fd, names, len) * 2; @@ -403,12 +407,12 @@ int chain_flistxattr(int fd, char *names, size_t len) { r = sys_flistxattr(fd, full_buf, total_len); if (r < 0) - return r; + goto done; - char *p = full_buf; - const char *end = full_buf + r; - char *dest = names; - char *dest_end = names + len; + p = full_buf; + end = full_buf + r; + dest = names; + dest_end = names + len; while (p < end) { char name[CHAIN_XATTR_MAX_NAME_LEN * 2 + 16]; From 1ee112fa2efcf743c3f0451d73386d3364b59f1a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 6 Nov 2013 19:59:56 -0800 Subject: [PATCH 18/37] osdmaptool: fix cli tests From c22c84a88c22688b6044ab37f65a3fe40dfe1983. 
Signed-off-by: Sage Weil --- src/test/cli/osdmaptool/clobber.t | 3 +++ src/test/cli/osdmaptool/create-racks.t | 1 + 2 files changed, 4 insertions(+) diff --git a/src/test/cli/osdmaptool/clobber.t b/src/test/cli/osdmaptool/clobber.t index 9bbe4d4ceeb7f..bbec5f0de636a 100644 --- a/src/test/cli/osdmaptool/clobber.t +++ b/src/test/cli/osdmaptool/clobber.t @@ -3,6 +3,7 @@ osdmaptool: writing epoch 1 to myosdmap $ ORIG_FSID="$(osdmaptool --print myosdmap|grep ^fsid)" + osdmaptool: osdmap file 'myosdmap' $ osdmaptool --createsimple 3 myosdmap osdmaptool: osdmap file 'myosdmap' @@ -27,6 +28,7 @@ $ NEW_FSID="$(osdmaptool --print myosdmap|grep ^fsid)" + osdmaptool: osdmap file 'myosdmap' $ [ "$ORIG_FSID" = "$NEW_FSID" ] $ osdmaptool --createsimple 1 --clobber myosdmap @@ -49,6 +51,7 @@ $ NEW_FSID="$(osdmaptool --print myosdmap|grep ^fsid)" + osdmaptool: osdmap file 'myosdmap' #TODO --clobber should probably set new fsid, remove the [1] $ [ "$ORIG_FSID" != "$NEW_FSID" ] [1] diff --git a/src/test/cli/osdmaptool/create-racks.t b/src/test/cli/osdmaptool/create-racks.t index 92bc995a227c5..f686ef4c05131 100644 --- a/src/test/cli/osdmaptool/create-racks.t +++ b/src/test/cli/osdmaptool/create-racks.t @@ -1,4 +1,5 @@ $ osdmaptool --create-from-conf om -c $TESTDIR/ceph.conf.withracks > /dev/null + osdmaptool: osdmap file 'om' $ osdmaptool --test-map-pg 0.0 om osdmaptool: osdmap file 'om' parsed '0.0' -> 0.0 From d8f05024e7f8e7c864bb5030412f6d89f67a711f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 6 Nov 2013 20:02:09 -0800 Subject: [PATCH 19/37] doc/release-notes: note crush update timeout on startup change Signed-off-by: Sage Weil --- doc/release-notes.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/release-notes.rst b/doc/release-notes.rst index 0095b8684e266..251a015788416 100644 --- a/doc/release-notes.rst +++ b/doc/release-notes.rst @@ -39,6 +39,12 @@ v0.69 Upgrading ~~~~~~~~~ +* The sysvinit /etc/init.d/ceph script will, by default, update the + CRUSH 
location of an OSD when it starts. Previously, if the + monitors were not available, this command would hang indefinitely. + Now, that step will time out after 10 seconds and the ceph-osd daemon + will not be started. + * Users of the librados C++ API should replace users of get_version() with get_version64() as the old method only returns a 32-bit value for a 64-bit field. The existing 32-bit get_version() method is now From 84fb1bf3eefe88c0f5f15034d69c171e6531bf76 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 5 Nov 2013 14:54:20 -0800 Subject: [PATCH 20/37] rgw: deny writes to a secondary zone by non-system users Fixes: #6678 We don't want to allow regular users to write to secondary zones, otherwise we'd end up with data inconsistencies. Reviewed-by: Josh Durgin Signed-off-by: Yehuda Sadeh --- src/rgw/rgw_op.cc | 5 +++++ src/rgw/rgw_rados.cc | 4 ++++ src/rgw/rgw_rados.h | 3 +++ 3 files changed, 12 insertions(+) diff --git a/src/rgw/rgw_op.cc b/src/rgw/rgw_op.cc index c750276596f83..bd73a239a4bf1 100644 --- a/src/rgw/rgw_op.cc +++ b/src/rgw/rgw_op.cc @@ -418,6 +418,11 @@ int RGWOp::verify_op_mask() return -EPERM; } + if (!s->system_request && (required_mask & RGW_OP_TYPE_MODIFY) && !store->zone.is_master) { + ldout(s->cct, 5) << "NOTICE: modify request to a non-master zone by a non-system user, permission denied" << dendl; + return -EPERM; + } + return 0; } diff --git a/src/rgw/rgw_rados.cc b/src/rgw/rgw_rados.cc index 4d6f8ef45301a..55d4b92bcbdb0 100644 --- a/src/rgw/rgw_rados.cc +++ b/src/rgw/rgw_rados.cc @@ -362,6 +362,10 @@ int RGWZoneParams::init(CephContext *cct, RGWRados *store, RGWRegion& region) return -EIO; } + is_master = (name == region.master_zone) || (region.master_zone.empty() && name == "default"); + + ldout(cct, 2) << "zone " << name << " is " << (is_master ? 
"" : "NOT ") << "master" << dendl; + return 0; } diff --git a/src/rgw/rgw_rados.h b/src/rgw/rgw_rados.h index 874492ffe692d..476572ce3f6c9 100644 --- a/src/rgw/rgw_rados.h +++ b/src/rgw/rgw_rados.h @@ -428,11 +428,14 @@ struct RGWZoneParams { rgw_bucket user_uid_pool; string name; + bool is_master; RGWAccessKey system_key; map placement_pools; + RGWZoneParams() : is_master(false) {} + static int get_pool_name(CephContext *cct, string *pool_name); void init_name(CephContext *cct, RGWRegion& region); int init(CephContext *cct, RGWRados *store, RGWRegion& region); From 5832e2603c7db5d40b433d0953408993a9b7c217 Mon Sep 17 00:00:00 2001 From: Gary Lowell Date: Thu, 7 Nov 2013 20:27:35 +0000 Subject: [PATCH 21/37] v0.72 --- configure.ac | 2 +- debian/changelog | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index c344e7d6134b6..58c5e1b94a634 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,7 @@ AC_PREREQ(2.59) # VERSION define is not used by the code. It gets a version string # from 'git describe'; see src/ceph_ver.[ch] -AC_INIT([ceph], [0.72-rc1], [ceph-devel@vger.kernel.org]) +AC_INIT([ceph], [0.72], [ceph-devel@vger.kernel.org]) # Create release string. Used with VERSION for RPMs. RPM_RELEASE=0 diff --git a/debian/changelog b/debian/changelog index f6575c29a726c..dd2dbb95ce4f6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +ceph (0.72-1) stable; urgency=low + + * New upstream release + + -- Gary Lowell Thu, 07 Nov 2013 20:25:18 +0000 + ceph (0.72-rc1-1) stable; urgency=low * New upstream release From 0803d607093a911d1fc4a19500493e2e9c4c7894 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 16:57:09 +0100 Subject: [PATCH 22/37] common/buffer.cc: prefer prefix ++operator for non-primitive types Prefer prefix ++operator for non-primitive types like iterators for performance reasons. Prefix ++/-- operators avoid creating a temporary copy. 
Signed-off-by: Danny Al-Gaaf --- src/common/buffer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/buffer.cc b/src/common/buffer.cc index 95c7acdb694a2..819d767d006ff 100644 --- a/src/common/buffer.cc +++ b/src/common/buffer.cc @@ -1045,7 +1045,7 @@ void buffer::list::rebuild_page_aligned() return 0; // no buffers std::list::const_iterator iter = _buffers.begin(); - iter++; + ++iter; if (iter != _buffers.end()) rebuild(); From 555e71779513b89440edffc023e6d2258d4562d7 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 19:39:59 +0100 Subject: [PATCH 23/37] mon/OSDMonitor.cc: prefer prefix ++operator for non-primitive types Prefer prefix ++operator for non-primitive types like iterators for performance reasons. Prefix ++/-- operators avoid creating a temporary copy. Signed-off-by: Danny Al-Gaaf --- src/mon/OSDMonitor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index f976e3101a8e3..f178bdf934540 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -2659,7 +2659,7 @@ int OSDMonitor::prepare_new_pool(string& name, uint64_t auid, int crush_rule, pi->auid = auid; for (vector::const_iterator i = properties.begin(); i != properties.end(); - i++) { + ++i) { size_t equal = i->find('='); if (equal == string::npos) pi->properties[*i] = string(); From a8e10d3d0a27436c6f1df2601678a8abcc4b79f5 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 19:40:47 +0100 Subject: [PATCH 24/37] os/ObjectStore.cc: prefer prefix ++operator for non-primitive types Prefer prefix ++operator for non-primitive types like iterators for performance reasons. Prefix ++/-- operators avoid creating a temporary copy. 
Signed-off-by: Danny Al-Gaaf --- src/os/ObjectStore.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/os/ObjectStore.cc b/src/os/ObjectStore.cc index 1a1bbcb0b67de..327c64167d5cf 100644 --- a/src/os/ObjectStore.cc +++ b/src/os/ObjectStore.cc @@ -504,7 +504,7 @@ int ObjectStore::collection_list(coll_t c, vector& o) int ret = collection_list(c, go); if (ret == 0) { o.reserve(go.size()); - for (vector::iterator i = go.begin(); i != go.end() ; i++) + for (vector::iterator i = go.begin(); i != go.end() ; ++i) o.push_back(i->hobj); } return ret; @@ -520,7 +520,7 @@ int ObjectStore::collection_list_partial(coll_t c, hobject_t start, if (ret == 0) { *next = gnext.hobj; ls->reserve(go.size()); - for (vector::iterator i = go.begin(); i != go.end() ; i++) + for (vector::iterator i = go.begin(); i != go.end() ; ++i) ls->push_back(i->hobj); } return ret; @@ -534,7 +534,7 @@ int ObjectStore::collection_list_range(coll_t c, hobject_t start, hobject_t end, int ret = collection_list_range(c, gstart, gend, seq, &go); if (ret == 0) { ls->reserve(go.size()); - for (vector::iterator i = go.begin(); i != go.end() ; i++) + for (vector::iterator i = go.begin(); i != go.end() ; ++i) ls->push_back(i->hobj); } return ret; From 23f7bfc2a9ad98dffea5564428b0b0233c49cf85 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 19:41:54 +0100 Subject: [PATCH 25/37] osd/ErasureCodePlugin.cc: prefer prefix ++operator for non-primitive types Prefer prefix ++operator for non-primitive types like iterators for performance reasons. Prefix ++/-- operators avoid creating a temporary copy. 
Signed-off-by: Danny Al-Gaaf --- src/osd/ErasureCodePlugin.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osd/ErasureCodePlugin.cc b/src/osd/ErasureCodePlugin.cc index 38ea56a174c25..5d0b6904e1ea2 100644 --- a/src/osd/ErasureCodePlugin.cc +++ b/src/osd/ErasureCodePlugin.cc @@ -45,7 +45,7 @@ ErasureCodePluginRegistry::~ErasureCodePluginRegistry() { for (std::map::iterator i = plugins.begin(); i != plugins.end(); - i++) { + ++i) { void *library = i->second->library; delete i->second; dlclose(library); From 6b5a96b1f1b2b88f1a57cd84f3de5ae70ed76409 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 19:42:29 +0100 Subject: [PATCH 26/37] ErasureCodeJerasure.cc: prefer prefix ++operator for non-primitive types Prefer prefix ++operator for non-primitive types like iterators for performance reasons. Prefix ++/-- operators avoid creating a temporary copy. Signed-off-by: Danny Al-Gaaf --- src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc b/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc index f2be1ed06e711..fe656e58ee00f 100644 --- a/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc +++ b/src/osd/ErasureCodePluginJerasure/ErasureCodeJerasure.cc @@ -54,7 +54,7 @@ int ErasureCodeJerasure::minimum_to_decode(const set &want_to_read, return -EIO; set::iterator i; unsigned j; - for (i = available_chunks.begin(), j = 0; j < (unsigned)k; i++, j++) + for (i = available_chunks.begin(), j = 0; j < (unsigned)k; ++i, j++) minimum->insert(*i); } return 0; @@ -67,7 +67,7 @@ int ErasureCodeJerasure::minimum_to_decode_with_cost(const set &want_to_rea set available_chunks; for (map::const_iterator i = available.begin(); i != available.end(); - i++) + ++i) available_chunks.insert(i->first); return minimum_to_decode(want_to_read, available_chunks, minimum); } From 36ae9eb3214acee1c43e8a560ab20647197b5a47 
Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 19:46:09 +0100 Subject: [PATCH 27/37] osd/osd_types.cc: use !p.tiers.empty() instead of size() Use empty() since it should be preferred as it has, following the standard, a constant time complexity regardless of the container type. The same is not guaranteed for size(). Signed-off-by: Danny Al-Gaaf --- src/osd/osd_types.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 05b83c4af21bd..0cb3c0c64897b 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1062,7 +1062,7 @@ ostream& operator<<(ostream& out, const pg_pool_t& p) out << " max_bytes " << p.quota_max_bytes; if (p.quota_max_objects) out << " max_objects " << p.quota_max_objects; - if (p.tiers.size()) + if (!p.tiers.empty()) out << " tiers " << p.tiers; if (p.is_tier()) out << " tier_of " << p.tier_of; From d0cf2bfb3206d90123b448732bd243ce73631d0c Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 19:51:14 +0100 Subject: [PATCH 28/37] ErasureCodeExample.h: prefer prefix ++operator for non-primitive types Prefer prefix ++operator for non-primitive types like iterators for performance reasons. Prefix ++/-- operators avoid creating a temporary copy.
Signed-off-by: Danny Al-Gaaf --- src/test/osd/ErasureCodeExample.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/test/osd/ErasureCodeExample.h b/src/test/osd/ErasureCodeExample.h index 0fd55187559f4..07694ea409aae 100644 --- a/src/test/osd/ErasureCodeExample.h +++ b/src/test/osd/ErasureCodeExample.h @@ -76,7 +76,7 @@ class ErasureCodeExample : public ErasureCodeInterface { set available_chunks; for (map::const_iterator i = c2c.begin(); i != c2c.end(); - i++) + ++i) available_chunks.insert(i->first); return minimum_to_decode(want_to_read, available_chunks, minimum); } @@ -109,7 +109,7 @@ class ErasureCodeExample : public ErasureCodeInterface { const bufferptr ptr = out.buffers().front(); for (set::iterator j = want_to_encode.begin(); j != want_to_encode.end(); - j++) { + ++j) { bufferptr chunk(ptr, (*j) * chunk_length, chunk_length); (*encoded)[*j].push_front(chunk); } @@ -125,7 +125,7 @@ class ErasureCodeExample : public ErasureCodeInterface { unsigned chunk_length = (*chunks.begin()).second.length(); for (set::iterator i = want_to_read.begin(); i != want_to_read.end(); - i++) { + ++i) { if (chunks.find(*i) != chunks.end()) { // // If the chunk is available, just copy the bufferptr pointer @@ -146,7 +146,7 @@ class ErasureCodeExample : public ErasureCodeInterface { bufferptr chunk(chunk_length); map::const_iterator k = chunks.begin(); const char *a = k->second.buffers().front().c_str(); - k++; + ++k; const char *b = k->second.buffers().front().c_str(); for (unsigned j = 0; j < chunk_length; j++) { chunk[j] = a[j] ^ b[j]; From fbdfd87c95c42d46ff5e1ddf5c282fadbb8b7977 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 19:51:48 +0100 Subject: [PATCH 29/37] remove unused variable from Objecter::RequestStateHook::call() Signed-off-by: Danny Al-Gaaf --- src/osdc/Objecter.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc index d2c574d982e1e..734df5e9e6900 100644 --- 
a/src/osdc/Objecter.cc +++ b/src/osdc/Objecter.cc @@ -2406,7 +2406,6 @@ Objecter::RequestStateHook::RequestStateHook(Objecter *objecter) : bool Objecter::RequestStateHook::call(std::string command, cmdmap_t& cmdmap, std::string format, bufferlist& out) { - stringstream ss; Formatter *f = new_formatter(format); m_objecter->client_lock.Lock(); m_objecter->dump_requests(f); From 548fc13c179b182b9feef7e434bcb3ce5adc6c89 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 21:32:42 +0100 Subject: [PATCH 30/37] rgw/rgw_http_client.cc: use static_cast<>() instead of C-Style cast Signed-off-by: Danny Al-Gaaf --- src/rgw/rgw_http_client.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rgw/rgw_http_client.cc b/src/rgw/rgw_http_client.cc index 314e80b9ef26c..1c6b6d4d71ba0 100644 --- a/src/rgw/rgw_http_client.cc +++ b/src/rgw/rgw_http_client.cc @@ -234,7 +234,7 @@ static int do_curl_wait(CephContext *cct, CURLM *handle) int RGWHTTPClient::process_request(void *handle, bool wait_for_data, bool *done) { - multi_req_data *req_data = (multi_req_data *)handle; + multi_req_data *req_data = static_cast(handle); int still_running; int mstatus; @@ -282,7 +282,7 @@ int RGWHTTPClient::complete_request(void *handle) do { ret = process_request(handle, true, &done); } while (!done && !ret); - multi_req_data *req_data = (multi_req_data *)handle; + multi_req_data *req_data = static_cast(handle); delete req_data; return ret; From 9226a97aa26079e8e111742dc25807777dea748c Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 21:33:33 +0100 Subject: [PATCH 31/37] rgw/rgw_user.cc: use static_cast<>() instead of C-Style cast Signed-off-by: Danny Al-Gaaf --- src/rgw/rgw_user.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rgw/rgw_user.cc b/src/rgw/rgw_user.cc index dc529e3d48d4b..e4462ec11e05e 100644 --- a/src/rgw/rgw_user.cc +++ b/src/rgw/rgw_user.cc @@ -2358,7 +2358,7 @@ class RGWUserMetadataHandler : public 
RGWMetadataHandler { } int list_keys_next(void *handle, int max, list& keys, bool *truncated) { - list_keys_info *info = (list_keys_info *)handle; + list_keys_info *info = static_cast(handle); string no_filter; @@ -2387,7 +2387,7 @@ class RGWUserMetadataHandler : public RGWMetadataHandler { } void list_keys_complete(void *handle) { - list_keys_info *info = (list_keys_info *)handle; + list_keys_info *info = static_cast(handle); delete info; } }; From ace35f215460e1101bf0636c8c8b1ef117c5dd38 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 21:59:20 +0100 Subject: [PATCH 32/37] Client.cc: remove unused variable from Client::CommandHook::call() Signed-off-by: Danny Al-Gaaf --- src/client/Client.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/client/Client.cc b/src/client/Client.cc index 89de94ee6ea5b..11939c0a1531e 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -109,7 +109,6 @@ Client::CommandHook::CommandHook(Client *client) : bool Client::CommandHook::call(std::string command, cmdmap_t& cmdmap, std::string format, bufferlist& out) { - stringstream ss; Formatter *f = new_formatter(format); f->open_object_section("result"); m_client->client_lock.Lock(); From 6566dfb740e3e542e2356fb1c9217af94bdd656f Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 22:00:33 +0100 Subject: [PATCH 33/37] osd/ReplicatedPG.cc: remove unused variable Signed-off-by: Danny Al-Gaaf --- src/osd/ReplicatedPG.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 500042d16e3a4..f8100b33a8a92 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -8110,7 +8110,6 @@ int ReplicatedPG::recover_backfill( } PGBackend::RecoveryHandle *h = pgbackend->open_recovery_op(); - map > pushes; for (map >::iterator i = to_push.begin(); From 0da5a019f55b66c71216c39e88b61ca9363940eb Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 22:01:04 +0100 Subject: [PATCH 34/37] rgw: 
remove unused variables Signed-off-by: Danny Al-Gaaf --- src/rgw/rgw_quota.cc | 1 - src/rgw/rgw_rest_metadata.cc | 1 - 2 files changed, 2 deletions(-) diff --git a/src/rgw/rgw_quota.cc b/src/rgw/rgw_quota.cc index 66609ca723c28..89611f5858769 100644 --- a/src/rgw/rgw_quota.cc +++ b/src/rgw/rgw_quota.cc @@ -126,7 +126,6 @@ class AsyncRefreshHandler : public RGWGetBucketStats_CB { int AsyncRefreshHandler::init_fetch() { ldout(store->ctx(), 20) << "initiating async quota refresh for bucket=" << bucket << dendl; - map bucket_stats; int r = store->get_bucket_stats_async(bucket, this); if (r < 0) { ldout(store->ctx(), 0) << "could not get bucket info for bucket=" << bucket.name << dendl; diff --git a/src/rgw/rgw_rest_metadata.cc b/src/rgw/rgw_rest_metadata.cc index 5036235ebd2b6..afd5c7b6cc055 100644 --- a/src/rgw/rgw_rest_metadata.cc +++ b/src/rgw/rgw_rest_metadata.cc @@ -31,7 +31,6 @@ static inline void frame_metadata_key(req_state *s, string& out) { bool exists; string key = s->info.args.get("key", &exists); - string metadata_key; string section; if (!s->bucket_name_str.empty()) { section = s->bucket_name_str; From 91627f2d332fae35b421ff94fbf7451d33f35352 Mon Sep 17 00:00:00 2001 From: Danny Al-Gaaf Date: Tue, 5 Nov 2013 22:01:42 +0100 Subject: [PATCH 35/37] test_seek_read.c: remove unused variable 'off64_t so' Signed-off-by: Danny Al-Gaaf --- src/test/old/test_seek_read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/old/test_seek_read.c b/src/test/old/test_seek_read.c index aedb32a2ae0b5..1ea3b750b455f 100644 --- a/src/test/old/test_seek_read.c +++ b/src/test/old/test_seek_read.c @@ -38,7 +38,7 @@ int main(int argc, char **argv) utime_t start = ceph_clock_now(g_ceph_context); for (int i=0; iargs_vec; cmd_getval(g_ceph_context, cmdmap, "args", args_vec); From 01f7b463d13fb244957e6defeae5b3523db2c3c1 Mon Sep 17 00:00:00 2001 From: Noah Watkins Date: Thu, 7 Nov 2013 15:38:58 -0800 Subject: [PATCH 37/37] client: use platform-specific stat 
time members Signed-off-by: Noah Watkins --- configure.ac | 11 +++ src/client/Client.cc | 45 +++++++------ src/include/Makefile.am | 3 +- src/include/stat.h | 145 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 181 insertions(+), 23 deletions(-) create mode 100644 src/include/stat.h diff --git a/configure.ac b/configure.ac index 13d80856f0060..d1aaa6a481472 100644 --- a/configure.ac +++ b/configure.ac @@ -539,6 +539,17 @@ AC_CHECK_FUNC([fallocate], [AC_DEFINE([CEPH_HAVE_FALLOCATE], [], [fallocate(2) is supported])], []) +# +# Test for time-related `struct stat` members. +# + +AC_CHECK_MEMBER([struct stat.st_mtim.tv_nsec], + [AC_DEFINE(HAVE_STAT_ST_MTIM_TV_NSEC, 1, + [Define if you have struct stat.st_mtim.tv_nsec])]) + +AC_CHECK_MEMBER([struct stat.st_mtimespec.tv_nsec], + [AC_DEFINE(HAVE_STAT_ST_MTIMESPEC_TV_NSEC, 1, + [Define if you have struct stat.st_mtimespec.tv_nsec])]) AC_CHECK_HEADERS([arpa/nameser_compat.h]) AC_CHECK_HEADERS([sys/prctl.h]) diff --git a/src/client/Client.cc b/src/client/Client.cc index 89de94ee6ea5b..a47f3e26b6b2a 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -84,6 +84,7 @@ using namespace std; #include "ObjecterWriteback.h" #include "include/assert.h" +#include "include/stat.h" #undef dout_prefix #define dout_prefix *_dout << "client." 
<< whoami << " " @@ -4503,9 +4504,9 @@ int Client::_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid, I if (in->caps_issued_mask(CEPH_CAP_FILE_EXCL)) { if (mask & (CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME)) { if (mask & CEPH_SETATTR_MTIME) - in->mtime = utime_t(attr->st_mtim.tv_sec, attr->st_mtim.tv_nsec); + in->mtime = utime_t(stat_get_mtime_sec(attr), stat_get_mtime_nsec(attr)); if (mask & CEPH_SETATTR_ATIME) - in->atime = utime_t(attr->st_atim.tv_sec, attr->st_atim.tv_nsec); + in->atime = utime_t(stat_get_atime_sec(attr), stat_get_atime_nsec(attr)); in->ctime = ceph_clock_now(cct); in->time_warp_seq++; mark_caps_dirty(in, CEPH_CAP_FILE_EXCL); @@ -4535,14 +4536,14 @@ int Client::_setattr(Inode *in, struct stat *attr, int mask, int uid, int gid, I req->inode_drop |= CEPH_CAP_AUTH_SHARED; } if (mask & CEPH_SETATTR_MTIME) { - req->head.args.setattr.mtime = - utime_t(attr->st_mtim.tv_sec, attr->st_mtim.tv_nsec); + utime_t mtime = utime_t(stat_get_mtime_sec(attr), stat_get_mtime_nsec(attr)); + req->head.args.setattr.mtime = mtime; req->inode_drop |= CEPH_CAP_AUTH_SHARED | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } if (mask & CEPH_SETATTR_ATIME) { - req->head.args.setattr.atime = - utime_t(attr->st_atim.tv_sec, attr->st_atim.tv_nsec); + utime_t atime = utime_t(stat_get_atime_sec(attr), stat_get_atime_nsec(attr)); + req->head.args.setattr.atime = atime; req->inode_drop |= CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; } @@ -4652,16 +4653,16 @@ int Client::fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat, nest_inf st->st_uid = in->uid; st->st_gid = in->gid; if (in->ctime.sec() > in->mtime.sec()) { - st->st_ctim.tv_sec = in->ctime.sec(); - st->st_ctim.tv_nsec = in->ctime.nsec(); + stat_set_ctime_sec(st, in->ctime.sec()); + stat_set_ctime_nsec(st, in->ctime.nsec()); } else { - st->st_ctim.tv_sec = in->mtime.sec(); - st->st_ctim.tv_nsec = in->mtime.nsec(); + stat_set_ctime_sec(st, in->mtime.sec()); + stat_set_ctime_nsec(st, in->mtime.nsec()); 
} - st->st_atim.tv_sec = in->atime.sec(); - st->st_atim.tv_nsec = in->atime.nsec(); - st->st_mtim.tv_sec = in->mtime.sec(); - st->st_mtim.tv_nsec = in->mtime.nsec(); + stat_set_atime_sec(st, in->atime.sec()); + stat_set_atime_nsec(st, in->atime.nsec()); + stat_set_mtime_sec(st, in->mtime.sec()); + stat_set_mtime_nsec(st, in->mtime.nsec()); if (in->is_dir()) { //st->st_size = in->dirstat.size(); st->st_size = in->rstat.rbytes; @@ -4807,10 +4808,10 @@ int Client::utime(const char *relpath, struct utimbuf *buf) if (r < 0) return r; struct stat attr; - attr.st_mtim.tv_sec = buf->modtime; - attr.st_mtim.tv_nsec = 0; - attr.st_atim.tv_sec = buf->actime; - attr.st_atim.tv_nsec = 0; + stat_set_mtime_sec(&attr, buf->modtime); + stat_set_mtime_nsec(&attr, 0); + stat_set_atime_sec(&attr, buf->actime); + stat_set_atime_nsec(&attr, 0); return _setattr(in, &attr, CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME); } @@ -4828,10 +4829,10 @@ int Client::lutime(const char *relpath, struct utimbuf *buf) if (r < 0) return r; struct stat attr; - attr.st_mtim.tv_sec = buf->modtime; - attr.st_mtim.tv_nsec = 0; - attr.st_atim.tv_sec = buf->actime; - attr.st_atim.tv_nsec = 0; + stat_set_mtime_sec(&attr, buf->modtime); + stat_set_mtime_nsec(&attr, 0); + stat_set_atime_sec(&attr, buf->actime); + stat_set_atime_nsec(&attr, 0); return _setattr(in, &attr, CEPH_SETATTR_MTIME|CEPH_SETATTR_ATIME); } diff --git a/src/include/Makefile.am b/src/include/Makefile.am index 34976a6cc2990..6e076600e273c 100644 --- a/src/include/Makefile.am +++ b/src/include/Makefile.am @@ -78,4 +78,5 @@ noinst_HEADERS += \ include/rbd/features.h \ include/rbd/librbd.h \ include/rbd/librbd.hpp\ - include/util.h + include/util.h\ + include/stat.h diff --git a/src/include/stat.h b/src/include/stat.h new file mode 100644 index 0000000000000..19398758e210d --- /dev/null +++ b/src/include/stat.h @@ -0,0 +1,145 @@ +#ifndef CEPH_STAT_H +#define CEPH_STAT_H + +#include + +#include + +/* + * Access time-related `struct stat` members. 
+ * + * Note that for each of the stat member get/set functions below, setting a + * high-res value (stat_set_*_nsec) on a platform without high-res support is + * a no-op. + */ + +#ifdef HAVE_STAT_ST_MTIM_TV_NSEC + +static inline uint32_t stat_get_mtime_nsec(struct stat *st) +{ + return st->st_mtim.tv_nsec; +} + +static inline void stat_set_mtime_nsec(struct stat *st, uint32_t nsec) +{ + st->st_mtim.tv_nsec = nsec; +} + +static inline uint32_t stat_get_atime_nsec(struct stat *st) +{ + return st->st_atim.tv_nsec; +} + +static inline void stat_set_atime_nsec(struct stat *st, uint32_t nsec) +{ + st->st_atim.tv_nsec = nsec; +} + +static inline uint32_t stat_get_ctime_nsec(struct stat *st) +{ + return st->st_ctim.tv_nsec; +} + +static inline void stat_set_ctime_nsec(struct stat *st, uint32_t nsec) +{ + st->st_ctim.tv_nsec = nsec; +} + +#elif defined(HAVE_STAT_ST_MTIMESPEC_TV_NSEC) + +static inline uint32_t stat_get_mtime_nsec(struct stat *st) +{ + return st->st_mtimespec.tv_nsec; +} + +static inline void stat_set_mtime_nsec(struct stat *st, uint32_t nsec) +{ + st->st_mtimespec.tv_nsec = nsec; +} + +static inline uint32_t stat_get_atime_nsec(struct stat *st) +{ + return st->st_atimespec.tv_nsec; +} + +static inline void stat_set_atime_nsec(struct stat *st, uint32_t nsec) +{ + st->st_atimespec.tv_nsec = nsec; +} + +static inline uint32_t stat_get_ctime_nsec(struct stat *st) +{ + return st->st_ctimespec.tv_nsec; +} + +static inline void stat_set_ctime_nsec(struct stat *st, uint32_t nsec) +{ + st->st_ctimespec.tv_nsec = nsec; +} + +#else + +static inline uint32_t stat_get_mtime_nsec(struct stat *st) +{ + return 0; +} + +static inline void stat_set_mtime_nsec(struct stat *st, uint32_t nsec) +{ +} + +static inline uint32_t stat_get_atime_nsec(struct stat *st) +{ + return 0; +} + +static inline void stat_set_atime_nsec(struct stat *st, uint32_t nsec) +{ +} + +static inline uint32_t stat_get_ctime_nsec(struct stat *st) +{ + return 0; +} + +static inline void 
stat_set_ctime_nsec(struct stat *st, uint32_t nsec) +{ +} + +#endif + +/* + * Access second-resolution `struct stat` members. + */ + +static inline uint32_t stat_get_mtime_sec(struct stat *st) +{ + return st->st_mtime; +} + +static inline void stat_set_mtime_sec(struct stat *st, uint32_t sec) +{ + st->st_mtime = sec; +} + +static inline uint32_t stat_get_atime_sec(struct stat *st) +{ + return st->st_atime; +} + +static inline void stat_set_atime_sec(struct stat *st, uint32_t sec) +{ + st->st_atime = sec; +} + +static inline uint32_t stat_get_ctime_sec(struct stat *st) +{ + return st->st_ctime; +} + +static inline void stat_set_ctime_sec(struct stat *st, uint32_t sec) +{ + st->st_ctime = sec; +} + +#endif