From fd3d135c2065757d1ab1b7959586cfe8be963692 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Tue, 21 Jan 2025 13:23:09 -0800 Subject: [PATCH 1/4] kvs: output min/max object size stats as ints Problem: The min/max cache object size stat is returned as floating point, even though such a statistic cannot have a decimal place by definition (in contrast to lets say "mean" or "stdev"). Return the min/max cache object size as an integer instead of a floating point. --- src/modules/kvs/kvs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/modules/kvs/kvs.c b/src/modules/kvs/kvs.c index d36229b24fe2..0125d3a05143 100644 --- a/src/modules/kvs/kvs.c +++ b/src/modules/kvs/kvs.c @@ -2371,12 +2371,12 @@ static void stats_get_cb (flux_t *h, goto error; } - if (!(tstats = json_pack ("{ s:i s:f s:f s:f s:f }", + if (!(tstats = json_pack ("{ s:i s:I s:f s:f s:I }", "count", tstat_count (&ts), - "min", tstat_min (&ts)*scale, + "min", (json_int_t)tstat_min (&ts)*scale, "mean", tstat_mean (&ts)*scale, "stddev", tstat_stddev (&ts)*scale, - "max", tstat_max (&ts)*scale))) + "max", (json_int_t)tstat_max (&ts)*scale))) goto nomem; if (!(cstats = json_pack ("{ s:f s:O s:i s:i s:i }", From af7a42b257dde431a14e747c6b83c476b97431ab Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Thu, 16 Jan 2025 14:29:27 -0800 Subject: [PATCH 2/4] kvs: add transaction stats Problem: In the future we would like to put a cap on the maximum number of operations in a transaction. However, we do not have any stat tracking of operations in a transaction. Add transaction stats for commits and fences in the kvs module. --- src/modules/kvs/kvs.c | 53 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/src/modules/kvs/kvs.c b/src/modules/kvs/kvs.c index 0125d3a05143..a55c7b8b0709 100644 --- a/src/modules/kvs/kvs.c +++ b/src/modules/kvs/kvs.c @@ -78,6 +78,8 @@ struct kvs_ctx { char *hash_name; unsigned int seq; /* for commit transactions */ kvs_checkpoint_t *kcp; + tstat_t txn_commit_stats; + tstat_t txn_fence_stats; zhashx_t *requests; /* track unfinished requests */ struct list_head work_queue; }; @@ -1721,6 +1723,8 @@ static void relaycommit_request_cb (flux_t *h, goto error; } + tstat_push (&ctx->txn_commit_stats, (double)json_array_size (ops)); + /* N.B. no request tracking for relay. The relay does not get a * response, only the original via finalize_transaction_bynames(). */ @@ -1813,6 +1817,8 @@ static void commit_request_cb (flux_t *h, goto error; } + tstat_push (&ctx->txn_commit_stats, (double)json_array_size (ops)); + work_queue_check_append (ctx, root); } else { @@ -1920,6 +1926,9 @@ static void relayfence_request_cb (flux_t *h, goto error; } + tstat_push (&ctx->txn_fence_stats, + (double)json_array_size (treq_get_ops (tr))); + work_queue_check_append (ctx, root); } @@ -2040,6 +2049,9 @@ static void fence_request_cb (flux_t *h, goto error; } + tstat_push (&ctx->txn_fence_stats, + (double)json_array_size (treq_get_ops (tr))); + work_queue_check_append (ctx, root); } } @@ -2349,6 +2361,21 @@ static int stats_get_root_cb (struct kvsroot *root, void *arg) return 0; } +static json_t *get_tstat_obj (tstat_t *ts, double scale) +{ + json_t *o = json_pack ("{ s:i s:I s:f s:f s:I }", + "count", tstat_count (ts), + "min", (json_int_t)(tstat_min (ts)*scale), + "mean", tstat_mean (ts)*scale, + "stddev", tstat_stddev (ts)*scale, + "max", (json_int_t)(tstat_max (ts)*scale)); + if (!o) { + errno = ENOMEM; + return NULL; + } + return o; +} + static void stats_get_cb (flux_t *h, flux_msg_handler_t *mh, const flux_msg_t *msg, @@ -2357,6 +2384,8 @@ static void stats_get_cb (flux_t *h, struct kvs_ctx *ctx = arg; json_t *tstats = NULL; json_t *cstats = NULL; + json_t *txncstats = NULL; + json_t *txnfstats = NULL; json_t *nsstats = NULL; tstat_t ts = { 0 }; int size = 0, incomplete = 0, dirty = 0; @@ -2371,12 +2400,7 @@ static void stats_get_cb (flux_t *h, goto error; } - if (!(tstats = json_pack ("{ s:i s:I s:f s:f s:I }", - "count", tstat_count (&ts), - "min", (json_int_t)tstat_min (&ts)*scale, - "mean", tstat_mean (&ts)*scale, - "stddev", tstat_stddev (&ts)*scale, - "max", (json_int_t)tstat_max (&ts)*scale))) + if (!(tstats = get_tstat_obj (&ts, scale))) goto nomem; if (!(cstats = json_pack ("{ s:f s:O s:i s:i s:i }", @@ -2387,6 +2411,12 @@ static void stats_get_cb (flux_t *h, "#faults", ctx->faults))) goto nomem; + if (!(txncstats = get_tstat_obj (&ctx->txn_commit_stats, 1.0))) + goto nomem; + + if (!(txnfstats = get_tstat_obj (&ctx->txn_fence_stats, 1.0))) + goto nomem; + if (!(nsstats = json_object ())) goto nomem; @@ -2415,13 +2445,18 @@ static void stats_get_cb (flux_t *h, if (flux_respond_pack (h, msg, - "{ s:O s:O s:i }", + "{ s:O s:O s:{s:O s:O} s:i }", "cache", cstats, "namespace", nsstats, + "transactions", + "commit", txncstats, + "fence", txnfstats, "pending_requests", zhashx_size (ctx->requests)) < 0) flux_log_error (h, "%s: flux_respond_pack", __FUNCTION__); json_decref (tstats); json_decref (cstats); + json_decref (txncstats); + json_decref (txnfstats); json_decref (nsstats); return; nomem: @@ -2431,6 +2466,8 @@ static void stats_get_cb (flux_t *h, flux_log_error (h, "%s: flux_respond_error", __FUNCTION__); json_decref (tstats); json_decref (cstats); + json_decref (txncstats); + json_decref (txnfstats); json_decref (nsstats); } @@ -2443,6 +2480,8 @@ static int stats_clear_root_cb (struct kvsroot *root, void *arg) static void stats_clear (struct kvs_ctx *ctx) { ctx->faults = 0; + memset (&ctx->txn_commit_stats, '\0', sizeof (ctx->txn_commit_stats)); + memset (&ctx->txn_fence_stats, '\0', sizeof (ctx->txn_fence_stats)); if (kvsroot_mgr_iter_roots (ctx->krm, stats_clear_root_cb, NULL) < 0) flux_log_error (ctx->h, "%s: kvsroot_mgr_iter_roots", __FUNCTION__); From 4a18a1efb88eb2b8551a7f45423a07d221d1a64e Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Fri, 17 Jan 2025 16:30:52 -0800 Subject: [PATCH 3/4] t: move some tests towards end of tests Problem: The testing of stats clearing in the KVS is done a little bit before the end of a set of tests. This can be inconvenient for future tests that do not wish stats to be cleared. Move the clearing of stats tests towards the end of tests in t1001-kvs-internals.t. --- src/modules/kvs/kvs.c | 2 +- t/t1001-kvs-internals.t | 60 ++++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/src/modules/kvs/kvs.c b/src/modules/kvs/kvs.c index a55c7b8b0709..4cd5043d6552 100644 --- a/src/modules/kvs/kvs.c +++ b/src/modules/kvs/kvs.c @@ -2448,7 +2448,7 @@ static void stats_get_cb (flux_t *h, "{ s:O s:O s:{s:O s:O} s:i }", "cache", cstats, "namespace", nsstats, - "transactions", + "transaction-opcount", "commit", txncstats, "fence", txnfstats, "pending_requests", zhashx_size (ctx->requests)) < 0) diff --git a/t/t1001-kvs-internals.t b/t/t1001-kvs-internals.t index ec113a52f9e6..525edcef784f 100755 --- a/t/t1001-kvs-internals.t +++ b/t/t1001-kvs-internals.t @@ -461,36 +461,6 @@ test_expect_success 'kvs: read-your-writes consistency on alt namespace' ' flux kvs namespace remove rywtestns ' -# -# test clear of stats -# - -# each store of largeval will increase the noop store count, b/c we -# know that the identical large value will be cached as raw data - -test_expect_success 'kvs: clear stats locally' ' - flux kvs unlink -Rf $DIR && - flux module stats -c kvs && - flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 && - flux kvs put $DIR.largeval1=$largeval && - flux kvs put $DIR.largeval2=$largeval && - ! flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 && - flux module stats -c kvs && - flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 -' - -test_expect_success NO_ASAN 'kvs: clear stats globally' ' - flux kvs unlink -Rf $DIR && - flux module stats -C kvs && - flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" && - for i in `seq 0 $((${SIZE} - 1))`; do - flux exec -n -r $i sh -c "flux kvs put $DIR.$i.largeval1=$largeval $DIR.$i.largeval2=$largeval" - done && - ! flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" && - flux module stats -C kvs && - flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" -' - # # test fence api # @@ -534,6 +504,36 @@ test_expect_success 'kvs: 1 pending requests at end of tests before module remov test $pendingcount1 -eq 1 ' +# +# test clear of stats +# + +# each store of largeval will increase the noop store count, b/c we +# know that the identical large value will be cached as raw data + +test_expect_success 'kvs: clear stats locally' ' + flux kvs unlink -Rf $DIR && + flux module stats -c kvs && + flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 && + flux kvs put $DIR.largeval1=$largeval && + flux kvs put $DIR.largeval2=$largeval && + ! flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 && + flux module stats -c kvs && + flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 +' + +test_expect_success NO_ASAN 'kvs: clear stats globally' ' + flux kvs unlink -Rf $DIR && + flux module stats -C kvs && + flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" && + for i in `seq 0 $((${SIZE} - 1))`; do + flux exec -n -r $i sh -c "flux kvs put $DIR.$i.largeval1=$largeval $DIR.$i.largeval2=$largeval" + done && + ! flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" && + flux module stats -C kvs && + flux exec -n sh -c "flux module stats --parse \"namespace.primary.#no-op stores\" kvs | grep -q 0" +' + # # test ENOSYS on unfinished requests when unloading the KVS module # From dffe9e4e6f1519fbe98d6e4b972b959419e93c81 Mon Sep 17 00:00:00 2001 From: Albert Chu Date: Thu, 16 Jan 2025 14:30:20 -0800 Subject: [PATCH 4/4] t: cover kvs transaction stats Problem: There is no coverage for the new KVS transaction stats. Add coverage in t1001-kvs-internals.t. --- t/t1001-kvs-internals.t | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/t/t1001-kvs-internals.t b/t/t1001-kvs-internals.t index 525edcef784f..cec250e8117a 100755 --- a/t/t1001-kvs-internals.t +++ b/t/t1001-kvs-internals.t @@ -504,6 +504,25 @@ test_expect_success 'kvs: 1 pending requests at end of tests before module remov test $pendingcount1 -eq 1 ' +# +# transaction module stats +# + +test_expect_success 'kvs: module stats returns reasonable transaction stats' ' + commitdata=$(flux module stats -p transaction-opcount.commit kvs) && + echo $commitdata | jq -e ".count > 0" && + echo $commitdata | jq -e ".min > 0" && + echo $commitdata | jq -e ".max > 0" && + echo $commitdata | jq -e ".mean > 0.0" && + echo $commitdata | jq -e ".stddev >= 0.0" && + fencedata=$(flux module stats -p transaction-opcount.fence kvs) && + echo $fencedata | jq -e ".count > 0" && + echo $fencedata | jq -e ".min > 0" && + echo $fencedata | jq -e ".max > 0" && + echo $fencedata | jq -e ".mean > 0.0" && + echo $fencedata | jq -e ".stddev >= 0.0" +' + # # test clear of stats # @@ -522,6 +541,21 @@ test_expect_success 'kvs: clear stats locally' ' flux module stats --parse "namespace.primary.#no-op stores" kvs | grep -q 0 ' +test_expect_success 'kvs: clear of transaction stats works' ' + commitdata=$(flux module stats -p transaction-opcount.commit kvs) && + echo $commitdata | jq -e ".count == 0" && + echo $commitdata | jq -e ".min == 0" && + echo $commitdata | jq -e ".max == 0" && + echo $commitdata | jq -e ".mean == 0.0" && + echo $commitdata | jq -e ".stddev == 0.0" && + fencedata=$(flux module stats -p transaction-opcount.fence kvs) && + echo $fencedata | jq -e ".count == 0" && + echo $fencedata | jq -e ".min == 0" && + echo $fencedata | jq -e ".max == 0" && + echo $fencedata | jq -e ".mean == 0.0" && + echo $fencedata | jq -e ".stddev == 0.0" +' + test_expect_success NO_ASAN 'kvs: clear stats globally' ' flux kvs unlink -Rf $DIR && flux module stats -C kvs &&