diff --git a/config/docker-testval.config.src b/config/docker-testval.config.src index 38da0449e..ec609ec37 100644 --- a/config/docker-testval.config.src +++ b/config/docker-testval.config.src @@ -57,7 +57,7 @@ %% as without one miner_lora is not started %% including the params anyway in case someone needs it in this env {region_override, 'US915'}, - {metrics, [block_metrics, grpc_metrics]}, + {metrics, [block_metrics, txn_metrics, grpc_metrics, snapshot_metrics]}, {metrics_port, ${METRICS_PORT:-9090}} ]}, {grpcbox, diff --git a/config/docker-val.config.src b/config/docker-val.config.src index ecf844b03..0c1877789 100644 --- a/config/docker-val.config.src +++ b/config/docker-val.config.src @@ -53,7 +53,7 @@ %% as without one miner_lora is not started %% including the params anyway in case someone needs it in this env {region_override, 'US915'}, - {metrics, [block_metrics, grpc_metrics]}, + {metrics, [block_metrics, txn_metrics, grpc_metrics, snapshot_metrics]}, {metrics_port, ${METRICS_PORT:-9090}} ]}, {grpcbox, diff --git a/config/test_val.config.src b/config/test_val.config.src index 49b3f88dd..69da04a5e 100644 --- a/config/test_val.config.src +++ b/config/test_val.config.src @@ -63,7 +63,7 @@ {region_override, 'US915'}, {gateway_and_mux_enable, false}, {default_routers, ["${DEFAULT_ROUTERS:-/p2p/11w77YQLhgUt8HUJrMtntGGr97RyXmot1ofs5Ct2ELTmbFoYsQa}"]}, - {metrics, [block_metrics, grpc_metrics]}, + {metrics, [block_metrics, txn_metrics, grpc_metrics, snapshot_metrics]}, {metrics_port, ${METRICS_PORT:-9090}}, {seed_validators, [ {"1ZPNnNd9k5qiQXXigKifQpCPiy5HTbszQDSyLM56ywk7ihNRvt6", "18.223.171.149", 8080}, %% test-val2 diff --git a/config/val.config.src b/config/val.config.src index 8addeb13f..f35da68e5 100644 --- a/config/val.config.src +++ b/config/val.config.src @@ -50,7 +50,7 @@ {sidecar_parallelism_limit, ${SIDECAR_PARALLELISM_LIMIT:-3}}, {hotfix_dir, "${HOTFIX_DIR:-/opt/miner/hotfix}"}, {update_dir, "${UPDATE_DIR:-/opt/miner/update}"}, - {metrics, 
[block_metrics, grpc_metrics]}, + {metrics, [block_metrics, txn_metrics, grpc_metrics, snapshot_metrics]}, {metrics_port, ${METRICS_PORT:-9090}}, %% these two now disable all the poc stuff {use_ebus, false}, diff --git a/include/metrics.hrl b/include/metrics.hrl index 359737591..886b8e9f1 100644 --- a/include/metrics.hrl +++ b/include/metrics.hrl @@ -1,21 +1,73 @@ -define(METRICS_HISTOGRAM_BUCKETS, [50, 100, 250, 500, 1000, 2000, 5000, 10000, 30000, 60000]). -define(METRICS_BLOCK_ABSORB, "blockchain_block_absorb_duration"). +-define(METRICS_BLOCK_UNVAL_ABSORB, "blockchain_block_unval_absorb_duration"). -define(METRICS_BLOCK_HEIGHT, "blockchain_block_height"). +-define(METRICS_BLOCK_UNVAL_HEIGHT, "blockchain_block_unval_height"). +-define(METRICS_TXN_ABSORB_DURATION, "blockchain_txn_absorb_duration"). +-define(METRICS_TXN_SUBMIT_COUNT, "blockchain_txn_mgr_submitted_count"). +-define(METRICS_TXN_REJECT_COUNT, "blockchain_txn_mgr_rejected_count"). +-define(METRICS_TXN_REJECT_SPAN, "blockchain_txn_mgr_rejected_span"). +-define(METRICS_TXN_ACCEPT_COUNT, "blockchain_txn_mgr_accepted_count"). +-define(METRICS_TXN_ACCEPT_SPAN, "blockchain_txn_mgr_accepted_span"). +-define(METRICS_TXN_PROCESS_DURATION, "blockchain_txn_mgr_process_duration"). +-define(METRICS_TXN_CACHE_SIZE, "blockchain_txn_mgr_cache_size"). +-define(METRICS_TXN_BLOCK_TIME, "blockchain_txn_mgr_block_time"). +-define(METRICS_TXN_BLOCK_AGE, "blockchain_txn_mgr_block_age"). -define(METRICS_GRPC_SESSIONS, "grpcbox_session_count"). -define(METRICS_GRPC_LATENCY, "grpcbox_session_latency"). +-define(METRICS_SC_COUNT, "blockchain_state_channel_count"). +-define(METRICS_SNAP_LOAD_SIZE, "blockchain_snapshot_load_size"). +-define(METRICS_SNAP_LOAD_DURATION, "blockchain_snapshot_load_duration"). +-define(METRICS_SNAP_GEN_SIZE, "blockchain_snapshot_gen_size"). +-define(METRICS_SNAP_GEN_DURATION, "blockchain_snapshot_gen_duration"). 
-define(METRICS, #{ block_metrics => { [ [blockchain, block, absorb], - [blockchain, block, height] ], + [blockchain, block, height], + [blockchain, block, unvalidated_absorb], + [blockchain, block, unvalidated_height] ], [ {?METRICS_BLOCK_ABSORB, prometheus_histogram, [stage], "Block absorb duration"}, - {?METRICS_BLOCK_HEIGHT, prometheus_gauge, [time], "Most recent block height"} ] + {?METRICS_BLOCK_HEIGHT, prometheus_gauge, [time], "Most recent block height"}, + {?METRICS_BLOCK_UNVAL_ABSORB, prometheus_histogram, [stage], "Block unvalidated absorb duration"}, + {?METRICS_BLOCK_UNVAL_HEIGHT, prometheus_gauge, [time], "Most recent unvalidated block height"} ] + }, + txn_metrics => { + [ [blockchain, txn, absorb], + [blockchain, txn_mgr, submit], + [blockchain, txn_mgr, reject], + [blockchain, txn_mgr, accept], + [blockchain, txn_mgr, process], + [blockchain, txn_mgr, add_block] ], + [ {?METRICS_TXN_ABSORB_DURATION, prometheus_histogram, [type], "Txn absorb duration"}, + {?METRICS_TXN_SUBMIT_COUNT, prometheus_counter, [type], "Count of submitted transactions"}, + {?METRICS_TXN_REJECT_COUNT, prometheus_counter, [type], "Count of rejected transactions"}, + {?METRICS_TXN_REJECT_SPAN, prometheus_gauge, [type], "Block span of transactions on final rejection"}, + {?METRICS_TXN_ACCEPT_COUNT, prometheus_counter, [type], "Count of accepted transactions"}, + {?METRICS_TXN_ACCEPT_SPAN, prometheus_gauge, [type], "Block span of transactions on acceptance"}, + {?METRICS_TXN_PROCESS_DURATION, prometheus_histogram, [stage], "Transaction manager cache process duration"}, + {?METRICS_TXN_CACHE_SIZE, prometheus_gauge, [height], "Transaction manager buffer size"}, + {?METRICS_TXN_BLOCK_TIME, prometheus_gauge, [height], "Block time observed from the transaction mgr"}, + {?METRICS_TXN_BLOCK_AGE, prometheus_gauge, [height], "Block age observed from the transaction mgr"} ] }, grpc_metrics => { [ [grpcbox, server, rpc_begin], [grpcbox, server, rpc_end] ], [ {?METRICS_GRPC_SESSIONS, 
prometheus_gauge, [method], "Grpc session count"}, {?METRICS_GRPC_LATENCY, prometheus_histogram, [method, status], "Grpc session latency"} ] + }, + state_channel_metrics => { + [ [blockchain, state_channel, open], + [blockchain, state_channel, close] ], + [ {?METRICS_SC_COUNT, prometheus_gauge, [version, id], "Active state channel count"} ] + }, + snapshot_metrics => { + [ [blockchain, snapshot, generate], + [blockchain, snapshot, load] ], + [ {?METRICS_SNAP_GEN_SIZE, prometheus_gauge, [blocks, version], "Generated snapshot byte size"}, + {?METRICS_SNAP_GEN_DURATION, prometheus_gauge, [blocks, version], "Generated snapshot processing duration"}, + {?METRICS_SNAP_LOAD_SIZE, prometheus_gauge, [height, hash, version, source], "Loaded snapshot byte size"}, + {?METRICS_SNAP_LOAD_DURATION, prometheus_gauge, [height, hash, version, source], "Loaded snapshot processing duration"} ] } }). diff --git a/rebar.config b/rebar.config index a41d4fb1a..484668333 100644 --- a/rebar.config +++ b/rebar.config @@ -10,7 +10,7 @@ {deps, [ {blockchain, {git, "https://github.com/helium/blockchain-core.git", - {branch, "master"}}}, + {branch, "jg/txn_absorb_metrics"}}}, {sibyl, {git, "https://github.com/helium/sibyl.git", {branch, "master"}}}, {hbbft, {git, "https://github.com/helium/erlang-hbbft.git", diff --git a/src/handlers/miner_hbbft_handler.erl b/src/handlers/miner_hbbft_handler.erl index bef96fc08..ba3b1ed06 100644 --- a/src/handlers/miner_hbbft_handler.erl +++ b/src/handlers/miner_hbbft_handler.erl @@ -79,7 +79,7 @@ metadata(Version, Meta, Chain) -> Infos = blockchain_ledger_snapshot_v1:get_infos(Chain), case blockchain_ledger_snapshot_v1:snapshot(Ledger, Blocks, Infos) of {ok, Snapshot} -> - {ok, _SnapHeight, SnapHash} = blockchain:add_snapshot(Snapshot, Chain), + {ok, _SnapHeight, SnapHash, _SnapSize} = blockchain:add_snapshot(Snapshot, Chain), lager:info("snapshot hash is ~p", [SnapHash]), maps:put(snapshot_hash, SnapHash, ChainMeta0); _Err -> diff --git 
a/src/metrics/miner_metrics_server.erl b/src/metrics/miner_metrics_server.erl index b2f33b162..c2f7e4487 100644 --- a/src/metrics/miner_metrics_server.erl +++ b/src/metrics/miner_metrics_server.erl @@ -131,6 +131,41 @@ handle_metric_event([blockchain, block, absorb], #{duration := Duration}, #{stag handle_metric_event([blockchain, block, height], #{height := Height}, #{time := Time}) -> prometheus_gauge:set(?METRICS_BLOCK_HEIGHT, [Time], Height), ok; +handle_metric_event([blockchain, block, unvalidated_absorb], #{duration := Duration}, #{stage := Stage}) -> + prometheus_histogram:observe(?METRICS_BLOCK_UNVAL_ABSORB, [Stage], Duration), + ok; +handle_metric_event([blockchain, block, unvalidated_height], #{height := Height}, #{time := Time}) -> + prometheus_gauge:set(?METRICS_BLOCK_UNVAL_HEIGHT, [Time], Height), + ok; +handle_metric_event([blockchain, txn, absorb], #{duration := Duration}, #{type := Type}) -> + prometheus_histogram:observe(?METRICS_TXN_ABSORB_DURATION, [Type], Duration), + ok; +handle_metric_event([blockchain, txn_mgr, submit], _Measurements, #{type := Type}) -> + prometheus_counter:inc(?METRICS_TXN_SUBMIT_COUNT, [Type]), + ok; +handle_metric_event([blockchain, txn_mgr, reject], #{block_span := Span}, #{type := Type}) -> + prometheus_counter:inc(?METRICS_TXN_REJECT_COUNT, [Type]), + prometheus_gauge:set(?METRICS_TXN_REJECT_SPAN, [Type], Span), + ok; +handle_metric_event([blockchain, txn_mgr, accept], #{block_span := Span}, #{type := Type}) -> + prometheus_counter:inc(?METRICS_TXN_ACCEPT_COUNT, [Type]), + prometheus_gauge:set(?METRICS_TXN_ACCEPT_SPAN, [Type], Span), + ok; +handle_metric_event([blockchain, txn_mgr, process], #{duration := Duration}, #{stage := Stage}) -> + prometheus_histogram:observe(?METRICS_TXN_PROCESS_DURATION, [Stage], Duration), + ok; +handle_metric_event([blockchain, txn_mgr, add_block], #{cache := Cache, block_time := BlockTime, block_age := BlockAge}, + #{height := Height}) -> + prometheus_gauge:set(?METRICS_TXN_CACHE_SIZE, 
[Height], Cache), + prometheus_gauge:set(?METRICS_TXN_BLOCK_TIME, [Height], BlockTime), + prometheus_gauge:set(?METRICS_TXN_BLOCK_AGE, [Height], BlockAge), + ok; +handle_metric_event([blockchain, state_channel, open], _Measurements, #{version := Vzn, id := Id}) -> + prometheus_gauge:inc(?METRICS_SC_COUNT, [Vzn, Id]), + ok; +handle_metric_event([blockchain, state_channel, close], _Measurements, #{version := Vzn, id := Id}) -> + prometheus_gauge:dec(?METRICS_SC_COUNT, [Vzn, Id]), + ok; handle_metric_event([grpcbox, server, rpc_end], #{server_latency := Latency}, #{grpc_server_method := Method, grpc_server_status := Status}) -> prometheus_gauge:dec(?METRICS_GRPC_SESSIONS, [Method]), @@ -138,4 +173,14 @@ handle_metric_event([grpcbox, server, rpc_end], #{server_latency := Latency}, ok; handle_metric_event([grpcbox, server, rpc_begin], _Measurements, #{grpc_server_method := Method}) -> prometheus_gauge:inc(?METRICS_GRPC_SESSIONS, [Method]), + ok; +handle_metric_event([blockchain, snapshot, generate], #{duration := Duration, size := Size}, + #{blocks := Blocks, version := Vzn}) -> + prometheus_gauge:set(?METRICS_SNAP_GEN_SIZE, [Blocks, Vzn], Size), + prometheus_gauge:set(?METRICS_SNAP_GEN_DURATION, [Blocks, Vzn], Duration), + ok; +handle_metric_event([blockchain, snapshot, load], #{duration := Duration, size := Size}, + #{height := Height, hash := Hash, version := Vzn, source := Source}) -> + prometheus_gauge:set(?METRICS_SNAP_LOAD_SIZE, [Height, Hash, Vzn, Source], Size), + prometheus_gauge:set(?METRICS_SNAP_LOAD_DURATION, [Height, Hash, Vzn, Source], Duration), ok.