Skip to content

Commit 2784673

Browse files
anuthan authored and
Aliaksey Artamonau committed
Make adding new stages easy.
Adding a new stage is now equivalent to signalling the start of the stage: master_activity_events:note_rebalance_stage_started(Stage, NodesInvolved). If no nodes are involved in the stage, i.e., NodesInvolved is [], we ignore the stage as a part of rebalance visibility. This can result in new stages showing up in the UI which weren't part of the rebalance visibility output at the start of rebalance. To mark a stage as completed, call master_activity_events:note_rebalance_stage_completed(Stage). Part of EPIC, MB-30894: Rebalance visibility and reporting Change-Id: I95f7542f4fa6b1e0771e2ab83879efb98ac48e03 Reviewed-on: http://review.couchbase.org/105567 Tested-by: Abhijeeth Nuthan <[email protected]> Well-Formed: Build Bot <[email protected]> Reviewed-by: Aliaksey Artamonau <[email protected]>
1 parent 004fba7 commit 2784673

File tree

4 files changed

+77
-54
lines changed

4 files changed

+77
-54
lines changed

src/master_activity_events.erl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
note_autofailover_node_state_change/4,
6262
note_autofailover_server_group_state_change/4,
6363
note_autofailover_done/2,
64-
note_rebalance_stage_started/1,
64+
note_rebalance_stage_started/2,
6565
note_rebalance_stage_completed/1,
6666
note_rebalance_stage_event/2
6767
]).
@@ -87,8 +87,8 @@ get_stage_list(Stage) when is_atom(Stage) ->
8787
get_stage_list(Stage) when is_list(Stage) ->
8888
Stage.
8989

90-
note_rebalance_stage_started(Stage) ->
91-
submit_cast({rebalance_stage_started, get_stage_list(Stage)}).
90+
note_rebalance_stage_started(Stage, Nodes) ->
91+
submit_cast({rebalance_stage_started, get_stage_list(Stage), Nodes}).
9292

9393
note_rebalance_stage_completed(Stage) ->
9494
submit_cast({rebalance_stage_completed, get_stage_list(Stage)}).
@@ -408,10 +408,11 @@ maybe_get_pids_node(Pid) when is_pid(Pid) ->
408408
maybe_get_pids_node(_PerhapsBinary) ->
409409
skip_this_pair_please.
410410

411-
event_to_jsons({TS, rebalance_stage_started, Stage}) ->
411+
event_to_jsons({TS, rebalance_stage_started, Stage, Nodes}) ->
412412
[format_simple_plist_as_json([{type, rebalanceStageStarted},
413413
{ts, misc:time_to_epoch_float(TS)},
414-
{stage, {list, Stage}}])];
414+
{stage, {list, Stage}},
415+
{nodes, {list, Nodes}}])];
415416

416417
event_to_jsons({TS, rebalance_stage_completed, Stage}) ->
417418
[format_simple_plist_as_json([{type, rebalanceStageCompleted},

src/ns_rebalance_observer.erl

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ is_interesting_master_event({vbucket_move_start, _Pid, _BucketName, _Node, _VBuc
117117
fun handle_vbucket_move_start/2;
118118
is_interesting_master_event({vbucket_move_done, _BucketName, _VBucketId}) ->
119119
fun handle_vbucket_move_done/2;
120-
is_interesting_master_event({rebalance_stage_started, _Stage}) ->
120+
is_interesting_master_event({rebalance_stage_started, _Stage, _Nodes}) ->
121121
fun handle_rebalance_stage_started/2;
122122
is_interesting_master_event({rebalance_stage_completed, _Stage}) ->
123123
fun handle_rebalance_stage_completed/2;
@@ -142,16 +142,6 @@ is_interesting_master_event({seqno_waiting_ended, _BucketName, _VBucketId, _, _}
142142
is_interesting_master_event(_) ->
143143
undefined.
144144

145-
possible_substages(kv, NodesInfo) ->
146-
case proplists:get_value(delta_nodes, NodesInfo, []) of
147-
[] ->
148-
[];
149-
DeltaNodes ->
150-
[{kv_delta_recovery, DeltaNodes, []}]
151-
end;
152-
possible_substages(_,_) ->
153-
[].
154-
155145
get_stage_nodes(Services, NodesInfo) ->
156146
ActiveNodes = proplists:get_value(active_nodes, NodesInfo, []),
157147
lists:filtermap(
@@ -160,8 +150,7 @@ get_stage_nodes(Services, NodesInfo) ->
160150
[] ->
161151
false;
162152
Nodes ->
163-
SubStages = possible_substages(Service, NodesInfo),
164-
{true, {Service, Nodes, SubStages}}
153+
{true, {Service, Nodes}}
165154
end
166155
end, lists:usort(Services)).
167156

@@ -339,9 +328,10 @@ initiate_bucket_rebalance(BucketName, FFMap, OldState) ->
339328
TmpState = update_all_vb_info(OldState, BucketName, dict:from_list(Moves)),
340329
TmpState#state{bucket = BucketName}.
341330

342-
handle_rebalance_stage_started({TS, rebalance_stage_started, Stage},
331+
handle_rebalance_stage_started({TS, rebalance_stage_started, Stage, Nodes},
343332
#state{stage_info = Old} = State) ->
344-
New = rebalance_stage_info:update_stage_info(Stage, {started, TS}, Old),
333+
New = rebalance_stage_info:update_stage_info(Stage, {started, {TS, Nodes}},
334+
Old),
345335
{noreply, State#state{stage_info = New}}.
346336

347337
handle_rebalance_stage_completed({TS, rebalance_stage_completed, Stage},

src/ns_rebalancer.erl

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -540,9 +540,9 @@ rebalance_simple_services(Config, Services, KeepNodes) ->
540540
true ->
541541
lists:filtermap(
542542
fun (Service) ->
543-
master_activity_events:note_rebalance_stage_started(
544-
Service),
545543
ServiceNodes = ns_cluster_membership:service_nodes(KeepNodes, Service),
544+
master_activity_events:note_rebalance_stage_started(
545+
Service, ServiceNodes),
546546
Updated = update_service_map_with_config(Config, Service, ServiceNodes),
547547

548548
master_activity_events:note_rebalance_stage_completed(
@@ -601,7 +601,7 @@ rebalance_topology_aware_services(Config, Services, KeepNodesAll, EjectNodesAll)
601601
false;
602602
_ ->
603603
master_activity_events:note_rebalance_stage_started(
604-
Service),
604+
Service, AllNodes),
605605
update_service_map_with_config(Config, Service, AllNodes),
606606
ok = rebalance_topology_aware_service(Service, KeepNodes,
607607
EjectNodes, DeltaNodes),
@@ -716,7 +716,9 @@ rebalance_body(KeepNodes,
716716

717717
ok = drop_old_2i_indexes(KeepNodes),
718718

719-
master_activity_events:note_rebalance_stage_started(kv),
719+
LiveKVNodes = ns_cluster_membership:service_nodes(KeepNodes ++ EjectNodesAll,
720+
kv),
721+
master_activity_events:note_rebalance_stage_started(kv, LiveKVNodes),
720722
%% wait till all bucket shutdowns are done on nodes we're
721723
%% adding (or maybe adding).
722724
do_wait_buckets_shutdown(KeepNodes),
@@ -731,7 +733,7 @@ rebalance_body(KeepNodes,
731733
end, BucketConfigs),
732734

733735
master_activity_events:note_rebalance_stage_started(
734-
[kv, kv_delta_recovery]),
736+
[kv, kv_delta_recovery], KVDeltaNodes),
735737
ok = apply_delta_recovery_buckets(DeltaRecoveryBuckets,
736738
KVDeltaNodes, BucketConfigs),
737739
ok = maybe_clear_recovery_type(KeepNodes),
@@ -1371,7 +1373,9 @@ do_run_graceful_failover_moves(Nodes, BucketName, BucketConfig, I, N) ->
13711373
Map = proplists:get_value(map, BucketConfig, []),
13721374
Map1 = mb_map:promote_replicas_for_graceful_failover(Map, Nodes),
13731375

1374-
master_activity_events:note_rebalance_stage_started(kv),
1376+
ActiveNodes = ns_cluster_membership:active_nodes(),
1377+
InvolvedNodes = ns_cluster_membership:service_nodes(ActiveNodes, kv),
1378+
master_activity_events:note_rebalance_stage_started(kv, InvolvedNodes),
13751379
ProgressFun = make_progress_fun(I, N),
13761380
RV = run_mover(BucketName, BucketConfig,
13771381
proplists:get_value(servers, BucketConfig),

src/rebalance_stage_info.erl

Lines changed: 56 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -40,35 +40,27 @@
4040

4141
-type stage_info() :: #stage_info{}.
4242

43-
init(Stages) ->
44-
PerStageProgress = dict:from_list(init_per_stage_progress(Stages)),
43+
%% Need StageNodes as when rebalance starts we need to show a minimum stages of
44+
%% rebalance that are expected to occur, usually the services involved.
45+
init(StageNodes) ->
46+
PerStageProgress = dict:from_list(init_per_stage_progress(StageNodes)),
4547
Aggregated = aggregate(PerStageProgress),
46-
StageInfo = init_per_stage_info(Stages),
48+
StageInfo = init_per_stage_info(StageNodes),
4749
#stage_info{per_stage_progress = PerStageProgress,
4850
aggregated = Aggregated,
4951
per_stage_info = StageInfo}.
5052

51-
init_per_stage_progress(Stages) ->
52-
lists:flatten([init_stage_progress(S, N, SS) || {S, N, SS} <- Stages]).
53+
init_per_stage_progress(StageNodes) ->
54+
[{Stage, dict:from_list([{N, 0} || N <- Nodes])} ||
55+
{Stage, Nodes} <- StageNodes, Nodes =/= []].
5356

54-
init_stage_progress(_Stage, [], _SubStage) ->
55-
[];
56-
init_stage_progress(Stage, Nodes, SubStages) ->
57-
SubStageNodes = init_per_stage_progress(SubStages),
58-
[{Stage, dict:from_list([{N, 0} || N <- Nodes])} | SubStageNodes].
57+
init_per_stage_info(StageNodes) ->
58+
[{Stage, #stage_details{}} || {Stage, Nodes} <- StageNodes, Nodes =/= []].
5959

6060
%% For backward compatibility.
6161
get_progress(#stage_info{aggregated = Aggregated}) ->
6262
Aggregated.
6363

64-
init_per_stage_info(Stages) ->
65-
[{Stage, #stage_details{
66-
start_time = false,
67-
complete_time = false,
68-
sub_stages = init_per_stage_info(SubStages),
69-
notable_events = []
70-
}} || {Stage, Nodes, SubStages} <- Stages, Nodes =/= []].
71-
7264
update_progress(
7365
Stage, StageProgress,
7466
#stage_info{per_stage_progress = OldPerStageProgress} = StageInfo) ->
@@ -85,7 +77,7 @@ do_update_progress(Stage, StageProgress, PerStage) ->
8577
dict:merge(fun (_, _, New) ->
8678
New
8779
end, OldStageProgress, StageProgress)
88-
end, PerStage).
80+
end, StageProgress, PerStage).
8981

9082
aggregate(PerStage) ->
9183
TmpAggr = dict:fold(
@@ -212,32 +204,39 @@ get_per_stage_progress(PerStageProgress) ->
212204
dict:to_list(StageProgress)
213205
end, PerStageProgress).
214206

215-
update_stage_info({started, Time}, StageInfo) ->
207+
update_stage({started, {Time, _}}, StageInfo) ->
216208
StageInfo#stage_details{start_time = Time,
217209
complete_time = false};
218-
update_stage_info({completed, Time}, StageInfo) ->
210+
update_stage({completed, Time}, StageInfo) ->
219211
StageInfo#stage_details{complete_time = Time};
220-
update_stage_info({notable_event, TS, Text},
221-
#stage_details{notable_events = NotableEvents} = StageInfo) ->
212+
update_stage({notable_event, TS, Text},
213+
#stage_details{notable_events = NotableEvents} = StageInfo) ->
222214
Time = binarify_timestamp(TS),
223215
Msg = list_to_binary(Text),
224216
StageInfo#stage_details{notable_events = [{Time, Msg} | NotableEvents]}.
225217

226-
update_stage_info(Stage, StageInfoUpdate,
227-
#stage_info{per_stage_info = PerStageInfo} = StageInfo) ->
218+
update_stage_info(Stage, StageInfoUpdate, StageInfo) ->
219+
NewStageInfo = maybe_create(Stage, StageInfoUpdate, StageInfo,
220+
fun maybe_create_new_stage_progress/3),
221+
update_stage_info_inner(Stage, StageInfoUpdate, NewStageInfo).
222+
223+
update_stage_info_inner(Stage, StageInfoUpdate,
224+
#stage_info{per_stage_info = PerStageInfo} = StageInfo) ->
228225
NewPerStageInfo = update_stage_info_rec(Stage, StageInfoUpdate,
229226
PerStageInfo),
230227
StageInfo#stage_info{per_stage_info = NewPerStageInfo}.
231228

232-
update_stage_info_rec([Stage | SubStages], StageInfoUpdate, AllStageInfo) ->
229+
update_stage_info_rec([Stage | SubStages] = AllStages, StageInfoUpdate,
230+
AllStageInfo) ->
233231
case lists:keysearch(Stage, 1, AllStageInfo) of
234232
false ->
235-
AllStageInfo;
233+
maybe_create(AllStages, StageInfoUpdate, AllStageInfo,
234+
fun create_stage/3);
236235
{value, {Stage, OldStageInfo}} ->
237236
NewStageInfo =
238237
case SubStages of
239238
[] ->
240-
update_stage_info(StageInfoUpdate, OldStageInfo);
239+
update_stage(StageInfoUpdate, OldStageInfo);
241240
_ ->
242241
NewSubStages = update_stage_info_rec(
243242
SubStages,
@@ -248,3 +247,32 @@ update_stage_info_rec([Stage | SubStages], StageInfoUpdate, AllStageInfo) ->
248247
end,
249248
lists:keyreplace(Stage, 1, AllStageInfo, {Stage, NewStageInfo})
250249
end.
250+
251+
create_new_field({started, {_, []}}) ->
252+
false;
253+
create_new_field({started, {_, _}}) ->
254+
true;
255+
create_new_field(_) ->
256+
false.
257+
258+
maybe_create(Stage, Info, Old, Fun) ->
259+
case create_new_field(Info) of
260+
true -> Fun(Stage, Info, Old);
261+
false -> Old
262+
end.
263+
264+
create_stage([Stage | _] = AllStages, {started, {_,_}} = Info, AllStageInfo) ->
265+
update_stage_info_rec(AllStages, Info,
266+
[{Stage, #stage_details{}} | AllStageInfo]).
267+
268+
maybe_create_new_stage_progress(
269+
Stage, {started, {_, Nodes}},
270+
#stage_info{per_stage_progress = PerStageProgress} = StageInfo) ->
271+
ProgressStage = lists:last(Stage),
272+
case dict:find(ProgressStage, PerStageProgress) of
273+
{ok, _} ->
274+
StageInfo;
275+
_ ->
276+
[{ProgressStage, Dict}] = init_per_stage_progress([{ProgressStage, Nodes}]),
277+
update_progress(ProgressStage, Dict, StageInfo)
278+
end.

0 commit comments

Comments
 (0)