From 5f7351d78473634e717c956362d233b8000854c4 Mon Sep 17 00:00:00 2001 From: Fred Hebert Date: Sat, 14 Nov 2015 14:56:56 +0000 Subject: [PATCH] Fix IO locking up in shell apps Application masters are booted at the root of apps, and take over the group leader role to redirect IO. To cut the chain short and properly have their role inherited, they are their own leader, and keep a reference to the old leader in their internal state, which we cannot change. This is done so that the ownership of a process by a given application can be established, and so that resources outside the supervision tree can be properly cleaned up when an app is shut down. This patch goes around and finds all processes whose group leaders are application masters older than the new `user' process booted by the shell providers, and re-points their group leader to that new `user'. This lets the application masters survive, and fixes the blocking IO issue (resolving issue #899). This may mean an incomplete cleanup is done on application shutdown, but that seems like a fair compromise. 
--- src/rebar_prv_shell.erl | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/rebar_prv_shell.erl b/src/rebar_prv_shell.erl index d93a21f41..c6449309d 100644 --- a/src/rebar_prv_shell.erl +++ b/src/rebar_prv_shell.erl @@ -113,20 +113,34 @@ info() -> setup_shell() -> %% scan all processes for any with references to the old user and save them to %% update later - NeedsUpdate = [Pid || Pid <- erlang:processes(), - proplists:get_value(group_leader, erlang:process_info(Pid)) == whereis(user) - ], + OldUser = whereis(user), %% terminate the current user ok = supervisor:terminate_child(kernel_sup, user), %% start a new shell (this also starts a new user under the correct group) _ = user_drv:start(), %% wait until user_drv and user have been registered (max 3 seconds) ok = wait_until_user_started(3000), + NewUser = whereis(user), %% set any process that had a reference to the old user's group leader to the %% new user process. Catch the race condition when the Pid exited after the %% liveness check. - _ = [catch erlang:group_leader(whereis(user), Pid) || Pid <- NeedsUpdate, - is_process_alive(Pid)], + _ = [catch erlang:group_leader(NewUser, Pid) + || Pid <- erlang:processes(), + proplists:get_value(group_leader, erlang:process_info(Pid)) == OldUser, + is_process_alive(Pid)], + %% Application masters have the same problem, but they hold the old group + %% leader in their state and hold on to it. Re-point the processes whose + %% leaders are application masters. This can mess up a few things around + %% shutdown time, but is nicer than the current lock-up. 
+ OldMasters = [Pid + || Pid <- erlang:processes(), + Pid < NewUser, % only change old masters + {_,Dict} <- [erlang:process_info(Pid, dictionary)], + {application_master,init,4} == proplists:get_value('$initial_call', Dict)], + _ = [catch erlang:group_leader(NewUser, Pid) + || Pid <- erlang:processes(), + lists:member(proplists:get_value(group_leader, erlang:process_info(Pid)), + OldMasters)], try %% enable error_logger's tty output error_logger:swap_handler(tty),