From e18e0d77b976d13c360e143d1d3461eceeda11e5 Mon Sep 17 00:00:00 2001 From: Francesco Noacco Date: Thu, 8 Jun 2023 09:51:14 +0200 Subject: [PATCH 1/3] feat(e2e): add allowed retries parameter to scheduler_options Signed-off-by: Francesco Noacco --- tools/astarte_e2e/lib/astarte_e2e/config.ex | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tools/astarte_e2e/lib/astarte_e2e/config.ex b/tools/astarte_e2e/lib/astarte_e2e/config.ex index fa9937876..297e5f3ce 100644 --- a/tools/astarte_e2e/lib/astarte_e2e/config.ex +++ b/tools/astarte_e2e/lib/astarte_e2e/config.ex @@ -97,6 +97,22 @@ defmodule AstarteE2E.Config do type: :integer, default: 60 + @envdoc """ + Number of checks performed before declaring a timeout at startup. + Defaults to 1. + """ + app_env :check_limit, :astarte_e2e, :check_limit, + os_env: "E2E_CHECK_LIMIT", + type: :integer + + @envdoc """ + Time interval before declaring a timeout at startup (in seconds). + This option takes priority if both E2E_CHECK_LIMIT and E2E_STARTUP_TIMEOUT_SECONDS are defined. + """ + app_env :startup_timeout_s, :astarte_e2e, :startup_timeout_s, + os_env: "E2E_STARTUP_TIMEOUT_SECONDS", + type: :integer + @envdoc "The port used to expose AstarteE2E's metrics. Defaults to 4010." app_env :port, :astarte_e2e, :port, os_env: "E2E_PORT", @@ -229,9 +245,17 @@ defmodule AstarteE2E.Config do @spec scheduler_opts() :: scheduler_options() def scheduler_opts do + check_interval = check_interval_s!() + timeout_s = startup_timeout_s!() + + limit = check_limit!() + limit_from_timeout = timeout_s && div(timeout_s, check_interval) + allowed_retries = max(limit || limit_from_timeout || 1, 1) + [ - check_interval_s: check_interval_s!(), + check_interval_s: check_interval, check_repetitions: check_repetitions!(), + timeout: {:active, allowed_retries}, realm: realm!(), device_id: device_id!() ] From 5cdb1ffe500bd4c494a667a7f779fca72b5e1f15 Mon Sep 17 00:00:00 2001 From: Francesco Noacco Date: Thu, 8 Jun 2023 14:42:32 +0200 Subject: [PATCH 2/3] feat(e2e)!: schedule next check immediately Doing this makes timing more consistent, executing a check approximately every check_interval_ms milliseconds. Signed-off-by: Francesco Noacco --- .../astarte_e2e/lib/astarte_e2e/scheduler.ex | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex b/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex index 42a1bca20..0a664d7d8 100644 --- a/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex +++ b/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex @@ -55,24 +55,22 @@ defmodule AstarteE2E.Scheduler do @impl true def handle_info(:do_perform_check, state) do - return_val = - case AstarteE2E.perform_check() do - :ok -> - handle_successful_job(state) + Process.send_after(self(), :do_perform_check, state.check_interval_ms) - {:error, :timeout} -> - handle_timed_out_job(state) + case AstarteE2E.perform_check() do + :ok -> + handle_successful_job(state) - {:error, :not_connected} -> - {:noreply, state} + {:error, :timeout} -> + handle_timed_out_job(state) - e -> - Logger.warn("Unhandled condition #{inspect(e)}. Pretending everything is ok.") - {:noreply, state} - end + {:error, :not_connected} -> + {:noreply, state} - Process.send_after(self(), :do_perform_check, state.check_interval_ms) - return_val + e -> + Logger.warn("Unhandled condition #{inspect(e)}. Pretending everything is ok.") + {:noreply, state} + end end defp handle_successful_job(state) do From aa826ce433a7620b4271202204a5d4660afe21a9 Mon Sep 17 00:00:00 2001 From: Francesco Noacco Date: Thu, 8 Jun 2023 17:51:20 +0200 Subject: [PATCH 3/3] feat(e2e): let the scheduler call the timeout Signed-off-by: Francesco Noacco --- tools/astarte_e2e/lib/astarte_e2e/client.ex | 10 +++++ .../astarte_e2e/lib/astarte_e2e/scheduler.ex | 43 ++++++++++++++++--- .../lib/astarte_e2e/service_notifier.ex | 11 +++-- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/tools/astarte_e2e/lib/astarte_e2e/client.ex b/tools/astarte_e2e/lib/astarte_e2e/client.ex index fed1507bf..04f2d045e 100644 --- a/tools/astarte_e2e/lib/astarte_e2e/client.ex +++ b/tools/astarte_e2e/lib/astarte_e2e/client.ex @@ -90,6 +90,11 @@ defmodule AstarteE2E.Client do |> GenSocketClient.call(:wait_for_connection, :infinity) end + def notify_startup_timeout(realm, device_id) do + via_tuple(realm, device_id) + |> GenSocketClient.call(:notify_startup_timeout, :infinity) + end + defp join_topic(transport, state) do topic = state @@ -588,4 +593,9 @@ defmodule AstarteE2E.Client do {:noreply, new_state} end end + + def handle_call(:notify_startup_timeout, from, _transport, state) do + ServiceNotifier.notify_timeout() + {:reply, :ok, state} + end end diff --git a/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex b/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex index 0a664d7d8..37cd10ed8 100644 --- a/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex +++ b/tools/astarte_e2e/lib/astarte_e2e/scheduler.ex @@ -17,7 +17,7 @@ # defmodule AstarteE2E.Scheduler do - alias AstarteE2E.Utils + alias AstarteE2E.{Client, Utils} require Logger use GenServer, restart: :transient @@ -37,7 +37,18 @@ defmodule AstarteE2E.Scheduler do check_repetitions = Keyword.fetch!(opts, :check_repetitions) - state = %{check_repetitions: check_repetitions, check_interval_ms: check_interval_ms} + realm = Keyword.fetch(opts, :realm) + device_id = Keyword.fetch!(opts, :device_id) + timeout = Keyword.fetch!(opts, :timeout) + + state = %{ + check_repetitions: check_repetitions, + check_interval_ms: check_interval_ms, + realm: realm, + device_id: device_id, + timeout: timeout + } + Process.send_after(self(), :do_perform_check, check_interval_ms) {:ok, state} @@ -57,19 +68,23 @@ defmodule AstarteE2E.Scheduler do def handle_info(:do_perform_check, state) do Process.send_after(self(), :do_perform_check, state.check_interval_ms) - case AstarteE2E.perform_check() do + check_result = AstarteE2E.perform_check() + + updated_state = update_in(state.timeout, &maybe_timeout(&1, state, check_result)) + + case check_result do :ok -> - handle_successful_job(state) + handle_successful_job(updated_state) {:error, :timeout} -> - handle_timed_out_job(state) + handle_timed_out_job(updated_state) {:error, :not_connected} -> - {:noreply, state} + {:noreply, updated_state} e -> Logger.warn("Unhandled condition #{inspect(e)}. Pretending everything is ok.") - {:noreply, state} + {:noreply, updated_state} end end @@ -103,6 +118,20 @@ defmodule AstarteE2E.Scheduler do end end + defp maybe_timeout(timeout, state, check_result) do + cond do + timeout == :inactive or check_result == :ok -> :inactive + timeout == {:active, 1} -> call_timeout_and_set_inactive(state) + {:active, x} = timeout -> {:active, x - 1} + end + end + + defp call_timeout_and_set_inactive(state) do + %{realm: {:ok, realm}, device_id: device_id} = state + Client.notify_startup_timeout(realm, device_id) + :inactive + end + defp via_tuple(realm, device_id) do {:via, Registry, {Registry.AstarteE2E, {:scheduler, realm, device_id}}} end diff --git a/tools/astarte_e2e/lib/astarte_e2e/service_notifier.ex b/tools/astarte_e2e/lib/astarte_e2e/service_notifier.ex index b802c9a57..79ede4b25 100644 --- a/tools/astarte_e2e/lib/astarte_e2e/service_notifier.ex +++ b/tools/astarte_e2e/lib/astarte_e2e/service_notifier.ex @@ -53,6 +53,10 @@ defmodule AstarteE2E.ServiceNotifier do :gen_statem.call(__MODULE__, :notify_service_up) end + def notify_timeout do + :gen_statem.call(__MODULE__, :timeout) + end + defp deliver(%Bamboo.Email{} = email) do service_notifier_config = Config.service_notifier_config() @@ -82,10 +86,10 @@ defmodule AstarteE2E.ServiceNotifier do mail_subject: mail_subject } - {:ok, :starting, data, [{:state_timeout, 60_000, nil}]} + {:ok, :starting, data} end - def starting(:state_timeout, _content, %{mail_subject: mail_subject} = data) do + def starting({:call, from}, :timeout, %{mail_subject: mail_subject} = data) do reason = "Timeout at startup" event_id = Hukai.generate("%a-%A") @@ -107,7 +111,8 @@ defmodule AstarteE2E.ServiceNotifier do failure_id: event_id ) - {:next_state, :service_down, updated_data} + actions = [{:reply, from, :ok}] + {:next_state, :service_down, updated_data, actions} end def starting({:call, from}, :notify_service_up, data) do