From 251f064c4b795860aac51bb43271fdc74ebb664d Mon Sep 17 00:00:00 2001 From: Adam Grare Date: Wed, 5 Aug 2020 15:25:33 -0400 Subject: [PATCH] Use the systemd services instead of miq_workers table For the purposes of starting and stopping workers, looking only at the miq_workers table for the "current" list ignores the fact that there might be existing services out there, already created, that are starting, stopping, or failed and that don't have a record tracking them in the miq_workers table. --- .../worker_management/monitor/quiesce.rb | 2 -- app/models/miq_worker.rb | 15 +++++++++- app/models/miq_worker/systemd_common.rb | 29 ++++++++++++++++--- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/app/models/miq_server/worker_management/monitor/quiesce.rb b/app/models/miq_server/worker_management/monitor/quiesce.rb index a5b201a2dcff..46434d662471 100644 --- a/app/models/miq_server/worker_management/monitor/quiesce.rb +++ b/app/models/miq_server/worker_management/monitor/quiesce.rb @@ -36,8 +36,6 @@ def quiesce_workers_loop miq_workers.each do |w| if w.containerized_worker? w.delete_container_objects - elsif w.systemd_worker? - w.stop_systemd_worker else stop_worker(w) end diff --git a/app/models/miq_worker.rb b/app/models/miq_worker.rb index 4cbc556cad64..d4eab6f5252d 100644 --- a/app/models/miq_worker.rb +++ b/app/models/miq_worker.rb @@ -128,9 +128,20 @@ def self.enough_resource_to_start_worker? MiqServer.my_server.enough_resource_to_start_worker?(self) end + def self.cleanup_failed_workers + if containerized_worker? + elsif systemd_worker? + failed_services.each do |service| + cleanup_systemd_worker(service) + end + end + end + def self.sync_workers + cleanup_failed_workers + w = include_stopping_workers_on_synchronize ? find_alive : find_current_or_starting - current = w.length + current = systemd_worker? ? 
active_services : w.length desired = workers result = {:adds => [], :deletes => []} @@ -143,6 +154,8 @@ def self.sync_workers w = w.to_a (current - desired).times do ww = w.pop + # TODO add interface to get worker instance by service name + ww = find_by(:guid => ww.split("@")) result[:deletes] << ww.pid ww.stop end diff --git a/app/models/miq_worker/systemd_common.rb b/app/models/miq_worker/systemd_common.rb index 6ddf861fa25c..b6eab1aa0ea2 100644 --- a/app/models/miq_worker/systemd_common.rb +++ b/app/models/miq_worker/systemd_common.rb @@ -11,6 +11,30 @@ def supports_systemd? false end + def systemd_manager + @systemd_manager ||= begin + require "dbus/systemd" + DBus::Systemd::Manager.new + end + end + + def services + systemd_manager.units.select { |unit| unit[:name].start_with?(service_name) } + end + + def active_services + services.select { |service| service[:active_state] == "active" }.map { |service| service[:name] } + end + + def failed_services + services.select { |service| service[:active_state] == "failed" }.map { |service| service[:name] } + end + + def cleanup_systemd_worker(unit_name, runtime: false) + # TODO cleanup unit settings + systemd_manager.DisableUnitFiles([unit_name], runtime) + end + def ensure_systemd_files target_file_path.write(target_file) unless target_file_path.exist? service_file_path.write(unit_file) unless service_file_path.exist? @@ -118,10 +142,7 @@ def stop_systemd_unit(mode: "replace") private def systemd - @systemd ||= begin - require "dbus/systemd" - DBus::Systemd::Manager.new - end + self.class.systemd_manager end def service_base_name