From f2b86f186cf288d5fc9c4ab49e027a5a1fe3a144 Mon Sep 17 00:00:00 2001 From: Felix Fontein Date: Sun, 7 Jul 2024 22:56:04 +0200 Subject: [PATCH] Allow to wait for a container to become healthy. --- .../921-docker_container-healthy.yml | 4 ++ .../module_utils/module_container/module.py | 46 +++++++++---- plugins/modules/docker_container.py | 14 ++++ .../docker_container/tasks/tests/healthy.yml | 64 +++++++++++++++++++ 4 files changed, 115 insertions(+), 13 deletions(-) create mode 100644 changelogs/fragments/921-docker_container-healthy.yml create mode 100644 tests/integration/targets/docker_container/tasks/tests/healthy.yml diff --git a/changelogs/fragments/921-docker_container-healthy.yml b/changelogs/fragments/921-docker_container-healthy.yml new file mode 100644 index 000000000..007148d26 --- /dev/null +++ b/changelogs/fragments/921-docker_container-healthy.yml @@ -0,0 +1,4 @@ +minor_changes: + - "docker_container - the new ``state=healthy`` allows waiting for a container to become healthy on startup. + The ``healthy_wait_timeout`` option allows configuring the maximum time to wait for this to happen + (https://github.com/ansible-collections/community.docker/issues/890, https://github.com/ansible-collections/community.docker/pull/921)." 
diff --git a/plugins/module_utils/module_container/module.py b/plugins/module_utils/module_container/module.py index 5d819efa4..b58ad351a 100644 --- a/plugins/module_utils/module_container/module.py +++ b/plugins/module_utils/module_container/module.py @@ -85,6 +85,9 @@ def __init__(self, module, engine_driver, client, active_options): self.param_pull_check_mode_behavior = self.module.params['pull_check_mode_behavior'] self.param_recreate = self.module.params['recreate'] self.param_removal_wait_timeout = self.module.params['removal_wait_timeout'] + self.param_healthy_wait_timeout = self.module.params['healthy_wait_timeout'] + if self.param_healthy_wait_timeout <= 0: + self.param_healthy_wait_timeout = None self.param_restart = self.module.params['restart'] self.param_state = self.module.params['state'] self._parse_comparisons() @@ -212,7 +215,7 @@ def fail(self, *args, **kwargs): self.client.fail(*args, **kwargs) def run(self): - if self.param_state in ('stopped', 'started', 'present'): + if self.param_state in ('stopped', 'started', 'present', 'healthy'): self.present(self.param_state) elif self.param_state == 'absent': self.absent() @@ -227,7 +230,7 @@ def run(self): if self.facts: self.results['container'] = self.facts - def wait_for_state(self, container_id, complete_states=None, wait_states=None, accept_removal=False, max_wait=None): + def wait_for_state(self, container_id, complete_states=None, wait_states=None, accept_removal=False, max_wait=None, health_state=False): delay = 1.0 total_wait = 0 while True: @@ -235,21 +238,24 @@ def wait_for_state(self, container_id, complete_states=None, wait_states=None, a result = self.engine_driver.inspect_container_by_id(self.client, container_id) if result is None: if accept_removal: - return + return result msg = 'Encontered vanished container while waiting for container "{0}"' self.fail(msg.format(container_id)) # Check container state - state = result.get('State', {}).get('Status') + state_info = result.get('State') 
or {} + if health_state: + state_info = state_info.get('Health') or {} + state = state_info.get('Status') if complete_states is not None and state in complete_states: - return + return result if wait_states is not None and state not in wait_states: msg = 'Encontered unexpected state "{1}" while waiting for container "{0}"' - self.fail(msg.format(container_id, state)) + self.fail(msg.format(container_id, state), container=result) # Wait if max_wait is not None: - if total_wait > max_wait: + if total_wait > max_wait or delay < 1E-4: msg = 'Timeout of {1} seconds exceeded while waiting for container "{0}"' - self.fail(msg.format(container_id, max_wait)) + self.fail(msg.format(container_id, max_wait), container=result) if total_wait + delay > max_wait: delay = max_wait - total_wait sleep(delay) @@ -368,10 +374,10 @@ def present(self, state): container = self.update_limits(container, container_image, comparison_image, host_info) container = self.update_networks(container, container_created) - if state == 'started' and not container.running: + if state in ('started', 'healthy') and not container.running: self.diff_tracker.add('running', parameter=True, active=was_running) container = self.container_start(container.id) - elif state == 'started' and self.param_restart: + elif state in ('started', 'healthy') and self.param_restart: self.diff_tracker.add('running', parameter=True, active=was_running) self.diff_tracker.add('restarted', parameter=True, active=False) container = self.container_restart(container.id) @@ -380,7 +386,7 @@ def present(self, state): self.container_stop(container.id) container = self._get_container(container.id) - if state == 'started' and self.param_paused is not None and container.paused != self.param_paused: + if state in ('started', 'healthy') and self.param_paused is not None and container.paused != self.param_paused: self.diff_tracker.add('paused', parameter=self.param_paused, active=was_paused) if not self.check_mode: try: @@ -398,6 +404,19 @@ 
def present(self, state): self.facts = container.raw + if state == 'healthy' and not self.check_mode: + # `None` means that no health check enabled; simply treat this as 'healthy' + inspect_result = self.wait_for_state( + container.id, + wait_states=['starting', 'unhealthy'], + complete_states=['healthy', None], + max_wait=self.param_healthy_wait_timeout, + health_state=True, + ) + if inspect_result: + # Return the latest inspection results retrieved + self.facts = inspect_result + def absent(self): container = self._get_container(self.param_name) if container.exists: @@ -878,10 +897,11 @@ def run_module(engine_driver): recreate=dict(type='bool', default=False), removal_wait_timeout=dict(type='float'), restart=dict(type='bool', default=False), - state=dict(type='str', default='started', choices=['absent', 'present', 'started', 'stopped']), + state=dict(type='str', default='started', choices=['absent', 'present', 'healthy', 'started', 'stopped']), + healthy_wait_timeout=dict(type='float', default=300), ), required_if=[ - ('state', 'present', ['image']) + ('state', 'present', ['image']), ], ) diff --git a/plugins/modules/docker_container.py b/plugins/modules/docker_container.py index 2c3dbd40f..83ead19f6 100644 --- a/plugins/modules/docker_container.py +++ b/plugins/modules/docker_container.py @@ -381,6 +381,7 @@ - "O(healthcheck.interval), O(healthcheck.timeout), O(healthcheck.start_period), and O(healthcheck.start_interval) are specified as durations. They accept duration as a string in a format that look like: V(5h34m56s), V(1m30s), and so on. The supported units are V(us), V(ms), V(s), V(m) and V(h)." + - See also O(state=healthy). type: dict suboptions: test: @@ -919,6 +920,8 @@ with the requested config.' - 'V(started) - Asserts that the container is first V(present), and then if the container is not running moves it to a running state. Use O(restart) to force a matching container to be stopped and restarted.' 
+ - V(healthy) - Asserts that the container is V(present) and V(started), and is actually healthy as well. + The time waited can be controlled with O(healthy_wait_timeout). This state has been added in community.docker 3.11.0. - 'V(stopped) - Asserts that the container is first V(present), and then if the container is running moves it to a stopped state.' - "To control what will be taken into account when comparing configuration, see the O(comparisons) option. To avoid that the @@ -932,12 +935,23 @@ choices: - absent - present + - healthy - stopped - started stop_signal: description: - Override default signal used to stop the container. type: str + healthy_wait_timeout: + description: + - When waiting for the container to become healthy if O(state=healthy), this option controls for how long + the module waits until the container's state becomes healthy. + - The timeout is specified in seconds. The default, V(300), is 5 minutes. + - Set this to 0 or a negative value to wait however long it takes. + Note that depending on the container this can result in the module not terminating. + default: 300 + type: float + version_added: 3.11.0 stop_timeout: description: - Number of seconds to wait for the container to stop before sending C(SIGKILL). 
diff --git a/tests/integration/targets/docker_container/tasks/tests/healthy.yml b/tests/integration/targets/docker_container/tasks/tests/healthy.yml new file mode 100644 index 000000000..3be7312af --- /dev/null +++ b/tests/integration/targets/docker_container/tasks/tests/healthy.yml @@ -0,0 +1,64 @@ +--- +# Copyright (c) Ansible Project +# GNU General Public License v3.0+ (see LICENSES/GPL-3.0-or-later.txt or https://www.gnu.org/licenses/gpl-3.0.txt) +# SPDX-License-Identifier: GPL-3.0-or-later + +- name: Registering container name + set_fact: + cname: "{{ cname_prefix ~ '-hi' }}" +- name: Registering container name + set_fact: + cnames: "{{ cnames + [cname] }}" + +- name: Prepare container + docker_container: + name: "{{ cname }}" + image: "{{ docker_test_image_healthcheck }}" + command: '10m' + state: stopped + register: healthy_1 + +- debug: var=healthy_1.container.State + +- name: Start container (not healthy in time) + docker_container: + name: "{{ cname }}" + state: healthy + healthy_wait_timeout: 1 + register: healthy_2 + ignore_errors: true + +- debug: var=healthy_2.container.State + +- name: Prepare container + docker_container: + name: "{{ cname }}" + image: "{{ docker_test_image_healthcheck }}" + command: '10m 5s' + state: stopped + force_kill: true + register: healthy_3 + +- debug: var=healthy_3.container.State + +- name: Start container (healthy in time) + docker_container: + name: "{{ cname }}" + state: healthy + healthy_wait_timeout: 10 + register: healthy_4 + +- debug: var=healthy_4.container.State + +- name: Cleanup + docker_container: + name: "{{ cname }}" + state: absent + force_kill: true +- assert: + that: + - healthy_2 is failed + - healthy_2.container.State.Health.Status == "starting" + - healthy_2.msg.startswith("Timeout of 1.0 seconds exceeded while waiting for container ") + - healthy_4 is changed + - healthy_4.container.State.Health.Status == "healthy"