From 9b772f1ff5b00c52d3983cb3c2f846c21de69e55 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Fri, 26 Jul 2024 18:50:39 +0200 Subject: [PATCH] Fix container recording for pulsar And re-use abstractions for determining file path. --- lib/galaxy/job_metrics/instrumenters/core.py | 7 ++++--- lib/galaxy/jobs/command_factory.py | 19 +++++++++++++------ test/integration/test_containerized_jobs.py | 3 +-- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/galaxy/job_metrics/instrumenters/core.py b/lib/galaxy/job_metrics/instrumenters/core.py index 7abcc3152520..76cd4053ffd2 100644 --- a/lib/galaxy/job_metrics/instrumenters/core.py +++ b/lib/galaxy/job_metrics/instrumenters/core.py @@ -2,7 +2,6 @@ import json import logging -import os import time from typing import ( Any, @@ -25,7 +24,6 @@ START_EPOCH_KEY = "start_epoch" END_EPOCH_KEY = "end_epoch" RUNTIME_SECONDS_KEY = "runtime_seconds" -CONTAINER_FILE = "__container.json" CONTAINER_ID = "container_id" CONTAINER_TYPE = "container_type" @@ -89,9 +87,12 @@ def job_properties(self, job_id, job_directory: str) -> Dict[str, Any]: properties[RUNTIME_SECONDS_KEY] = end - start return properties + def get_container_file_path(self, job_directory): + return self._instrument_file_path(job_directory, "container") + def __read_container_details(self, job_directory) -> Dict[str, str]: try: - with open(os.path.join(job_directory, CONTAINER_FILE)) as fh: + with open(self.get_container_file_path(job_directory)) as fh: return json.load(fh) except FileNotFoundError: return {} diff --git a/lib/galaxy/jobs/command_factory.py b/lib/galaxy/jobs/command_factory.py index 0b7e19904307..9ff10b079723 100644 --- a/lib/galaxy/jobs/command_factory.py +++ b/lib/galaxy/jobs/command_factory.py @@ -1,7 +1,10 @@ import json import typing from logging import getLogger -from os import getcwd +from os import ( + getcwd, + makedirs, +) from os.path import ( abspath, join, @@ -81,8 +84,16 @@ def build_command( __handle_dependency_resolution(commands_builder, job_wrapper, remote_command_params) __handle_task_splitting(commands_builder, job_wrapper) - for_pulsar = "pulsar_version" in remote_command_params + if container: + if core_job_metric_plugin := runner.app.job_metrics.default_job_instrumenter.get_configured_plugin("core"): + directory = join(job_wrapper.working_directory, "metadata") if for_pulsar else job_wrapper.working_directory + makedirs(directory, exist_ok=True) + container_file_path = core_job_metric_plugin.get_container_file_path(directory) + with open(container_file_path, "w") as container_file: + container_file.write( + json.dumps({"container_id": container.container_id, "container_type": container.container_type}) + ) if (container and modify_command_for_container) or job_wrapper.commands_in_new_shell: if container and modify_command_for_container: # Many Docker containers do not have /bin/bash. @@ -181,10 +192,6 @@ def __externalize_commands( source_command = "" if container: source_command = container.source_environment - with open(join(job_wrapper.working_directory, "__container.json"), "w") as container_file: - container_file.write( - json.dumps({"container_id": container.container_id, "container_type": container.container_type}) - ) script_contents = f"#!{shell}\n{integrity_injection}{set_e}{source_command}{tool_commands}" write_script( local_container_script, diff --git a/test/integration/test_containerized_jobs.py b/test/integration/test_containerized_jobs.py index 5c5c1516552a..2254b79a8a5a 100644 --- a/test/integration/test_containerized_jobs.py +++ b/test/integration/test_containerized_jobs.py @@ -36,8 +36,7 @@ def _run_and_get_contents(self, tool_id: str, history_id: str): run_response = self.dataset_populator.run_tool(tool_id, {}, history_id) job_id = run_response["jobs"][0]["id"] self.dataset_populator.wait_for_job(job_id=job_id, assert_ok=True, timeout=EXTENDED_TIMEOUT) - job_details = self.dataset_populator.get_job_details(job_id=job_id, full=True).json() - job_metrics = job_details["job_metrics"] + job_metrics = self.dataset_populator._get(f"/api/jobs/{job_id}/metrics").json() # would be nice if it wasn't just a list of unpredictable order ... container_id = None container_type = None