From 9acc7cb1f99a880f6c53c210dca6d23afad081cc Mon Sep 17 00:00:00 2001 From: Kamoltat Sirivadhna Date: Wed, 13 Mar 2024 17:45:59 -0400 Subject: [PATCH 1/3] src/teuthology_api/suite: Modify how we handle Error and Success runs The changes include: 1. Make the suite route return {"run": run_details, "logs": logs, "job_count": job_count} 2. Improve how we handle exceptions by utilizing Queue from the Python multiprocessing library. 3. Set the timeout for the process to 180 seconds; if teuthology doesn't respond within that time, the request fails with a process timeout error. Signed-off-by: Kamoltat Sirivadhna --- src/teuthology_api/routes/suite.py | 7 +++- src/teuthology_api/schemas/suite.py | 2 +- src/teuthology_api/services/helpers.py | 36 +++++++++++++++----- src/teuthology_api/services/suite.py | 47 +++++++++++++------------- 4 files changed, 57 insertions(+), 35 deletions(-) diff --git a/src/teuthology_api/routes/suite.py b/src/teuthology_api/routes/suite.py index 13f1905..5737a16 100644 --- a/src/teuthology_api/routes/suite.py +++ b/src/teuthology_api/routes/suite.py @@ -24,4 +24,9 @@ def create_run( ): args = args.model_dump(by_alias=True) args["--user"] = get_username(request) - return run(args, logs, access_token) + try: + created_run = run(args, logs, access_token) + log.debug(created_run) + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + return created_run diff --git a/src/teuthology_api/schemas/suite.py b/src/teuthology_api/schemas/suite.py index 409e219..14b4042 100644 --- a/src/teuthology_api/schemas/suite.py +++ b/src/teuthology_api/schemas/suite.py @@ -37,7 +37,7 @@ class SuiteArgs(BaseArgs): default="https://github.com/ceph/ceph-ci.git", alias="--suite_repo" ) teuthology_branch: Union[str, None] = Field( - default="main", alias="--teuthology-branch" + default=None, alias="--teuthology-branch" ) validate_sha1: Union[str, None] = Field(default="true", alias="--validate-sha1") wait: Union[bool, None] = Field(default=False,
alias="--wait") diff --git a/src/teuthology_api/services/helpers.py b/src/teuthology_api/services/helpers.py index ef7abc3..2bf484f 100644 --- a/src/teuthology_api/services/helpers.py +++ b/src/teuthology_api/services/helpers.py @@ -1,4 +1,4 @@ -from multiprocessing import Process +from multiprocessing import Process, Queue import logging import os import uuid @@ -26,26 +26,44 @@ def logs_run(func, args): _id = str(uuid.uuid4()) archive = Path(ARCHIVE_DIR) log_file = archive / f"{_id}.log" - - teuthology_process = Process(target=_execute_with_logs, args=(func, args, log_file)) - teuthology_process.start() - teuthology_process.join() - + teuth_queue = Queue() + teuth_process = Process( + target=_execute_with_logs, args=(func, args, log_file, teuth_queue) + ) + teuth_process.daemon = True # Set the process as a daemon + teuth_process.start() + teuth_process.join(timeout=180) # Set the timeout value in seconds + if teuth_process.is_alive(): + teuth_process.terminate() # Terminate the process if it exceeds the timeout + teuth_process.join() + raise TimeoutError("Process execution timed out") logs = "" with open(log_file, encoding="utf-8") as file: logs = file.readlines() if os.path.isfile(log_file): os.remove(log_file) - return logs + log.debug(logs) + if teuth_process.exitcode > 0: + e = teuth_queue.get() + log.error(e) + return "fail", e, 0 + else: + job_count = teuth_queue.get() + return "success", logs, job_count -def _execute_with_logs(func, args, log_file): +def _execute_with_logs(func, args, log_file, teuth_queue): """ To store logs, set a new FileHandler for teuthology root logger and then execute the command function. 
""" teuthology.setup_log_file(log_file) - func(args) + try: + job_count = func(args) + teuth_queue.put(job_count) + except Exception as e: + teuth_queue.put(e) + raise def get_run_details(run_name: str): diff --git a/src/teuthology_api/services/suite.py b/src/teuthology_api/services/suite.py index 99d2d6d..43506b1 100644 --- a/src/teuthology_api/services/suite.py +++ b/src/teuthology_api/services/suite.py @@ -20,30 +20,29 @@ def run(args, send_logs: bool, access_token: str): detail="You need to be logged in", headers={"WWW-Authenticate": "Bearer"}, ) - try: - args["--timestamp"] = datetime.now().strftime("%Y-%m-%d_%H:%M:%S") - - logs = logs_run(teuthology.suite.main, args) - - # get run details from paddles - run_name = make_run_name( - { - "machine_type": args["--machine-type"], - "user": args["--user"], - "timestamp": args["--timestamp"], - "suite": args["--suite"], - "ceph_branch": args["--ceph"], - "kernel_branch": args["--kernel"], - "flavor": args["--flavor"], - } - ) - run_details = get_run_details(run_name) - if send_logs or args["--dry-run"]: - return {"run": run_details, "logs": logs} - return {"run": run_details} - except Exception as exc: - log.error("teuthology.suite.main failed with the error: %s", repr(exc)) - raise HTTPException(status_code=500, detail=repr(exc)) from exc + args["--timestamp"] = datetime.now().strftime("%Y-%m-%d_%H:%M:%S") + status, logs, job_count = logs_run(teuthology.suite.main, args) + if status == "fail": + raise logs + if args["--dry-run"] or job_count < 1: + return {"run": {}, "logs": logs, "job_count": job_count} + # get run details from paddles + run_name = make_run_name( + { + "machine_type": args["--machine-type"], + "user": args["--user"], + "timestamp": args["--timestamp"], + "suite": args["--suite"], + "ceph_branch": args["--ceph"], + "kernel_branch": args["--kernel"], + "flavor": args["--flavor"], + } + ) + run_details = get_run_details(run_name) + if send_logs: + return {"run": run_details, "logs": logs, "job_count": 
job_count} + else: + return {"run": run_details, "job_count": job_count} def make_run_name(run_dic): From 651c46962fc9049bc29b6a494bb021b09285d640 Mon Sep 17 00:00:00 2001 From: Kamoltat Sirivadhna Date: Wed, 13 Mar 2024 17:54:34 -0400 Subject: [PATCH 2/3] .gitignore: Add .env to gitignore Signed-off-by: Kamoltat Sirivadhna --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3d98dbd..cffc62c 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,4 @@ MANIFEST .conda*/ .python-version venv +.env From 8df1c86a7822ad3b2f74b713d74f132eaa3686c0 Mon Sep 17 00:00:00 2001 From: Kamoltat Sirivadhna Date: Fri, 10 May 2024 15:52:49 -0400 Subject: [PATCH 3/3] tests/test_suite.py modify test_suite_run_success Added job counts to unittest. Signed-off-by: Kamoltat Sirivadhna --- tests/test_suite.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_suite.py b/tests/test_suite.py index ab2f6f7..6f6841f 100644 --- a/tests/test_suite.py +++ b/tests/test_suite.py @@ -2,7 +2,7 @@ from teuthology_api.main import app from unittest.mock import patch from teuthology_api.services.helpers import get_token -from teuthology_api.services.suite import make_run_name, get_run_details +from teuthology_api.services.suite import make_run_name import json client = TestClient(app) @@ -29,15 +29,18 @@ async def override_get_token(): } # suite + + @patch("teuthology_api.services.suite.logs_run") @patch("teuthology_api.routes.suite.get_username") @patch("teuthology_api.services.suite.get_run_details") def test_suite_run_success(m_get_run_details, m_get_username, m_logs_run): + m_logs_run.return_value = ("success", "example logs", 1) m_get_username.return_value = "user1" m_get_run_details.return_value = {"id": "7451978", "user": "user1"} response = client.post("/suite", data=json.dumps(mock_suite_args)) assert response.status_code == 200 - assert response.json() == {"run": {"id": "7451978", "user": "user1"}} + assert 
response.json() == {"run": {"id": "7451978", "user": "user1"}, "job_count": 1} # make_run_name