From b33978bb81cd38ade0c624ea0803ccbe8bcdef54 Mon Sep 17 00:00:00 2001
From: Josh Durgin
Date: Tue, 16 Mar 2021 01:55:57 -0400
Subject: [PATCH 1/2] config.py.in: use SERIALIZABLE isolation level for the db

By default postgres uses the READ COMMITTED isolation level, which
allows transactions to read data that may be modified concurrently by
another transaction [0].

When teuthology queues a job, it adds it to beanstalkd, then reports
that new job id to paddles. In handling this request, paddles:

1) checks for a job with the same id
2) adds a new row in the jobs table for this job

On the server side, when teuthology-dispatcher reads from the queue, it
also updates the job status in paddles, going through the same request
handling as above.

When both requests run simultaneously, both may do step (1) and find no
existing job, then both proceed to step (2) and add a new row. With
SERIALIZABLE isolation this is impossible - the 2nd transaction is
aborted when the job is added by the 1st transaction. This makes DB
operations easier to reason about, and since this DB is not
performance-sensitive, it is a reasonable default for all transactions.

This may also address some races we've seen with multiple jobs thinking
they locked the same machine. I don't have evidence of that race, though
it seems possible based on the implementation of the nodes model.

Both the node locking and job updating APIs retry on the client side, so
a transaction hitting a conflict and returning an error shouldn't cause
any issues.

The result of this race adding jobs is duplicate DB records for each
job, like these:

SELECT job_id, posted, updated, status FROM jobs
WHERE run_id = X AND job_id IN
  (SELECT job_id FROM jobs WHERE run_id = X AND status = 'queued')
ORDER BY posted ASC;

 job_id  |           posted           |          updated           | status
---------+----------------------------+----------------------------+--------
 5931205 | 2021-03-03 20:53:52.124034 | 2021-03-03 20:53:52.124047 | queued
 5931205 | 2021-03-03 20:53:52.131244 | 2021-03-03 22:01:12.236465 | pass
 5931209 | 2021-03-03 20:53:54.146957 | 2021-03-03 20:53:54.146968 | queued
 5931209 | 2021-03-03 20:53:54.147755 | 2021-03-03 21:52:56.254025 | pass
 5931223 | 2021-03-03 20:54:00.96613  | 2021-03-03 20:54:00.966142 | queued
 5931223 | 2021-03-03 20:54:00.971739 | 2021-03-03 22:03:12.814133 | pass
 5931236 | 2021-03-03 20:54:07.313768 | 2021-03-03 20:54:07.313778 | queued
 5931236 | 2021-03-03 20:54:07.319185 | 2021-03-04 04:54:00.689333 | pass
 5931264 | 2021-03-03 20:54:20.714723 | 2021-03-03 20:54:20.714736 | queued
 5931264 | 2021-03-03 20:54:20.715809 | 2021-03-03 21:28:38.805917 | pass
 5931273 | 2021-03-03 20:54:25.00497  | 2021-03-03 20:54:25.004981 | queued
 5931273 | 2021-03-03 20:54:25.019827 | 2021-03-03 21:29:41.890552 | pass
 5931277 | 2021-03-03 20:54:26.963148 | 2021-03-03 20:54:26.96316  | queued
 5931277 | 2021-03-03 20:54:26.965546 | 2021-03-03 21:20:27.454767 | pass
 5931286 | 2021-03-03 20:54:31.254743 | 2021-03-03 20:54:31.254755 | queued
 5931286 | 2021-03-03 20:54:31.258285 | 2021-03-03 21:45:30.325423 | pass
 5931304 | 2021-03-03 20:54:39.934984 | 2021-03-03 20:54:39.934996 | queued
 5931304 | 2021-03-03 20:54:39.935474 | 2021-03-03 22:41:53.914094 | pass
 5931317 | 2021-03-03 20:54:46.290555 | 2021-03-03 20:54:46.290566 | queued
 5931317 | 2021-03-03 20:54:46.294983 | 2021-03-03 22:12:38.405483 | pass
 5931322 | 2021-03-03 20:54:48.68503  | 2021-03-03 20:54:48.685043 | queued
 5931322 | 2021-03-03 20:54:48.694535 | 2021-03-03 22:01:12.71814  | pass
 5931326 | 2021-03-03 20:54:51.643081 | 2021-03-03 20:54:51.643105 | queued
 5931326 | 2021-03-03 20:54:51.670604 | 2021-03-03 22:09:21.44187  | pass
 5931331 | 2021-03-03 20:54:54.922704 | 2021-03-03 20:54:54.922716 | queued
 5931331 | 2021-03-03 20:54:54.925522 | 2021-03-03 22:00:25.523793 | pass
 5931347 | 2021-03-03 20:55:03.562209 | 2021-03-03 20:55:03.56222  | queued
 5931347 | 2021-03-03 20:55:03.564115 | 2021-03-03 21:58:32.887666 | pass
 5931356 | 2021-03-03 20:55:07.922281 | 2021-03-03 20:55:07.922293 | queued
 5931356 | 2021-03-03 20:55:07.922722 | 2021-03-03 21:57:17.223546 | pass
 5931365 | 2021-03-03 20:55:12.282366 | 2021-03-03 21:52:59.552419 | fail
 5931365 | 2021-03-03 20:55:12.31297  | 2021-03-03 20:55:12.31298  | queued
 5931379 | 2021-03-03 20:55:19.008472 | 2021-03-03 22:04:57.608941 | pass
 5931379 | 2021-03-03 20:55:19.019315 | 2021-03-03 20:55:19.019328 | queued
 5931380 | 2021-03-03 20:55:19.505256 | 2021-03-03 20:55:19.505267 | queued
 5931380 | 2021-03-03 20:55:19.525025 | 2021-03-03 21:53:02.772712 | pass

Note that sometimes the queued job is posted first, and sometimes the
job with the correctly updated status is first, demonstrating the race
between teuthology-schedule and teuthology-dispatcher both adding the
job to paddles. There is a secondary race that needs to be fixed in
teuthology, to prevent teuthology-schedule from updating the status to
'queued' after the job has begun.

Pulpito displays these jobs as queued on the queue page [1] and in the
run view [2], but as passed or failed on the individual job page [3],
causing confusion about the actual state of the queue.

[0] https://www.postgresql.org/docs/9.5/transaction-iso.html
[1] https://pulpito.ceph.com/queue/
[2] https://pulpito.ceph.com/jdillaman-2021-03-03_15:53:41-rbd-wip-jd-testing-pacific-testing-basic-smithi/
[3] https://pulpito.ceph.com/jdillaman-2021-03-03_15:53:41-rbd-wip-jd-testing-pacific-testing-basic-smithi/5931205/

Signed-off-by: Josh Durgin
---
 config.py.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/config.py.in b/config.py.in
index 31dccd2..eb87a4f 100644
--- a/config.py.in
+++ b/config.py.in
@@ -84,5 +84,6 @@ sqlalchemy = {
     'echo': True,
     'echo_pool': True,
     'pool_recycle': 3600,
-    'encoding': 'utf-8'
+    'encoding': 'utf-8',
+    'isolation_level': 'SERIALIZABLE', # required for correct job reporting
 }

From 843d82a257ee309041062fa9798f23b08e38e3e8 Mon Sep 17 00:00:00 2001
From: Josh Durgin
Date: Sun, 21 Mar 2021 03:32:48 -0400
Subject: [PATCH 2/2] tests: add reproducer for job creation/update race condition

This requires using postgres or another DB that defaults to
non-serializable isolation, running paddles via gunicorn with multiple
workers, and leaving the DB in non-serializable mode. This setup
reproduces multiple rows for the same job every time for me.

Inspired by test_nodes_race.py.
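
For reference, the check-then-insert pattern from the previous patch's
description, written out as a simplified sketch - this is illustrative
Python/SQLAlchemy code, not paddles' actual controller code, and the
'Job' model and 'session' names are placeholders:

    def report_job(session, run_id, job_id, status):
        # (1) check for a job with the same id
        job = session.query(Job).filter_by(run_id=run_id,
                                           job_id=job_id).first()
        if job is None:
            # (2) add a new row - under READ COMMITTED two concurrent
            # requests can both reach this point; under SERIALIZABLE one
            # of the two transactions fails to commit and the client
            # retries instead of leaving a duplicate row.
            job = Job(run_id=run_id, job_id=job_id)
            session.add(job)
        job.status = status
        session.commit()
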
Signed-off-by: Josh Durgin
---
 paddles/tests/controllers/test_jobs_race.py | 103 ++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 paddles/tests/controllers/test_jobs_race.py

diff --git a/paddles/tests/controllers/test_jobs_race.py b/paddles/tests/controllers/test_jobs_race.py
new file mode 100644
index 0000000..614a405
--- /dev/null
+++ b/paddles/tests/controllers/test_jobs_race.py
@@ -0,0 +1,103 @@
+try:
+    from queue import Queue
+except ImportError:
+    from Queue import Queue
+
+from datetime import datetime
+import threading
+import requests
+import json
+import pytest
+import time
+import random
+
+base_uri = 'http://localhost:8080'
+
+
+class TestJobsControllerRace(object):
+    def setup_class(cls):
+        try:
+            assert requests.get(base_uri + '/runs/').ok
+        except Exception:
+            pytest.skip("Cannot find paddles server; skipping")
+
+    def setup_method(self, meth):
+        self.run = 'test-jobs-' + datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f')
+        resp = requests.post(base_uri + '/runs/',
+                             data=json.dumps(dict(name=self.run)),
+                             headers={'content-type': 'application/json'})
+        resp.raise_for_status()
+
+    def teardown_method(self, meth):
+        resp = requests.delete(base_uri + '/runs/' + self.run + '/')
+        resp.raise_for_status()
+
+    def test_job_create_threaded(self):
+
+        def job_update(job_id, status):
+            job_data = dict(job_id=job_id, status=status, run=self.run, name=str(job_id))
+            headers = {'content-type': 'application/json'}
+            run_uri = base_uri + '/runs/' + self.run + '/jobs/'
+
+            attempts = 5
+            while attempts > 0:
+                response = requests.post(
+                    run_uri,
+                    data=json.dumps(job_data),
+                    headers=headers,
+                )
+                print(job_data, response.status_code, response.text)
+                try:
+                    resp_json = response.json()
+                except ValueError:
+                    resp_json = dict()
+
+                if resp_json:
+                    msg = resp_json.get('message', '')
+                else:
+                    msg = response.text
+
+                if msg and msg.endswith('already exists'):
+                    response = requests.put(
+                        run_uri + str(job_id) + '/',
+                        data=json.dumps(job_data),
+                        headers=headers,
+                    )
+                if response.ok:
+                    break
+                print(f'Raced updating job {job_id} status, retries left: {attempts}')
+                attempts -= 1
+                time.sleep(random.uniform(0, 1))
+            response.raise_for_status()
+
+            queue.put(dict(text=response.text,
+                           status_int=response.status_code))
+
+        jobs = []
+        queue = Queue()
+        job_ids = list(range(1, 10))
+        for job_id in job_ids:
+            for status in ['queued', 'queued', 'running', 'running']:
+                jobs.append(threading.Thread(target=job_update, args=(job_id, status)))
+        for job in jobs:
+            job.start()
+        for job in jobs:
+            job.join()
+        results = []
+        while not queue.empty():
+            results.append(queue.get())
+        statuses = [result['status_int'] for result in results]
+        assert statuses.count(200) == len(statuses)
+
+        # check for duplicate rows by looking at status at the run
+        # and job level - this can be different when there are
+        # duplicate rows created due to a race
+        resp = requests.get(base_uri + '/runs/' + self.run + '/jobs/')
+        assert resp.status_code == 200
+        for job in resp.json():
+            job_id = job['job_id']
+            job_status_in_run = job['status']
+            job_resp = requests.get(base_uri + '/runs/' + self.run + '/jobs/' + str(job_id) + '/')
+            assert job_resp.status_code == 200
+            assert job_resp.json()['status'] == job_status_in_run
+        assert len(resp.json()) == len(job_ids)
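
Note on the config change in patch 1: SQLAlchemy's create_engine()
accepts isolation_level directly (like the existing echo/pool_recycle
keys), so assuming paddles forwards the sqlalchemy config dict to
create_engine() as keyword arguments, as pecan-style apps typically do,
the new key is roughly equivalent to the sketch below. The URL and the
retry helper are placeholders, not paddles or teuthology code:

    from sqlalchemy import create_engine
    from sqlalchemy.exc import OperationalError

    engine = create_engine(
        'postgresql://paddles@localhost/paddles',  # placeholder URL
        pool_recycle=3600,
        isolation_level='SERIALIZABLE',  # applied to every connection
    )

    # Under SERIALIZABLE, a conflicting commit fails with a serialization
    # error (psycopg2's SerializationFailure), which SQLAlchemy raises as
    # OperationalError; callers retry, as the teuthology job and node
    # update paths already do on the client side.
    def commit_with_retry(session, attempts=5):
        while attempts > 0:
            try:
                session.commit()
                return
            except OperationalError:
                session.rollback()
                attempts -= 1
        raise RuntimeError('repeated serialization failures')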