From 850ba1d4d7ac788a90c63e0d04a3b8bfb8f48ada Mon Sep 17 00:00:00 2001 From: Marius Kittler Date: Wed, 6 Nov 2024 14:33:46 +0100 Subject: [PATCH 1/2] Improve code for processing cloned jobs * Avoid having two times the same identical loop * Use better variable names --- lib/OpenQA/Schema/Result/Jobs.pm | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lib/OpenQA/Schema/Result/Jobs.pm b/lib/OpenQA/Schema/Result/Jobs.pm index 0e3f112e3c5..4a1e85c604f 100644 --- a/lib/OpenQA/Schema/Result/Jobs.pm +++ b/lib/OpenQA/Schema/Result/Jobs.pm @@ -693,13 +693,12 @@ sub _create_clones ($self, $jobs, $comments, $comment_text, $comment_user_id, @c $res->register_assets_from_settings; } - # calculate blocked_by - $clones{$_}->calculate_blocked_by for @original_job_ids; - - # add a reference to the clone within $jobs - for my $job (@original_job_ids) { - my $clone = $clones{$job}; - $jobs->{$job}->{clone} = $clone->id if $clone; + for my $original_job_id (@original_job_ids) { + my $cloned_job = $clones{$original_job_id}; + # calculate blocked_by + $cloned_job->calculate_blocked_by; + # add a reference to the clone within $jobs + $jobs->{$original_job_id}->{clone} = $cloned_job->id; } # create comments on original jobs From 2dea55e2b320c88cd6e0ac732cb07f7152ac0e80 Mon Sep 17 00:00:00 2001 From: Marius Kittler Date: Wed, 6 Nov 2024 14:45:03 +0100 Subject: [PATCH 2/2] Fix enqueueing of Minion jobs breaking `PARALLEL_ONE_HOST_ONLY=1` When restarting openQA jobs, additional Minion jobs are enqueued (of `git_clone` task, this is happening especially often when `git_auto_clone` is enabled). So far this didn't happen in a transaction. So the scheduler might see the openQA jobs but not the Minion jobs they are blocked by. This is problematic because the scheduler might assign jobs too soon to a worker. This is especially problematic if it does not happen consistently across a parallel job cluster as it then breaks the `PARALLEL_ONE_HOST_ONLY=1` feature. Related ticket: https://progress.opensuse.org/issues/169342 --- lib/OpenQA/Resource/Jobs.pm | 6 ------ lib/OpenQA/Schema/Result/Jobs.pm | 11 +++++++++-- lib/OpenQA/Shared/Plugin/Gru.pm | 2 +- t/api/04-jobs.t | 5 +++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lib/OpenQA/Resource/Jobs.pm b/lib/OpenQA/Resource/Jobs.pm index 83b67942ff7..65b1a1cc978 100644 --- a/lib/OpenQA/Resource/Jobs.pm +++ b/lib/OpenQA/Resource/Jobs.pm @@ -7,7 +7,6 @@ use Mojo::Base -strict, -signatures; use OpenQA::Jobs::Constants; use OpenQA::Schema; -use OpenQA::Utils qw(create_git_clone_list); use Exporter 'import'; our @EXPORT_OK = qw(job_restart); @@ -50,8 +49,6 @@ sub job_restart ($jobids, %args) { my $jobs_rs = $schema->resultset('Jobs'); my $jobs = $jobs_rs->search({id => $jobids, state => {'not in' => [PRISTINE_STATES]}}); $duplication_args{no_directly_chained_parent} = 1 unless $force; - my %clones; - my @clone_ids; while (my $job = $jobs->next) { my $job_id = $job->id; my $missing_assets = $job->missing_assets; @@ -76,10 +73,8 @@ sub job_restart ($jobids, %args) { my $cloned_job_or_error = $job->auto_duplicate(\%duplication_args); if (ref $cloned_job_or_error) { - create_git_clone_list($job->settings_hash, \%clones); push @duplicates, $cloned_job_or_error->{cluster_cloned}; push @comments, @{$cloned_job_or_error->{comments_created}}; - push @clone_ids, $cloned_job_or_error->{cluster_cloned}->{$job_id}; } else { $res{enforceable} = 1 if index($cloned_job_or_error, 'Direct parent ') == 0; @@ -87,7 +82,6 @@ sub job_restart ($jobids, %args) { } push @processed, $job_id; } - OpenQA::App->singleton->gru->enqueue_git_clones(\%clones, \@clone_ids) if keys %clones; # abort running jobs return \%res if $args{skip_aborting_jobs}; diff --git a/lib/OpenQA/Schema/Result/Jobs.pm b/lib/OpenQA/Schema/Result/Jobs.pm index 4a1e85c604f..e609578a414 100644 --- a/lib/OpenQA/Schema/Result/Jobs.pm +++ b/lib/OpenQA/Schema/Result/Jobs.pm @@ -11,7 +11,7 @@ use DateTime; use OpenQA::Constants qw(WORKER_COMMAND_ABORT WORKER_COMMAND_CANCEL); use OpenQA::Log qw(log_trace log_debug log_info log_warning log_error); use OpenQA::Utils ( - qw(parse_assets_from_settings locate_asset), + qw(create_git_clone_list parse_assets_from_settings locate_asset), qw(resultdir assetdir read_test_modules find_bugref random_string), qw(run_cmd_with_log_return_error needledir testcasedir gitrepodir find_video_files) ); @@ -693,18 +693,25 @@ sub _create_clones ($self, $jobs, $comments, $comment_text, $comment_user_id, @c $res->register_assets_from_settings; } + my %git_clones; + my @clone_ids; for my $original_job_id (@original_job_ids) { my $cloned_job = $clones{$original_job_id}; # calculate blocked_by $cloned_job->calculate_blocked_by; # add a reference to the clone within $jobs - $jobs->{$original_job_id}->{clone} = $cloned_job->id; + push @clone_ids, $jobs->{$original_job_id}->{clone} = $cloned_job->id; + # add Git repositories to clone + create_git_clone_list($cloned_job->settings_hash, \%git_clones); } # create comments on original jobs $result_source->schema->resultset('Comments') ->create_for_jobs(\@original_job_ids, $comment_text, $comment_user_id, $comments) if defined $comment_text; + + # enqueue Minion jobs to clone required Git repositories + OpenQA::App->singleton->gru->enqueue_git_clones(\%clones, \@clone_ids); } # internal (recursive) function for duplicate - returns hash of all jobs in the diff --git a/lib/OpenQA/Shared/Plugin/Gru.pm b/lib/OpenQA/Shared/Plugin/Gru.pm index 5ac31a611bd..f12fd54dba5 100644 --- a/lib/OpenQA/Shared/Plugin/Gru.pm +++ b/lib/OpenQA/Shared/Plugin/Gru.pm @@ -249,7 +249,7 @@ sub enqueue_git_update_all ($self) { } sub enqueue_git_clones ($self, $clones, $job_ids) { - return unless %$clones; + return unless keys %$clones; return unless OpenQA::App->singleton->config->{'scm git'}->{git_auto_clone} eq 'yes'; # $clones is a hashref with paths as keys and git urls as values # $job_id is used to create entries in a related table (gru_dependencies) diff --git a/t/api/04-jobs.t b/t/api/04-jobs.t index 4a2b879c7b1..67b4e1e61f7 100644 --- a/t/api/04-jobs.t +++ b/t/api/04-jobs.t @@ -62,6 +62,7 @@ $ENV{MOJO_MAX_MESSAGE_SIZE} = 207741824; my $t = client(Test::Mojo->new('OpenQA::WebAPI')); my $cfg = $t->app->config; +$cfg->{'scm git'}->{git_auto_clone} = 'no'; $cfg->{'scm git'}->{git_auto_update} = 'no'; is $cfg->{audit}->{blocklist}, 'job_grab', 'blocklist updated'; @@ -1614,7 +1615,7 @@ subtest 'handle FOO_URL' => sub { }; subtest 'handle git_clone with CASEDIR' => sub { - OpenQA::App->singleton->config->{'scm git'}->{git_auto_clone} = 'yes'; + $cfg->{'scm git'}->{git_auto_clone} = 'yes'; $testsuites->create( { name => 'handle_foo_casedir', @@ -1645,7 +1646,7 @@ subtest 'handle git_clone with CASEDIR' => sub { }; subtest 'handle git_clone without CASEDIR' => sub { - OpenQA::App->singleton->config->{'scm git'}->{git_auto_update} = 'yes'; + $cfg->{'scm git'}->{git_auto_update} = 'yes'; $testsuites->create( { name => 'handle_git_clone',