diff --git a/ApiController.cs b/ApiController.cs
index 6e26b12..854ea45 100644
--- a/ApiController.cs
+++ b/ApiController.cs
@@ -13,6 +13,24 @@ public async Task GetRunners()
         var recentRunners = await db.Runners.Include(x => x.Lifecycle).OrderByDescending(x => x.RunnerId).Take(100).ToListAsync();
         return Results.Json(recentRunners);
     }
+
+    /// <summary>Returns up to 100 jobs as JSON, highest JobId first.</summary>
+    [Route("get-jobs")]
+    public async Task GetJobs()
+    {
+        await using var db = new ActionsRunnerContext();
+        var recentJobs = await db.Jobs.OrderByDescending(x => x.JobId).Take(100).ToListAsync();
+        return Results.Json(recentJobs);
+    }
+
+    /// <summary>Returns the runner with the given id, including its lifecycle events, as JSON (null when no runner matches).</summary>
+    [Route("get-runner/{runnerid}")]
+    public async Task GetRunner(int runnerid)
+    {
+        await using var db = new ActionsRunnerContext();
+        var runner = await db.Runners.Include(x => x.Lifecycle).FirstOrDefaultAsync(x => x.RunnerId == runnerid);
+        return Results.Json(runner);
+    }
 
     [Route("get-job/{jobid}")]
     public async Task GetJob(int jobid)
@@ -22,6 +40,38 @@ public async Task GetJob(int jobid)
         return Results.Json(job);
     }
 
+
+    /// <summary>
+    /// Lists the runners whose size, owner and profile match the given job and whose
+    /// last state is Created or Provisioned. Responds with 404 when the job id is unknown.
+    /// </summary>
+    [Route("get-potential-runners/{jobId}")]
+    public async Task GetPotentialRunners(int jobId)
+    {
+        await using var db = new ActionsRunnerContext();
+        var job = await db.Jobs.FirstOrDefaultAsync(x => x.JobId == jobId);
+        if (job == null)
+        {
+            // Without this guard an unknown id ends in a NullReferenceException below.
+            return Results.NotFound();
+        }
+
+        // get labels
+        string size = job.RequestedSize;
+        string owner = job.Owner;
+        string profile = job.RequestedProfile;
+
+        // The column filter runs in the database; LastState is evaluated in memory afterwards.
+        var candidates = await db.Runners
+            .Include(x => x.Lifecycle)
+            .Where(x => x.Size == size && x.Owner == owner && x.Profile == profile)
+            .ToListAsync();
+        var potentialRunners = candidates
+            .Where(x => x.LastState == RunnerStatus.Created || x.LastState == RunnerStatus.Provisioned)
+            .ToList();
+
+        return Results.Json(potentialRunners);
+    }
 
 
 }
\ No newline at end of file
diff --git a/Database/DbContext.cs b/Database/DbContext.cs
index eff59a9..bf8989f 100644
--- a/Database/DbContext.cs
+++ b/Database/DbContext.cs
@@ -123,4 +123,6 @@ public class Job
     public int? RunnerId { get; set; }
     public Runner Runner { get; set; }
     public bool Orphan { get; set; }
+    public string RequestedProfile { get; set; }
+    public string RequestedSize { get; set; }
 }
diff --git a/Migrations/20240729091932_jobadditions.Designer.cs b/Migrations/20240729091932_jobadditions.Designer.cs
new file mode 100644
index 0000000..68c8992
--- /dev/null
+++ b/Migrations/20240729091932_jobadditions.Designer.cs
@@ -0,0 +1,188 @@
+//
+using System;
+using GithubActionsOrchestrator.Database;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.EntityFrameworkCore.Infrastructure;
+using Microsoft.EntityFrameworkCore.Migrations;
+using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
+using Npgsql.EntityFrameworkCore.PostgreSQL.Metadata;
+
+#nullable disable
+
+namespace GithubActionsOrchestrator.Migrations
+{
+    [DbContext(typeof(ActionsRunnerContext))]
+    [Migration("20240729091932_jobadditions")]
+    partial class jobadditions
+    {
+        ///
+        protected override void BuildTargetModel(ModelBuilder modelBuilder)
+        {
+#pragma warning disable 612, 618
+            modelBuilder
+                .HasAnnotation("ProductVersion", "8.0.7")
+                .HasAnnotation("Relational:MaxIdentifierLength", 63);
+
+            NpgsqlModelBuilderExtensions.UseIdentityByDefaultColumns(modelBuilder);
+
+            modelBuilder.Entity("GithubActionsOrchestrator.Database.Job", b =>
+                {
+                    b.Property("JobId")
+                        .ValueGeneratedOnAdd()
+                        .HasColumnType("integer");
+
+                    NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("JobId"));
+
+                    b.Property("CompleteTime")
+                        .HasColumnType("timestamp with time zone");
+
+                    b.Property("GithubJobId")
+                        .HasColumnType("bigint");
+
+                    b.Property("InProgressTime")
+                        .HasColumnType("timestamp with time zone");
+
+                    b.Property("JobUrl")
+                        .HasColumnType("text");
+
+                    b.Property("Orphan")
+                        .HasColumnType("boolean");
+
+                    b.Property("Owner")
+                        .HasColumnType("text");
+
+                    b.Property("QueueTime")
+                        .HasColumnType("timestamp with time zone");
+
+                    b.Property("Repository")
+                        .HasColumnType("text");
+
+                    b.Property("RequestedProfile")
+                        .HasColumnType("text");
+
+                    b.Property("RequestedSize")
+                        .HasColumnType("text");
+
+                    b.Property("RunnerId")
+                        .HasColumnType("integer");
+
+                    b.Property("State")
+                        .HasColumnType("integer");
+
+                    b.HasKey("JobId");
+
+                    b.HasIndex("RunnerId")
+                        .IsUnique();
+
+                    b.ToTable("Jobs");
+                });
+
+            modelBuilder.Entity("GithubActionsOrchestrator.Database.Runner", b =>
+                {
+                    b.Property("RunnerId")
+                        .ValueGeneratedOnAdd()
+                        .HasColumnType("integer");
+
+                    NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("RunnerId"));
+
+                    b.Property("Arch")
+                        .HasColumnType("text");
+
+                    b.Property("Cloud")
+                        .HasColumnType("text");
+
+                    b.Property("CloudServerId")
+                        .HasColumnType("bigint");
+
+                    b.Property("Hostname")
+                        .HasColumnType("text");
+
+                    b.Property("IPv4")
+                        .HasColumnType("text");
+
+                    b.Property("IsCustom")
+                        .HasColumnType("boolean");
+
+                    b.Property("IsOnline")
+                        .HasColumnType("boolean");
+
+                    b.Property("JobId")
+                        .HasColumnType("integer");
+
+                    b.Property("Owner")
+                        .HasColumnType("text");
+
+                    b.Property("Profile")
+                        .HasColumnType("text");
+
+                    b.Property("Size")
+                        .HasColumnType("text");
+
+                    b.HasKey("RunnerId");
+
+                    b.HasIndex("JobId")
+                        .IsUnique();
+
+                    b.ToTable("Runners");
+                });
+
+            modelBuilder.Entity("GithubActionsOrchestrator.Database.RunnerLifecycle", b =>
+                {
+                    b.Property("RunnerLifecycleId")
+                        .ValueGeneratedOnAdd()
+                        .HasColumnType("integer");
+
+                    NpgsqlPropertyBuilderExtensions.UseIdentityByDefaultColumn(b.Property("RunnerLifecycleId"));
+
+                    b.Property("Event")
+                        .HasColumnType("text");
+
+                    b.Property("EventTimeUtc")
+                        .HasColumnType("timestamp with time zone");
+
+                    b.Property("RunnerId")
+                        .HasColumnType("integer");
+
+                    b.Property("Status")
+                        .HasColumnType("integer");
+
+                    b.HasKey("RunnerLifecycleId");
+
+                    b.HasIndex("RunnerId");
+
+                    b.ToTable("RunnerLifecycles");
+                });
+
+            modelBuilder.Entity("GithubActionsOrchestrator.Database.Job", b =>
+                {
+                    b.HasOne("GithubActionsOrchestrator.Database.Runner", "Runner")
+                        .WithOne()
+                        .HasForeignKey("GithubActionsOrchestrator.Database.Job", "RunnerId");
+
+                    b.Navigation("Runner");
+                });
+
+            modelBuilder.Entity("GithubActionsOrchestrator.Database.Runner", b =>
+                {
+                    b.HasOne("GithubActionsOrchestrator.Database.Job", "Job")
+                        .WithOne()
+                        .HasForeignKey("GithubActionsOrchestrator.Database.Runner", "JobId");
+
+                    b.Navigation("Job");
+                });
+
+            modelBuilder.Entity("GithubActionsOrchestrator.Database.RunnerLifecycle", b =>
+                {
+                    b.HasOne("GithubActionsOrchestrator.Database.Runner", null)
+                        .WithMany("Lifecycle")
+                        .HasForeignKey("RunnerId");
+                });
+
+            modelBuilder.Entity("GithubActionsOrchestrator.Database.Runner", b =>
+                {
+                    b.Navigation("Lifecycle");
+                });
+#pragma warning restore 612, 618
+        }
+    }
+}
diff --git a/Migrations/20240729091932_jobadditions.cs b/Migrations/20240729091932_jobadditions.cs
new file mode 100644
index 0000000..6242d74
--- /dev/null
+++ b/Migrations/20240729091932_jobadditions.cs
@@ -0,0 +1,38 @@
+using Microsoft.EntityFrameworkCore.Migrations;
+
+#nullable disable
+
+namespace GithubActionsOrchestrator.Migrations
+{
+    ///
+    public partial class jobadditions : Migration
+    {
+        ///
+        protected override void Up(MigrationBuilder migrationBuilder)
+        {
+            migrationBuilder.AddColumn(
+                name: "RequestedProfile",
+                table: "Jobs",
+                type: "text",
+                nullable: true);
+
+            migrationBuilder.AddColumn(
+                name: "RequestedSize",
+                table: "Jobs",
+                type: "text",
+                nullable: true);
+        }
+
+        ///
+        protected override void Down(MigrationBuilder migrationBuilder)
+        {
+            migrationBuilder.DropColumn(
+                name: "RequestedProfile",
+                table: "Jobs");
+
+            migrationBuilder.DropColumn(
+                name: "RequestedSize",
+                table: "Jobs");
+        }
+    }
+}
diff --git a/Migrations/ActionsRunnerContextModelSnapshot.cs b/Migrations/ActionsRunnerContextModelSnapshot.cs
index 3edf83d..4a230f5 100644
--- a/Migrations/ActionsRunnerContextModelSnapshot.cs
+++ b/Migrations/ActionsRunnerContextModelSnapshot.cs
@@ -54,6 +54,12 @@ protected override void BuildModel(ModelBuilder modelBuilder)
                     b.Property("Repository")
                         .HasColumnType("text");
 
+                    b.Property("RequestedProfile")
+                        .HasColumnType("text");
+
+                    b.Property("RequestedSize")
+                        .HasColumnType("text");
+
                     b.Property("RunnerId")
                         .HasColumnType("integer");
 
diff --git a/PoolManager.cs b/PoolManager.cs
index 61974b2..ffd2aa3 100644
--- a/PoolManager.cs
+++ b/PoolManager.cs
@@ -67,10 +67,9 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
             if (DateTime.UtcNow - crudeTimer > TimeSpan.FromMinutes(cullMinutes))
             {
                 _logger.LogInformation("Cleaning runners...");
-                // update the world state for htz
-                allHtzSrvs = await _cc.GetAllServersFromCsp();
                 await CleanUpRunners(targetConfig);
                 await StartPoolRunners(targetConfig);
+                await CheckForStuckJobs(targetConfig);
                 crudeTimer = DateTime.UtcNow;
             }
 
@@ -221,6 +220,79 @@ private async Task StartPoolRunners(List targetConfig
         }
     }
 
+    /// <summary>
+    /// Queues a replacement runner for every job that is still queued without an
+    /// assigned runner more than ten minutes after its queue time.
+    /// </summary>
+    /// <param name="targetConfig">Configured targets; a job is matched to the entry whose Name equals the job's Owner.</param>
+    private async Task CheckForStuckJobs(List targetConfig)
+    {
+        await using var db = new ActionsRunnerContext();
+        var stuckTime = DateTime.UtcNow - TimeSpan.FromMinutes(10);
+
+        // Load the jobs up front: SaveChangesAsync below shares this context's connection and
+        // Npgsql cannot run it while a streamed query (AsEnumerable) still has its reader open.
+        // Restricting to JobState.Queued keeps jobs that ended without ever getting a runner
+        // from being treated as stuck on every pass.
+        var stuckJobs = await db.Jobs
+            .Where(x => x.RunnerId == null && x.State == JobState.Queued && x.QueueTime < stuckTime)
+            .ToListAsync();
+
+        // NOTE(review): nothing records that a job was compensated, so a job that stays queued
+        // gets another runner on each pass - confirm this is intended.
+        foreach (var stuckJob in stuckJobs)
+        {
+            _logger.LogWarning($"Found stuck Job: {stuckJob.JobId} in {stuckJob.Repository}. Starting new runner to compensate...");
+
+            var owner = targetConfig.FirstOrDefault(x => x.Name == stuckJob.Owner);
+            if (owner == null)
+            {
+                _logger.LogError($"Unable to get owner for stuck job. {stuckJob.JobId}");
+                continue;
+            }
+
+            string runnerToken = owner.Target switch
+            {
+                TargetType.Repository => await GitHubApi.GetRunnerTokenForRepo(owner.GitHubToken, owner.Name),
+                TargetType.Organization => await GitHubApi.GetRunnerTokenForOrg(owner.GitHubToken, owner.Name),
+                _ => throw new ArgumentOutOfRangeException(nameof(targetConfig), owner.Target, null)
+            };
+            var profile = stuckJob.RequestedProfile ?? "default";
+            string arch = Program.Config.Sizes.FirstOrDefault(x => x.Name == stuckJob.RequestedSize)?.Arch;
+            Runner newRunner = new()
+            {
+                Size = stuckJob.RequestedSize,
+                Cloud = "htz",
+                Hostname = "Unknown",
+                Profile = profile,
+                Lifecycle =
+                [
+                    new RunnerLifecycle
+                    {
+                        EventTimeUtc = DateTime.UtcNow,
+                        Status = RunnerStatus.CreationQueued,
+                        Event = $"Created for stuck job {stuckJob.JobId}"
+                    }
+                ],
+                IsOnline = false,
+                Arch = arch,
+                IPv4 = string.Empty,
+                IsCustom = profile != "default",
+                Owner = stuckJob.Owner
+            };
+            await db.Runners.AddAsync(newRunner);
+            await db.SaveChangesAsync();
+
+            _queues.CreateTasks.Enqueue(new CreateRunnerTask
+            {
+                RunnerToken = runnerToken,
+                RepoName = stuckJob.Repository,
+                TargetType = owner.Target,
+                RunnerDbId = newRunner.RunnerId,
+            });
+        }
+    }
+
     private async Task CleanUpRunners(List targetConfigs)
     {
         List registeredServerNames = new();
@@ -340,8 +412,18 @@ private async Task CleanUpRunners(List targetConfigs)
             }
 
             var runner = await db.Runners.Include(x => x.Lifecycle).FirstOrDefaultAsync(x => x.CloudServerId == htzSrv.Id);
-
-            if (runner.LastState >= RunnerStatus.Provisioned && DateTime.UtcNow - runner.LastStateTime > TimeSpan.FromMinutes(5))
+            if (runner.Lifecycle.Any(x => x.Status == RunnerStatus.DeletionQueued))
+            {
+                runner.Lifecycle.Add(new()
+                {
+                    Status = RunnerStatus.DeletionQueued,
+                    Event = "Don't queue deletion due to Github registration. Runner already queued for deletion.",
+                    EventTimeUtc = DateTime.UtcNow
+                });
+                await db.SaveChangesAsync();
+
+            }
+            else if (runner.LastState >= RunnerStatus.Provisioned && DateTime.UtcNow - runner.LastStateTime > TimeSpan.FromMinutes(5))
             {
                 _logger.LogInformation($"Removing VM that is not in any GitHub registration: {htzSrv.Name} created at {htzSrv.Created:u}");
                 runner.IsOnline = false;
diff --git a/Program.cs b/Program.cs
index e98a84e..b24af5e 100644
--- a/Program.cs
+++ b/Program.cs
@@ -213,19 +213,7 @@ private static async Task GithubWebhookHandler(HttpRequest request, [Fr
         switch (action)
         {
             case "queued":
-                Job queuedJob = new()
-                {
-                    GithubJobId = jobId,
-                    Repository = repoName,
-                    Owner = orgName,
-                    State = JobState.Queued,
-                    QueueTime = DateTime.UtcNow,
-                    JobUrl = jobUrl,
-                    Orphan = false
-                };
-                await db.Jobs.AddAsync(queuedJob);
-                await db.SaveChangesAsync();
-                await JobQueued(logger, repoName, labels, orgName, poolMgr, isRepo ? TargetType.Repository : TargetType.Organization);
+                await JobQueued(logger, repoName, labels, orgName, poolMgr, isRepo ? TargetType.Repository : TargetType.Organization, jobId, jobUrl);
                 break;
             case "in_progress":
                 var dbWorkflow = await db.Jobs.FirstOrDefaultAsync(x => x.GithubJobId == jobId);
@@ -358,7 +346,7 @@ private static async Task AddRunnerManuallyHandler(HttpRequest request,
 
         try
         {
-            await JobQueued(logger, repoName, labels, orgName, poolMgr, isRepo ? TargetType.Repository : TargetType.Organization);
+            await JobQueued(logger, repoName, labels, orgName, poolMgr, isRepo ? TargetType.Repository : TargetType.Organization, -1, null);
         }
         catch (Exception ex)
         {
@@ -454,7 +442,7 @@ private static async Task JobInProgress(JsonElement workflowJson, ILogger logger, string repoName, List labels, string orgName, RunnerQueue poolMgr, TargetType targetType)
+    private static async Task JobQueued(ILogger logger, string repoName, List labels, string orgName, RunnerQueue poolMgr, TargetType targetType, long jobId, string jobUrl)
     {
         logger.LogInformation($"New Workflow Job was queued for {repoName}. Queuing VM creation to replenish pool...");
 
@@ -530,8 +518,31 @@ private static async Task JobQueued(ILogger logger, string repoName, Li
             return;
         }
 
+        string owner = targetType switch
+        {
+            TargetType.Organization => orgName,
+            TargetType.Repository => repoName,
+            _ => throw new ArgumentOutOfRangeException(nameof(targetType), targetType, null)
+        };
         // Record runner to database
         await using var db = new ActionsRunnerContext();
+        if (jobId > 0)
+        {
+            Job queuedJob = new()
+            {
+                GithubJobId = jobId,
+                Repository = repoName,
+                Owner = owner,
+                State = JobState.Queued,
+                QueueTime = DateTime.UtcNow,
+                JobUrl = jobUrl,
+                Orphan = false,
+                RequestedProfile = profileName,
+                RequestedSize = size
+            };
+            await db.Jobs.AddAsync(queuedJob);
+        }
+
         Runner newRunner = new()
         {
             Size = size,
@@ -551,7 +562,7 @@ private static async Task JobQueued(ILogger logger, string repoName, Li
             Arch = arch,
             IPv4 = string.Empty,
             IsCustom = isCustom,
-            Owner = orgName
+            Owner = owner
         };
         await db.Runners.AddAsync(newRunner);