From 935cb7eaa62ad6198118892905c289e4e52aecf9 Mon Sep 17 00:00:00 2001 From: Aditya Choudhari <48932219+adityachoudhari26@users.noreply.github.com> Date: Tue, 7 Jan 2025 16:01:08 -0800 Subject: [PATCH] fix: Deployment timeouts (#278) --- apps/jobs/src/index.ts | 23 +- apps/jobs/src/timeout-checker/index.ts | 34 + .../EditDeploymentSection.tsx | 73 +- packages/db/drizzle/0051_brown_gambit.sql | 1 + packages/db/drizzle/meta/0051_snapshot.json | 4431 +++++++++++++++++ packages/db/drizzle/meta/_journal.json | 7 + packages/db/src/schema/deployment.ts | 8 + 7 files changed, 4556 insertions(+), 21 deletions(-) create mode 100644 apps/jobs/src/timeout-checker/index.ts create mode 100644 packages/db/drizzle/0051_brown_gambit.sql create mode 100644 packages/db/drizzle/meta/0051_snapshot.json diff --git a/apps/jobs/src/index.ts b/apps/jobs/src/index.ts index cb2c17d98..1af9bceb2 100644 --- a/apps/jobs/src/index.ts +++ b/apps/jobs/src/index.ts @@ -17,16 +17,12 @@ import { logger } from "@ctrlplane/logger"; import { run as expiredEnvChecker } from "./expired-env-checker/index.js"; import { run as jobPolicyChecker } from "./policy-checker/index.js"; +import { run as timeoutChecker } from "./timeout-checker/index.js"; const jobs: Record Promise; schedule: string }> = { - "policy-checker": { - run: jobPolicyChecker, - schedule: "* * * * *", // Default: Every minute - }, - "expired-env-checker": { - run: expiredEnvChecker, - schedule: "* * * * *", // Default: Every minute - }, + "policy-checker": { run: jobPolicyChecker, schedule: "* * * * *" }, + "expired-env-checker": { run: expiredEnvChecker, schedule: "* * * * *" }, + "timeout-checker": { run: timeoutChecker, schedule: "* * * * *" }, }; const jobSchema = z.object({ @@ -37,15 +33,8 @@ const jobSchema = z.object({ const parseJobArgs = () => { const { values } = parseArgs({ options: { - job: { - type: "string", - short: "j", - multiple: true, - }, - runOnce: { - type: "boolean", - short: "r", - }, + job: { type: "string", short: 
"j", multiple: true }, + runOnce: { type: "boolean", short: "r" }, }, }); return jobSchema.parse(values); diff --git a/apps/jobs/src/timeout-checker/index.ts b/apps/jobs/src/timeout-checker/index.ts new file mode 100644 index 000000000..6ef421824 --- /dev/null +++ b/apps/jobs/src/timeout-checker/index.ts @@ -0,0 +1,34 @@ +import { and, eq, isNotNull, lt, sql } from "@ctrlplane/db"; +import { db } from "@ctrlplane/db/client"; +import * as SCHEMA from "@ctrlplane/db/schema"; +import { updateJob } from "@ctrlplane/job-dispatch"; +import { JobStatus } from "@ctrlplane/validators/jobs"; + +export const run = async () => + db + .select({ id: SCHEMA.job.id }) + .from(SCHEMA.deployment) + .innerJoin( + SCHEMA.release, + eq(SCHEMA.release.deploymentId, SCHEMA.deployment.id), + ) + .innerJoin( + SCHEMA.releaseJobTrigger, + eq(SCHEMA.releaseJobTrigger.releaseId, SCHEMA.release.id), + ) + .innerJoin(SCHEMA.job, eq(SCHEMA.releaseJobTrigger.jobId, SCHEMA.job.id)) + .where( + and( + isNotNull(SCHEMA.deployment.timeout), + eq(SCHEMA.job.status, JobStatus.InProgress), + lt( + SCHEMA.job.createdAt, + sql`now() - ${SCHEMA.deployment.timeout} * interval '1 second'`, + ), + ), + ) + .then(async (jobs) => { + await Promise.all( + jobs.map((job) => updateJob(job.id, { status: JobStatus.Failure })), + ); + }); diff --git a/apps/webservice/src/app/[workspaceSlug]/(app)/systems/[systemSlug]/deployments/[deploymentSlug]/EditDeploymentSection.tsx b/apps/webservice/src/app/[workspaceSlug]/(app)/systems/[systemSlug]/deployments/[deploymentSlug]/EditDeploymentSection.tsx index 1a875158a..2c4e2a10a 100644 --- a/apps/webservice/src/app/[workspaceSlug]/(app)/systems/[systemSlug]/deployments/[deploymentSlug]/EditDeploymentSection.tsx +++ b/apps/webservice/src/app/[workspaceSlug]/(app)/systems/[systemSlug]/deployments/[deploymentSlug]/EditDeploymentSection.tsx @@ -2,7 +2,9 @@ import type { RouterOutputs } from "@ctrlplane/api"; import { useParams, useRouter } from "next/navigation"; -import { IconX } 
/**
 * Converts a human-readable duration string (e.g. "30s", "5m") into
 * milliseconds using `ms`.
 *
 * @param val - Non-empty duration string typed by the user.
 * @returns The duration in milliseconds, or `null` when `val` is not a
 *   duration `ms` can parse.
 *
 * `ms` throws only for empty or non-string input; for a string it does not
 * recognise it returns a non-number instead of throwing. Both outcomes are
 * folded into `null` here so callers have a single failure signal.
 */
const parseTimeoutMs = (val: string): number | null => {
  try {
    const parsed: unknown = ms(val);
    return typeof parsed === "number" && Number.isFinite(parsed)
      ? parsed
      : null;
  } catch {
    return null;
  }
};

/**
 * Form-level validator for the optional timeout field.
 *
 * An absent or empty value is accepted (meaning "no timeout"). Any other
 * value must be a parseable duration string of at least one second.
 */
const timeoutSchema = z
  .string()
  .optional()
  .refine(
    (val) => val == null || val === "" || parseTimeoutMs(val) != null,
    "Invalid timeout, must be a valid duration string",
  )
  .refine((val) => {
    if (val == null || val === "") return true;
    const timeout = parseTimeoutMs(val);
    // Unparseable input is already reported by the previous refinement;
    // returning true here avoids attaching a second, misleading message.
    return timeout == null || timeout >= 1000;
  }, "Timeout must be at least 1 second");

// The form edits the timeout as a duration string, so the numeric `timeout`
// field from the deployment schema is swapped for the string validator.
const schema = z
  .object(SCHEMA.deploymentSchema.shape)
  .omit({ timeout: true })
  .extend({ timeout: timeoutSchema });
prettyMilliseconds(deployment.timeout) + : undefined; + const defaultValues = { ...deployment, resourceFilter, timeout }; const form = useForm({ schema, defaultValues, mode: "onSubmit" }); const { handleSubmit, setError } = form; @@ -70,7 +104,12 @@ export const EditDeploymentSection: React.FC = ({ data.resourceFilter == null || isEmptyCondition(data.resourceFilter) ? null : data.resourceFilter; - const updates = { ...data, resourceFilter: filter }; + const timeout = + data.timeout != null && data.timeout !== "" + ? ms(data.timeout) / 1000 + : null; + const updates = { ...data, resourceFilter: filter, timeout }; + updateDeployment .mutateAsync({ id: deployment.id, data: updates }) .then((updatedDeployment) => { @@ -194,6 +233,32 @@ export const EditDeploymentSection: React.FC = ({ )} /> + ( + + + Timeout + + + + + + + If a job for this deployment takes longer than the + timeout, it will be marked as failed. + + + + + + + + + + )} + /> val >= 0, { message: "Retry count must be a non-negative number.", }), + timeout: z + .number() + .nullable() + .default(null) + .refine((val) => val == null || val >= 0, { + message: "Timeout must be a non-negative number.", + }), resourceFilter: resourceCondition .nullable() .optional() @@ -69,6 +76,7 @@ export const deployment = pgTable( .$type>() .notNull(), retryCount: integer("retry_count").notNull().default(0), + timeout: integer("timeout").default(sql`NULL`), resourceFilter: jsonb("resource_filter") .$type() .default(sql`NULL`),