diff --git a/pkg/cmd/run.go b/pkg/cmd/run.go index 7d8aa5ae..c66e7c33 100644 --- a/pkg/cmd/run.go +++ b/pkg/cmd/run.go @@ -25,6 +25,11 @@ var ( ImageNameTag = "quay.io/iovisor/kubectl-trace-bpftrace:latest" // InitImageNameTag represents the default init container image InitImageNameTag = "quay.io/iovisor/kubectl-trace-init:latest" + // DefaultDeadline is the maximum time a tracejob is allowed to run, in seconds + DefaultDeadline = 3600 + // DefaultDeadlineGracePeriod is the maximum time to wait to print a map or histogram, in seconds + // note that it must account for startup time, as the deadline is based on start time + DefaultDeadlineGracePeriod = 30 ) var ( @@ -66,13 +71,15 @@ type RunOptions struct { explicitNamespace bool // Flags local to this command - container string - eval string - program string - serviceAccount string - imageName string - initImageName string - fetchHeaders bool + container string + eval string + program string + serviceAccount string + imageName string + initImageName string + fetchHeaders bool + deadline int64 + deadlineGracePeriod int64 resourceArg string attach bool @@ -88,9 +95,11 @@ func NewRunOptions(streams genericclioptions.IOStreams) *RunOptions { return &RunOptions{ IOStreams: streams, - serviceAccount: "default", - imageName: ImageNameTag, - initImageName: InitImageNameTag, + serviceAccount: "default", + imageName: ImageNameTag, + initImageName: InitImageNameTag, + deadline: int64(DefaultDeadline), + deadlineGracePeriod: int64(DefaultDeadlineGracePeriod), } } @@ -127,6 +136,8 @@ func NewRunCommand(factory factory.Factory, streams genericclioptions.IOStreams) cmd.Flags().StringVar(&o.imageName, "imagename", o.imageName, "Custom image for the tracerunner") cmd.Flags().StringVar(&o.initImageName, "init-imagename", o.initImageName, "Custom image for the init container responsible to fetch and prepare linux headers") cmd.Flags().BoolVar(&o.fetchHeaders, "fetch-headers", o.fetchHeaders, "Whether to fetch linux headers or not") + 
cmd.Flags().Int64Var(&o.deadline, "deadline", o.deadline, "Maximum time to allow trace to run in seconds") + cmd.Flags().Int64Var(&o.deadlineGracePeriod, "deadline-grace-period", o.deadlineGracePeriod, "Maximum wait time to print maps or histograms after deadline, in seconds") return cmd } @@ -289,19 +300,20 @@ func (o *RunOptions) Run() error { } tj := tracejob.TraceJob{ - Name: fmt.Sprintf("%s%s", meta.ObjectNamePrefix, string(juid)), - Namespace: o.namespace, - ServiceAccount: o.serviceAccount, - ID: juid, - Hostname: o.nodeName, - Program: o.program, - PodUID: o.podUID, - ContainerName: o.container, - IsPod: o.isPod, - // todo(dalehamel) > following fields to be used for #48 - ImageNameTag: o.imageName, - InitImageNameTag: o.initImageName, - FetchHeaders: o.fetchHeaders, + Name: fmt.Sprintf("%s%s", meta.ObjectNamePrefix, string(juid)), + Namespace: o.namespace, + ServiceAccount: o.serviceAccount, + ID: juid, + Hostname: o.nodeName, + Program: o.program, + PodUID: o.podUID, + ContainerName: o.container, + IsPod: o.isPod, + ImageNameTag: o.imageName, + InitImageNameTag: o.initImageName, + FetchHeaders: o.fetchHeaders, + Deadline: o.deadline, + DeadlineGracePeriod: o.deadlineGracePeriod, } job, err := tc.CreateJob(tj) diff --git a/pkg/tracejob/job.go b/pkg/tracejob/job.go index dd68fc3c..6a9517ec 100644 --- a/pkg/tracejob/job.go +++ b/pkg/tracejob/job.go @@ -4,6 +4,7 @@ import ( "fmt" "io" "io/ioutil" + "strconv" "github.com/iovisor/kubectl-trace/pkg/meta" batchv1 "k8s.io/api/batch/v1" @@ -23,20 +24,22 @@ type TraceJobClient struct { // TraceJob is a container of info needed to create the job responsible for tracing. 
type TraceJob struct { - Name string - ID types.UID - Namespace string - ServiceAccount string - Hostname string - Program string - PodUID string - ContainerName string - IsPod bool - ImageNameTag string - InitImageNameTag string - FetchHeaders bool - StartTime *metav1.Time - Status TraceJobStatus + Name string + ID types.UID + Namespace string + ServiceAccount string + Hostname string + Program string + PodUID string + ContainerName string + IsPod bool + ImageNameTag string + InitImageNameTag string + FetchHeaders bool + Deadline int64 + DeadlineGracePeriod int64 + StartTime *metav1.Time + Status TraceJobStatus } // WithOutStream setup a file stream to output trace job operation information @@ -184,6 +187,11 @@ func (t *TraceJobClient) DeleteJobs(nf TraceJobFilter) error { func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) { bpfTraceCmd := []string{ + "/bin/timeout", + "--preserve-status", + "--signal", + "INT", + strconv.FormatInt(nj.Deadline, 10), "/bin/trace-runner", "--program=/programs/program.bt", } @@ -217,6 +225,7 @@ func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) { job := &batchv1.Job{ ObjectMeta: commonMeta, Spec: batchv1.JobSpec{ + ActiveDeadlineSeconds: int64Ptr(nj.Deadline + nj.DeadlineGracePeriod), TTLSecondsAfterFinished: int32Ptr(5), Parallelism: int32Ptr(1), Completions: int32Ptr(1), @@ -294,6 +303,20 @@ func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) { SecurityContext: &apiv1.SecurityContext{ Privileged: boolPtr(true), }, + // We want to send SIGINT prior to the pod being killed, so we can print the map + // we will also wait for the deadline grace period (DeadlineGracePeriod seconds) to give bpftrace time to + // process and summarize the data + Lifecycle: &apiv1.Lifecycle{ + PreStop: &apiv1.Handler{ + Exec: &apiv1.ExecAction{ + Command: []string{ + "/bin/bash", + "-c", + fmt.Sprintf("kill -SIGINT $(pidof bpftrace) && sleep %s", strconv.FormatInt(nj.DeadlineGracePeriod, 10)), + }, + }, + }, + }, }, }, 
RestartPolicy: "Never",