Skip to content

Commit

Permalink
Merge pull request #88 from iovisor/job-deadlines
Browse files Browse the repository at this point in the history
feat: Job deadlines
  • Loading branch information
dalehamel authored Sep 18, 2019
2 parents ccbe98a + 7f9d490 commit 7da686a
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 37 deletions.
58 changes: 35 additions & 23 deletions pkg/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ var (
ImageNameTag = "quay.io/iovisor/kubectl-trace-bpftrace:latest"
// InitImageNameTag represents the default init container image
InitImageNameTag = "quay.io/iovisor/kubectl-trace-init:latest"
// DefaultDeadline is the maximum time a tracejob is allowed to run, in seconds
DefaultDeadline = 3600
// DefaultDeadlineGracePeriod is the maximum time to wait to print a map or histogram, in seconds
// note that it must account for startup time, as the deadline is based on start time
DefaultDeadlineGracePeriod = 30
)

var (
Expand Down Expand Up @@ -66,13 +71,15 @@ type RunOptions struct {
explicitNamespace bool

// Flags local to this command
container string
eval string
program string
serviceAccount string
imageName string
initImageName string
fetchHeaders bool
container string
eval string
program string
serviceAccount string
imageName string
initImageName string
fetchHeaders bool
deadline int64
deadlineGracePeriod int64

resourceArg string
attach bool
Expand All @@ -88,9 +95,11 @@ func NewRunOptions(streams genericclioptions.IOStreams) *RunOptions {
return &RunOptions{
IOStreams: streams,

serviceAccount: "default",
imageName: ImageNameTag,
initImageName: InitImageNameTag,
serviceAccount: "default",
imageName: ImageNameTag,
initImageName: InitImageNameTag,
deadline: int64(DefaultDeadline),
deadlineGracePeriod: int64(DefaultDeadlineGracePeriod),
}
}

Expand Down Expand Up @@ -127,6 +136,8 @@ func NewRunCommand(factory factory.Factory, streams genericclioptions.IOStreams)
cmd.Flags().StringVar(&o.imageName, "imagename", o.imageName, "Custom image for the tracerunner")
cmd.Flags().StringVar(&o.initImageName, "init-imagename", o.initImageName, "Custom image for the init container responsible to fetch and prepare linux headers")
cmd.Flags().BoolVar(&o.fetchHeaders, "fetch-headers", o.fetchHeaders, "Whether to fetch linux headers or not")
cmd.Flags().Int64Var(&o.deadline, "deadline", o.deadline, "Maximum time to allow trace to run in seconds")
cmd.Flags().Int64Var(&o.deadlineGracePeriod, "deadline-grace-period", o.deadlineGracePeriod, "Maximum wait time to print maps or histograms after deadline, in seconds")

return cmd
}
Expand Down Expand Up @@ -289,19 +300,20 @@ func (o *RunOptions) Run() error {
}

tj := tracejob.TraceJob{
Name: fmt.Sprintf("%s%s", meta.ObjectNamePrefix, string(juid)),
Namespace: o.namespace,
ServiceAccount: o.serviceAccount,
ID: juid,
Hostname: o.nodeName,
Program: o.program,
PodUID: o.podUID,
ContainerName: o.container,
IsPod: o.isPod,
// todo(dalehamel) > following fields to be used for #48
ImageNameTag: o.imageName,
InitImageNameTag: o.initImageName,
FetchHeaders: o.fetchHeaders,
Name: fmt.Sprintf("%s%s", meta.ObjectNamePrefix, string(juid)),
Namespace: o.namespace,
ServiceAccount: o.serviceAccount,
ID: juid,
Hostname: o.nodeName,
Program: o.program,
PodUID: o.podUID,
ContainerName: o.container,
IsPod: o.isPod,
ImageNameTag: o.imageName,
InitImageNameTag: o.initImageName,
FetchHeaders: o.fetchHeaders,
Deadline: o.deadline,
DeadlineGracePeriod: o.deadlineGracePeriod,
}

job, err := tc.CreateJob(tj)
Expand Down
51 changes: 37 additions & 14 deletions pkg/tracejob/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"io"
"io/ioutil"
"strconv"

"github.com/iovisor/kubectl-trace/pkg/meta"
batchv1 "k8s.io/api/batch/v1"
Expand All @@ -23,20 +24,22 @@ type TraceJobClient struct {

// TraceJob is a container of info needed to create the job responsible for tracing.
type TraceJob struct {
Name string
ID types.UID
Namespace string
ServiceAccount string
Hostname string
Program string
PodUID string
ContainerName string
IsPod bool
ImageNameTag string
InitImageNameTag string
FetchHeaders bool
StartTime *metav1.Time
Status TraceJobStatus
Name string
ID types.UID
Namespace string
ServiceAccount string
Hostname string
Program string
PodUID string
ContainerName string
IsPod bool
ImageNameTag string
InitImageNameTag string
FetchHeaders bool
Deadline int64
DeadlineGracePeriod int64
StartTime *metav1.Time
Status TraceJobStatus
}

// WithOutStream setup a file stream to output trace job operation information
Expand Down Expand Up @@ -184,6 +187,11 @@ func (t *TraceJobClient) DeleteJobs(nf TraceJobFilter) error {
func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) {

bpfTraceCmd := []string{
"/bin/timeout",
"--preserve-status",
"--signal",
"INT",
strconv.FormatInt(nj.Deadline, 10),
"/bin/trace-runner",
"--program=/programs/program.bt",
}
Expand Down Expand Up @@ -217,6 +225,7 @@ func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) {
job := &batchv1.Job{
ObjectMeta: commonMeta,
Spec: batchv1.JobSpec{
ActiveDeadlineSeconds: int64Ptr(nj.Deadline + nj.DeadlineGracePeriod),
TTLSecondsAfterFinished: int32Ptr(5),
Parallelism: int32Ptr(1),
Completions: int32Ptr(1),
Expand Down Expand Up @@ -294,6 +303,20 @@ func (t *TraceJobClient) CreateJob(nj TraceJob) (*batchv1.Job, error) {
SecurityContext: &apiv1.SecurityContext{
Privileged: boolPtr(true),
},
// We want to send SIGINT prior to the pod being killed, so we can print the map
// we will also wait for the configured deadline grace period (in seconds) to give bpftrace time to
// process and summarize the data
Lifecycle: &apiv1.Lifecycle{
PreStop: &apiv1.Handler{
Exec: &apiv1.ExecAction{
Command: []string{
"/bin/bash",
"-c",
fmt.Sprintf("kill -SIGINT $(pidof bpftrace) && sleep %s", strconv.FormatInt(nj.DeadlineGracePeriod, 10)),
},
},
},
},
},
},
RestartPolicy: "Never",
Expand Down

0 comments on commit 7da686a

Please sign in to comment.