Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Spark pod template overrides #4183

Merged
merged 7 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package flytek8s

import (
pluginsCore "github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/core"
)

// Wraps a regular TaskExecutionMetadata and overrides the IsInterruptible method to always return false
// This is useful as the runner and the scheduler pods should never be interruptible
type NonInterruptibleTaskExecutionMetadata struct {
pluginsCore.TaskExecutionMetadata
}

func (n NonInterruptibleTaskExecutionMetadata) IsInterruptible() bool {
return false

Check warning on line 14 in flyteplugins/go/tasks/pluginmachinery/flytek8s/non_interruptible.go

View check run for this annotation

Codecov / codecov/patch

flyteplugins/go/tasks/pluginmachinery/flytek8s/non_interruptible.go#L13-L14

Added lines #L13 - L14 were not covered by tests
}

// A wrapper around a regular TaskExecutionContext allowing to inject a custom TaskExecutionMetadata which is
// non-interruptible
type NonInterruptibleTaskExecutionContext struct {
pluginsCore.TaskExecutionContext
metadata NonInterruptibleTaskExecutionMetadata
}

func (n NonInterruptibleTaskExecutionContext) TaskExecutionMetadata() pluginsCore.TaskExecutionMetadata {
return n.metadata

Check warning on line 25 in flyteplugins/go/tasks/pluginmachinery/flytek8s/non_interruptible.go

View check run for this annotation

Codecov / codecov/patch

flyteplugins/go/tasks/pluginmachinery/flytek8s/non_interruptible.go#L24-L25

Added lines #L24 - L25 were not covered by tests
}

func NewNonInterruptibleTaskExecutionContext(ctx pluginsCore.TaskExecutionContext) NonInterruptibleTaskExecutionContext {
return NonInterruptibleTaskExecutionContext{
TaskExecutionContext: ctx,
metadata: NonInterruptibleTaskExecutionMetadata{
ctx.TaskExecutionMetadata(),
},
}

Check warning on line 34 in flyteplugins/go/tasks/pluginmachinery/flytek8s/non_interruptible.go

View check run for this annotation

Codecov / codecov/patch

flyteplugins/go/tasks/pluginmachinery/flytek8s/non_interruptible.go#L28-L34

Added lines #L28 - L34 were not covered by tests
}
9 changes: 9 additions & 0 deletions flyteplugins/go/tasks/pluginmachinery/flytek8s/pod_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,15 @@
return podSpec, objectMeta, primaryContainerName, nil
}

func GetContainer(podSpec *v1.PodSpec, name string) (*v1.Container, error) {
for _, container := range podSpec.Containers {
if container.Name == name {
return &container, nil
}

Check warning on line 278 in flyteplugins/go/tasks/pluginmachinery/flytek8s/pod_helper.go

View check run for this annotation

Codecov / codecov/patch

flyteplugins/go/tasks/pluginmachinery/flytek8s/pod_helper.go#L274-L278

Added lines #L274 - L278 were not covered by tests
}
return nil, pluginserrors.Errorf(pluginserrors.BadTaskSpecification, "invalid TaskSpecification, container [%s] not defined", name)

Check warning on line 280 in flyteplugins/go/tasks/pluginmachinery/flytek8s/pod_helper.go

View check run for this annotation

Codecov / codecov/patch

flyteplugins/go/tasks/pluginmachinery/flytek8s/pod_helper.go#L280

Added line #L280 was not covered by tests
}

// getBasePodTemplate attempts to retrieve the PodTemplate to use as the base for k8s Pod configuration. This value can
// come from one of the following:
// (1) PodTemplate name in the TaskMetadata: This name is then looked up in the PodTemplateStore.
Expand Down
50 changes: 10 additions & 40 deletions flyteplugins/go/tasks/plugins/k8s/dask/dask.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ import (
"time"

daskAPI "github.com/dask/dask-kubernetes/v2023/dask_kubernetes/operator/go_client/pkg/apis/kubernetes.dask.org/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins"
"github.com/flyteorg/flyte/flyteplugins/go/tasks/errors"
"github.com/flyteorg/flyte/flyteplugins/go/tasks/logs"
Expand All @@ -15,54 +21,19 @@ import (
"github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/k8s"
"github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/tasklog"
"github.com/flyteorg/flyte/flyteplugins/go/tasks/pluginmachinery/utils"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes/scheme"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const (
daskTaskType = "dask"
KindDaskJob = "DaskJob"
)

// Wraps a regular TaskExecutionMetadata and overrides the IsInterruptible method to always return false
// This is useful as the runner and the scheduler pods should never be interruptible
type nonInterruptibleTaskExecutionMetadata struct {
pluginsCore.TaskExecutionMetadata
}

func (n nonInterruptibleTaskExecutionMetadata) IsInterruptible() bool {
return false
}

// A wrapper around a regular TaskExecutionContext allowing to inject a custom TaskExecutionMetadata which is
// non-interruptible
type nonInterruptibleTaskExecutionContext struct {
pluginsCore.TaskExecutionContext
metadata nonInterruptibleTaskExecutionMetadata
}

func (n nonInterruptibleTaskExecutionContext) TaskExecutionMetadata() pluginsCore.TaskExecutionMetadata {
return n.metadata
}

func mergeMapInto(src map[string]string, dst map[string]string) {
for key, value := range src {
dst[key] = value
}
}

func getPrimaryContainer(spec *v1.PodSpec, primaryContainerName string) (*v1.Container, error) {
for _, container := range spec.Containers {
if container.Name == primaryContainerName {
return &container, nil
}
}
return nil, errors.Errorf(errors.BadTaskSpecification, "primary container [%v] not found in pod spec", primaryContainerName)
}

func replacePrimaryContainer(spec *v1.PodSpec, primaryContainerName string, container v1.Container) error {
for i, c := range spec.Containers {
if c.Name == primaryContainerName {
Expand Down Expand Up @@ -104,8 +75,7 @@ func (p daskResourceHandler) BuildResource(ctx context.Context, taskCtx pluginsC
if err != nil {
return nil, err
}
nonInterruptibleTaskMetadata := nonInterruptibleTaskExecutionMetadata{taskCtx.TaskExecutionMetadata()}
nonInterruptibleTaskCtx := nonInterruptibleTaskExecutionContext{taskCtx, nonInterruptibleTaskMetadata}
nonInterruptibleTaskCtx := flytek8s.NewNonInterruptibleTaskExecutionContext(taskCtx)
nonInterruptiblePodSpec, _, _, err := flytek8s.ToK8sPodSpec(ctx, nonInterruptibleTaskCtx)
if err != nil {
return nil, err
Expand Down Expand Up @@ -144,7 +114,7 @@ func (p daskResourceHandler) BuildResource(ctx context.Context, taskCtx pluginsC

func createWorkerSpec(cluster plugins.DaskWorkerGroup, podSpec *v1.PodSpec, primaryContainerName string) (*daskAPI.WorkerSpec, error) {
workerPodSpec := podSpec.DeepCopy()
primaryContainer, err := getPrimaryContainer(workerPodSpec, primaryContainerName)
primaryContainer, err := flytek8s.GetContainer(workerPodSpec, primaryContainerName)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -206,7 +176,7 @@ func createWorkerSpec(cluster plugins.DaskWorkerGroup, podSpec *v1.PodSpec, prim

func createSchedulerSpec(scheduler plugins.DaskScheduler, clusterName string, podSpec *v1.PodSpec, primaryContainerName string) (*daskAPI.SchedulerSpec, error) {
schedulerPodSpec := podSpec.DeepCopy()
primaryContainer, err := getPrimaryContainer(schedulerPodSpec, primaryContainerName)
primaryContainer, err := flytek8s.GetContainer(schedulerPodSpec, primaryContainerName)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -283,7 +253,7 @@ func createJobSpec(workerSpec daskAPI.WorkerSpec, schedulerSpec daskAPI.Schedule
jobPodSpec := podSpec.DeepCopy()
jobPodSpec.RestartPolicy = v1.RestartPolicyNever

primaryContainer, err := getPrimaryContainer(jobPodSpec, primaryContainerName)
primaryContainer, err := flytek8s.GetContainer(jobPodSpec, primaryContainerName)
if err != nil {
return nil, err
}
Expand Down
Loading
Loading