Skip to content

[apiserver] Start apiserver v2 in apiserver/cmd/main.go #3603

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apiserver/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ RUN go mod download
COPY proto/ proto/
COPY ray-operator/ ray-operator/
COPY apiserver/ apiserver/
COPY apiserversdk/ apiserversdk/

WORKDIR /workspace/apiserver

Expand Down
21 changes: 20 additions & 1 deletion apiserver/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ import (
"google.golang.org/grpc/reflection"
"google.golang.org/protobuf/encoding/protojson"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client/config"

"github.com/ray-project/kuberay/apiserver/pkg/interceptor"
"github.com/ray-project/kuberay/apiserver/pkg/manager"
"github.com/ray-project/kuberay/apiserver/pkg/server"
"github.com/ray-project/kuberay/apiserver/pkg/swagger"
"github.com/ray-project/kuberay/apiserver/pkg/util"
"github.com/ray-project/kuberay/apiserversdk"
api "github.com/ray-project/kuberay/proto/go_client"
)

Expand All @@ -39,6 +41,7 @@ var (
logFile = flag.String("logFilePath", "", "Synchronize logs to local file")
localSwaggerPath = flag.String("localSwaggerPath", "", "Specify the root directory for `*.swagger.json` the swagger files.")
grpcTimeout = flag.Duration("grpc_timeout", util.GRPCServerDefaultTimeout, "gRPC server timeout duration")
enableAPIServerV2 = flag.Bool("enable-api-server-v2", true, "Enable API server V2")
healthy int32
)

Expand Down Expand Up @@ -145,7 +148,23 @@ func startHttpProxy() {
registerHttpHandlerFromEndpoint(ctx, api.RegisterRayJobSubmissionServiceHandlerFromEndpoint, "RayJobSubmissionService", runtimeMux)

// Create a top level mux to include both Http gRPC servers and other endpoints like metrics
topMux := http.NewServeMux()
var topMux *http.ServeMux
if *enableAPIServerV2 {
kubernetesConfig, err := config.GetConfig()
if err != nil {
klog.Fatalf("Failed to load kubeconfig: %v", err)
}

topMux, err = apiserversdk.NewMux(apiserversdk.MuxConfig{
KubernetesConfig: kubernetesConfig,
})
if err != nil {
klog.Fatalf("Failed to create API server mux: %v", err)
}
} else {
topMux = http.NewServeMux()
}

// Seems /apis (matches /apis/v1alpha1/clusters) works fine
topMux.Handle("/", runtimeMux)
topMux.Handle("/metrics", promhttp.Handler())
Expand Down
26 changes: 26 additions & 0 deletions apiserver/deploy/base/insecure/apiserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,32 @@ rules:
- get
- list
---
# apiserversdk requires the following permissions to be able to list and proxy services
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to update Helm chart's RBAC? If you plan to open a follow up PR for Helm chart, can you open an issue to track the progress?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I think we should. Thank you for catching this. Let me create an issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created #3648

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: allow-service-access
rules:
- apiGroups: [""]
resources: ["services"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["services/proxy"]
verbs: ["get", "watch", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: allow-service-access
subjects:
- kind: ServiceAccount
name: kuberay-apiserver
namespace: ray-system
roleRef:
kind: ClusterRole
name: allow-service-access
apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: Namespace
metadata:
Expand Down
26 changes: 26 additions & 0 deletions apiserver/deploy/base/secure/apiserver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,32 @@ rules:
- get
- list
---
# apiserversdk requires the following permissions to be able to list and proxy services
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: allow-service-access
rules:
- apiGroups: [""]
resources: ["services"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["services/proxy"]
verbs: ["get", "watch", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: allow-service-access
subjects:
- kind: ServiceAccount
name: kuberay-apiserver
namespace: ray-system
roleRef:
kind: ClusterRole
name: allow-service-access
apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: Namespace
metadata:
Expand Down
45 changes: 45 additions & 0 deletions apiserver/test/e2e/apiserversdk/cluster_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package apiserversdk

import (
"testing"

"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

func TestCreateCluster(t *testing.T) {
tCtx, err := NewEnd2EndTestingContext(t)
require.NoError(t, err, "No error expected when creating testing context")
rayClient := tCtx.GetRayHttpClient()
rayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: tCtx.GetRayClusterName(),
Namespace: tCtx.GetNamespaceName(),
},
Spec: rayv1.RayClusterSpec{
HeadGroupSpec: rayv1.HeadGroupSpec{
RayStartParams: map[string]string{},
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{Name: "test", Image: "test"},
},
},
},
},
},
}
_, err = rayClient.RayClusters(tCtx.GetNamespaceName()).Create(tCtx.GetCtx(), rayCluster, metav1.CreateOptions{})
require.NoError(t, err)

actualCluster, err := rayClient.RayClusters(tCtx.GetNamespaceName()).Get(tCtx.GetCtx(), tCtx.GetRayClusterName(), metav1.GetOptions{})
require.NoError(t, err)
require.Equal(t, tCtx.GetRayClusterName(), actualCluster.Name)
require.Equal(t, rayCluster.Spec, actualCluster.Spec)

err = rayClient.RayClusters(tCtx.GetNamespaceName()).Delete(tCtx.GetCtx(), tCtx.GetRayClusterName(), metav1.DeleteOptions{})
require.NoError(t, err)
}
5 changes: 5 additions & 0 deletions apiserver/test/e2e/apiserversdk/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package apiserversdk

const (
RayImage = "rayproject/ray:2.9.0"
)
58 changes: 58 additions & 0 deletions apiserver/test/e2e/apiserversdk/event_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package apiserversdk

import (
"testing"

"github.com/onsi/gomega"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/ray-project/kuberay/apiserver/test/e2e"
rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

func TestGetRayClusterEvent(t *testing.T) {
tCtx, err := NewEnd2EndTestingContext(t)
require.NoError(t, err, "No error expected when creating testing context")
rayClient := tCtx.GetRayHttpClient()
rayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: tCtx.GetRayClusterName(),
Namespace: tCtx.GetNamespaceName(),
},
Spec: rayv1.RayClusterSpec{
HeadGroupSpec: rayv1.HeadGroupSpec{
RayStartParams: map[string]string{},
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{Name: "test", Image: "test"},
},
},
},
},
},
}
_, err = rayClient.RayClusters(tCtx.GetNamespaceName()).Create(tCtx.GetCtx(), rayCluster, metav1.CreateOptions{})
require.NoError(t, err)

k8sClient := tCtx.GetK8sHttpClient()
g := gomega.NewWithT(t)
g.Eventually(func() bool {
events, err := k8sClient.CoreV1().Events(tCtx.GetNamespaceName()).List(tCtx.GetCtx(), metav1.ListOptions{})
if err != nil {
return false
}

for _, e := range events.Items {
if e.InvolvedObject.Name == tCtx.GetRayClusterName() && e.InvolvedObject.Kind == "RayCluster" {
return true
}
}
return false
}, e2e.TestTimeoutShort, e2e.TestPollingInterval).Should(gomega.BeTrue(), "Expected to see RayCluster event in the list of events")

err = rayClient.RayClusters(tCtx.GetNamespaceName()).Delete(tCtx.GetCtx(), tCtx.GetRayClusterName(), metav1.DeleteOptions{})
require.NoError(t, err)
}
53 changes: 53 additions & 0 deletions apiserver/test/e2e/apiserversdk/proxy_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package apiserversdk

import (
"testing"

"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
)

func TestGetRayClusterProxy(t *testing.T) {
tCtx, err := NewEnd2EndTestingContext(t)
require.NoError(t, err, "No error expected when creating testing context")
rayClient := tCtx.GetRayHttpClient()
rayCluster := &rayv1.RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: tCtx.GetRayClusterName(),
Namespace: tCtx.GetNamespaceName(),
},
Spec: rayv1.RayClusterSpec{
HeadGroupSpec: rayv1.HeadGroupSpec{
RayStartParams: map[string]string{},
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "ray-head",
Image: tCtx.GetRayImage(),
},
},
},
},
},
},
}
_, err = rayClient.RayClusters(tCtx.GetNamespaceName()).Create(tCtx.GetCtx(), rayCluster, metav1.CreateOptions{})
require.NoError(t, err)

// Wait for the Ray cluster's head pod to be ready so that we can access the dashboard
waitForClusterConditions(t, tCtx, tCtx.GetRayClusterName(), []rayv1.RayClusterConditionType{rayv1.HeadPodReady})

k8sClient := tCtx.GetK8sHttpClient()
serviceName := tCtx.GetRayClusterName() + "-head-svc"
r := k8sClient.CoreV1().Services(tCtx.GetNamespaceName()).ProxyGet("http", serviceName, "8265", "", nil)
resp, err := r.DoRaw(tCtx.GetCtx())
require.NoError(t, err)
require.Contains(t, string(resp), "Ray Dashboard")

err = rayClient.RayClusters(tCtx.GetNamespaceName()).Delete(tCtx.GetCtx(), tCtx.GetRayClusterName(), metav1.DeleteOptions{})
require.NoError(t, err)
}
Loading
Loading