Skip to content

Unbreak build failures in Kubernetes benchmarks. #11074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 43 additions & 27 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,19 @@ http_archive(

http_archive(
name = "googleapis",
sha256 = "fd9e4d17b92be6b6718ee9b40062a4ce81feb6ea6cdd80fc723daf127ce3f350",
strip_prefix = "googleapis-3effbf23b1a1d1fe1306356e94397e20d01d31a0",
sha256 = "134f5a38940615c3248964a86f7faf7fed5ba9d0d4afbd3cafc5038aac172bd1",
strip_prefix = "googleapis-beea48a164c2a8cc8485185c3a4f56c587090e1a",
urls = [
"https://github.com/googleapis/googleapis/archive/3effbf23b1a1d1fe1306356e94397e20d01d31a0.zip",
# Released on 2024-10-21.
"https://github.com/googleapis/googleapis/archive/beea48a164c2a8cc8485185c3a4f56c587090e1a.zip",
],
)

load("@googleapis//:repository_rules.bzl", "switched_rules_by_language")

switched_rules_by_language(
name = "com_google_googleapis_imports",
go = True, # Enable go_proto_library BUILD rules inside @googleapis repo.
go = False, # Disable building proto Go libraries; use org_golang_google_genproto instead.
)

http_archive(
Expand Down Expand Up @@ -105,7 +106,8 @@ gazelle_dependencies()

# Some repository below has a transitive dependency on these repositories.
# These declarations must precede any later declarations that transitively
# depend on older versions, since only the first declaration is considered. go_repository(
# depend on older versions, since only the first declaration is considered.

go_repository(
name = "org_golang_x_tools",
importpath = "golang.org/x/tools",
Expand Down Expand Up @@ -169,6 +171,42 @@ go_repository(
version = "v1.1.0-rc.1",
)

# Go module pins for the GKE container API client, the genproto modules, and
# gRPC. Each declaration fixes a module version together with its checksum.

# cloud.google.com/go/container at v1.40.0.
go_repository(
name = "com_google_cloud_go_container",
importpath = "cloud.google.com/go/container",
sum = "h1:JVoEg/4RvoGW37r2Eja/cTBc3X9c2loGWYq7QDsRDuI=",
version = "v1.40.0",
)

# The three genproto modules below (root, googleapis/api, googleapis/rpc) are
# all pinned to the same pseudo-version.
go_repository(
name = "org_golang_google_genproto",
importpath = "google.golang.org/genproto",
sum = "h1:Q3nlH8iSQSRUwOskjbcSMcF2jiYMNiQYZ0c2KEJLKKU=",
version = "v0.0.0-20241021214115-324edc3d5d38",
)

go_repository(
name = "org_golang_google_genproto_googleapis_api",
importpath = "google.golang.org/genproto/googleapis/api",
sum = "h1:2oV8dfuIkM1Ti7DwXc0BJfnwr9csz4TDXI9EmiI+Rbw=",
version = "v0.0.0-20241021214115-324edc3d5d38",
)

go_repository(
name = "org_golang_google_genproto_googleapis_rpc",
importpath = "google.golang.org/genproto/googleapis/rpc",
sum = "h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI=",
version = "v0.0.0-20241021214115-324edc3d5d38",
)

# google.golang.org/grpc at v1.67.1.
# NOTE(review): build_file_proto_mode = "disable" presumably keeps Gazelle from
# generating proto rules inside this repository -- confirm against the Gazelle
# go_repository documentation.
go_repository(
name = "org_golang_google_grpc",
build_file_proto_mode = "disable",
importpath = "google.golang.org/grpc",
sum = "h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E=",
version = "v1.67.1",
)

# Load C++ rules.
http_archive(
name = "rules_cc",
Expand Down Expand Up @@ -804,13 +842,6 @@ go_repository(
version = "v1.4.0",
)

go_repository(
name = "com_google_cloud_go_container",
importpath = "cloud.google.com/go/container",
sum = "h1:nbEK/59GyDRKKlo1SqpohY1TK8LmJ2XNcvS9Gyom2A0=",
version = "v1.7.0",
)

go_repository(
name = "com_google_cloud_go_containeranalysis",
importpath = "cloud.google.com/go/containeranalysis",
Expand Down Expand Up @@ -2481,14 +2512,6 @@ go_repository(
version = "v1.1.1-0.20211118161826-650dca95af54",
)

go_repository(
name = "org_golang_google_grpc",
build_file_proto_mode = "disable",
importpath = "google.golang.org/grpc",
sum = "h1:qq9WB3Dez2tMAKtZTVtZsZSmTkDgPeXx+FRPt5kLEkM=",
version = "v1.53.0-dev.0.20230123225046-4075ef07c5d5",
)

go_repository(
name = "in_gopkg_check_v1",
importpath = "gopkg.in/check.v1",
Expand Down Expand Up @@ -2926,13 +2949,6 @@ go_repository(
version = "v1.6.7",
)

go_repository(
name = "org_golang_google_genproto",
importpath = "google.golang.org/genproto",
sum = "h1:BWUVssLB0HVOSY78gIdvk1dTVYtT1y8SBWtPYuTJ/6w=",
version = "v0.0.0-20230110181048-76db0878b65f",
)

go_repository(
name = "org_golang_google_protobuf",
importpath = "google.golang.org/protobuf",
Expand Down
4 changes: 2 additions & 2 deletions test/kubernetes/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ package(
proto_library(
name = "test_range_config",
srcs = ["test_range_config.proto"],
has_services = 1,
has_services = 0,
visibility = [
"//visibility:public",
],
deps = [
"@googleapis//google/container/v1:container_proto",
"@com_google_protobuf//:any_proto",
],
)
4 changes: 2 additions & 2 deletions test/kubernetes/benchmarks/ruby_dev_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,12 @@ func doRubyDevTest(ctx context.Context, t *testing.T, k8sCtx k8sctx.KubernetesCo

reader, err := cluster.GetLogReader(ctx, pod, v13.PodLogOptions{})
if err != nil {
t.Fatalf("Failed to get log reader on cluster %q: %v", cluster.Cluster().GetCluster().GetName(), err)
t.Fatalf("Failed to get log reader on cluster %q: %v", cluster.GetName(), err)
}
defer reader.Close()
buf := new(bytes.Buffer)
if _, err := io.Copy(buf, reader); err != nil {
t.Fatalf("Failed to read log on cluster %q: %v", cluster.Cluster().GetCluster().GetName(), err)
t.Fatalf("Failed to read log on cluster %q: %v", cluster.GetName(), err)
}

output := buf.String()
Expand Down
4 changes: 2 additions & 2 deletions test/kubernetes/benchmarks/startup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,13 @@ func TestStartup(t *testing.T) {
}
reader, err := cluster.GetLogReader(ctx, p, v13.PodLogOptions{})
if err != nil {
t.Fatalf("Failed to get log reader on cluster %q: %v", cluster.Cluster().GetCluster().GetName(), err)
t.Fatalf("Failed to get log reader on cluster %q: %v", cluster.GetName(), err)
}
defer reader.Close()

buf := new(bytes.Buffer)
if _, err := io.Copy(buf, reader); err != nil {
t.Fatalf("Failed to read log on cluster %q: %v", cluster.Cluster().GetCluster().GetName(), err)
t.Fatalf("Failed to read log on cluster %q: %v", cluster.GetName(), err)
}
if strings.TrimSpace(buf.String()) != "hello" {
t.Fatalf("Mistmatch output: got: %q want: %q", buf.String(), "hello")
Expand Down
2 changes: 1 addition & 1 deletion test/kubernetes/k8sctx/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ go_library(
"//test/kubernetes:test_range_config_go_proto",
"//test/kubernetes/testcluster",
"//tools/gvisor_k8s_tool/provider/kubectl",
"@googleapis//google/container/v1:container_go_proto",
"@org_golang_google_protobuf//encoding/prototext:go_default_library",
"@org_golang_google_protobuf//types/known/anypb:go_default_library",
],
)
4 changes: 2 additions & 2 deletions test/kubernetes/k8sctx/k8sctx_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
"os"
"testing"

cspb "cloud.google.com/go/container/apiv1/containerpb"
"google.golang.org/protobuf/encoding/prototext"
"google.golang.org/protobuf/types/known/anypb"
"gvisor.dev/gvisor/runsc/flag"
testpb "gvisor.dev/gvisor/test/kubernetes/test_range_config_go_proto"
"gvisor.dev/gvisor/test/kubernetes/testcluster"
Expand Down Expand Up @@ -55,7 +55,7 @@ func newKubectlContext(ctx context.Context) (KubernetesContext, error) {
if err != nil {
return nil, fmt.Errorf("cannot initialize cluster %q: %w", *kubectlContextName, err)
}
var clusterPB cspb.Cluster
var clusterPB anypb.Any
clusterBytes, err := os.ReadFile(*clusterProtoPath)
if err != nil {
return nil, fmt.Errorf("cannot read cluster textproto file %q: %w", *clusterProtoPath, err)
Expand Down
19 changes: 13 additions & 6 deletions test/kubernetes/test_range_config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ syntax = "proto3";

package k8s_tester.test_cluster_config;

import "google/container/v1/cluster_service.proto";
import "google/protobuf/any.proto";

// TestRangeSpec is a description of the test environment to be created. It is
// the input of the setup step which creates the required clusters.
Expand All @@ -14,13 +14,13 @@ message TestRangeSpec {
string name = 1;

// A nodepool built with the runtime under test.
google.container.v1.NodePool test_runtime = 2;
NodePool test_runtime = 2;

// clients is another nodepool in the cluster to use against the test_runtime.
// For example, in most client-server tests, the runtime under test is the
// server and the clients are the client. Clients always use the default
// runtime runc.
google.container.v1.NodePool clients = 3;
NodePool clients = 3;

// tertiary is a third nodepool in the cluster, used by some benchmarks that
// need it for isolation.
Expand All @@ -29,7 +29,7 @@ message TestRangeSpec {
// host's non-local network stack for a fair comparison between runsc/runc.
// The tertiary nodepool may use gVisor or runc as a runtime, depending on
// user configuration.
google.container.v1.NodePool tertiary = 8;
NodePool tertiary = 8;

// versions are the GKE patch versions to use for the clusters. The number
// of clusters created will be num(versions) * replicas.
Expand All @@ -49,6 +49,13 @@ message TestRangeSpec {
string gke_service_account = 7;
}

// NodePool represents a set of Kubernetes nodes.
// The actual nodepool definition is carried inside a google.protobuf.Any, so
// readers must unpack it into the concrete message type they expect.
message NodePool {
// Opaque implementation-specific nodepool config.
// In GKE, this is a google.container.v1.NodePool.
google.protobuf.Any node_pool = 1;
}

// TestRange contains the created clusters. This is an output from the setup
// phase and an input for the test phase.
message TestRange {
Expand All @@ -57,8 +64,8 @@ message TestRange {

// Cluster holds the created cluster and its credential file.
message Cluster {
// Created Cluster proto.
google.container.v1.Cluster cluster = 1;
// Created cluster proto.
google.protobuf.Any cluster = 1;

// The setup step will create individual credential files for each created
// cluster.
Expand Down
2 changes: 1 addition & 1 deletion test/kubernetes/testcluster/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ go_library(
],
deps = [
"//test/kubernetes:test_range_config_go_proto",
"@googleapis//google/container/v1:container_go_proto",
"@io_k8s_api//apps/v1:go_default_library",
"@io_k8s_api//core/v1:go_default_library",
"@io_k8s_apimachinery//pkg/api/resource:go_default_library",
Expand All @@ -25,6 +24,7 @@ go_library(
"@io_k8s_apimachinery//pkg/watch:go_default_library",
"@io_k8s_client_go//kubernetes:go_default_library",
"@io_k8s_client_go//tools/clientcmd:go_default_library",
"@org_golang_google_genproto//googleapis/container/v1:container",
"@org_golang_google_protobuf//proto:go_default_library",
"@org_golang_x_sync//errgroup:go_default_library",
],
Expand Down
41 changes: 33 additions & 8 deletions test/kubernetes/testcluster/objects.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@ package testcluster

import (
"context"
"errors"
"fmt"
"reflect"
"strconv"

cspb "cloud.google.com/go/container/apiv1/containerpb"
cspb "google.golang.org/genproto/googleapis/container/v1"
"google.golang.org/protobuf/proto"
v13 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -224,11 +226,9 @@ func (t RuntimeType) ApplyNodepool(nodepool *cspb.NodePool, accelType Accelerato
nodepool.Config.Labels[NodepoolNumAcceleratorsKey] = strconv.Itoa(accelCount)
case RuntimeTypeGVisorTPU:
nodepool.Config.MachineType = TPUAcceleratorMachineTypeMap[accelType]
nodepool.PlacementPolicy = &cspb.NodePool_PlacementPolicy{
TpuTopology: accelShape,
Type: cspb.NodePool_PlacementPolicy_COMPACT,
if err := setNodePlacementPolicyCompact(nodepool, accelShape); err != nil {
panic(fmt.Sprintf("failed to set node placement policy: %v", err))
}

nodepool.Config.Labels[gvisorNodepoolKey] = gvisorRuntimeClass
nodepool.Config.Labels[NodepoolRuntimeKey] = string(RuntimeTypeGVisorTPU)
nodepool.Config.Labels[NodepoolTPUTopologyKey] = accelShape
Expand Down Expand Up @@ -256,9 +256,8 @@ func (t RuntimeType) ApplyNodepool(nodepool *cspb.NodePool, accelType Accelerato
nodepool.Config.Labels[NodepoolNumAcceleratorsKey] = strconv.Itoa(accelCount)
case RuntimeTypeUnsandboxedTPU:
nodepool.Config.MachineType = TPUAcceleratorMachineTypeMap[accelType]
nodepool.PlacementPolicy = &cspb.NodePool_PlacementPolicy{
TpuTopology: accelShape,
Type: cspb.NodePool_PlacementPolicy_COMPACT,
if err := setNodePlacementPolicyCompact(nodepool, accelShape); err != nil {
panic(fmt.Sprintf("failed to set node placement policy: %v", err))
}
nodepool.Config.Labels[NodepoolRuntimeKey] = string(RuntimeTypeUnsandboxedTPU)
nodepool.Config.Labels[NodepoolTPUTopologyKey] = accelShape
Expand All @@ -274,6 +273,32 @@ func (t RuntimeType) ApplyNodepool(nodepool *cspb.NodePool, accelType Accelerato
}
}

// setNodePlacementPolicyCompact sets nodepool's PlacementPolicy field to a
// newly allocated placement policy of type COMPACT with the given TPU
// topology.
//
// This is done by reflection because the NodePool_PlacementPolicy proto
// message isn't available in the latest exported version of the genproto API.
// This is only used for TPU nodepools so not critical for most benchmarks.
//
// An error is returned (instead of panicking inside the reflect package) when
// nodepool is nil, when the PlacementPolicy field or its TpuTopology/Type
// fields are missing, or when any of them does not have the expected kind.
// On error, nodepool is left unmodified.
func setNodePlacementPolicyCompact(nodepool *cspb.NodePool, tpuTopology string) error {
	if nodepool == nil {
		// reflect.ValueOf(nil pointer).Elem() is the zero Value, on which
		// FieldByName would panic.
		return errors.New("nodepool is nil")
	}
	placementPolicyField := reflect.ValueOf(nodepool).Elem().FieldByName("PlacementPolicy")
	if !placementPolicyField.IsValid() {
		return errors.New("nodepool does not have a PlacementPolicy field")
	}
	// The field must be a pointer to a struct for the allocation and the
	// field lookups below to be legal.
	policyPtrType := placementPolicyField.Type()
	if policyPtrType.Kind() != reflect.Ptr || policyPtrType.Elem().Kind() != reflect.Struct {
		return fmt.Errorf("nodepool.PlacementPolicy has unexpected type %v, want pointer to struct", policyPtrType)
	}
	if !placementPolicyField.CanSet() {
		return errors.New("nodepool.PlacementPolicy field cannot be set")
	}

	// Build the new policy message off to the side so that nodepool is only
	// touched once everything has been validated.
	nodePlacementPolicy := reflect.New(policyPtrType.Elem()).Elem()

	tpuTopologyField := nodePlacementPolicy.FieldByName("TpuTopology")
	if !tpuTopologyField.IsValid() {
		return errors.New("nodepool.PlacementPolicy does not have a TpuTopology field")
	}
	if tpuTopologyField.Kind() != reflect.String {
		return fmt.Errorf("nodepool.PlacementPolicy.TpuTopology has unexpected kind %v, want string", tpuTopologyField.Kind())
	}
	tpuTopologyField.SetString(tpuTopology)

	typeField := nodePlacementPolicy.FieldByName("Type")
	if !typeField.IsValid() {
		return errors.New("nodepool.PlacementPolicy does not have a Type field")
	}
	switch typeField.Kind() {
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		// The enum value is written by number because the enum type itself
		// cannot be referenced from this package.
		typeField.SetInt(1 /* cspb.NodePool_PlacementPolicy_COMPACT */)
	default:
		return fmt.Errorf("nodepool.PlacementPolicy.Type has unexpected kind %v, want integer", typeField.Kind())
	}

	// Done.
	placementPolicyField.Set(nodePlacementPolicy.Addr())
	return nil
}

// ApplyPodSpec modifies a PodSpec to use this runtime.
func (t RuntimeType) ApplyPodSpec(podSpec *v13.PodSpec) {
switch t {
Expand Down
23 changes: 19 additions & 4 deletions test/kubernetes/testcluster/testcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import (
"strings"
"time"

cspb "cloud.google.com/go/container/apiv1/containerpb"
"golang.org/x/sync/errgroup"
cspb "google.golang.org/genproto/googleapis/container/v1"
testpb "gvisor.dev/gvisor/test/kubernetes/test_range_config_go_proto"
appsv1 "k8s.io/api/apps/v1"
v13 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -159,9 +159,20 @@ func (t *TestCluster) Cluster() *testpb.Cluster {
return t.cluster
}

// ContainerCluster returns the underlying container cluster proto.
//
// The stored cluster message is decoded into a fresh cspb.Cluster on every
// call, so callers receive their own copy. If decoding fails, a nil cluster
// is returned along with an error wrapping the decoding failure; a partially
// populated message is never handed back.
func (t *TestCluster) ContainerCluster() (*cspb.Cluster, error) {
	var cluster cspb.Cluster
	if err := t.cluster.GetCluster().UnmarshalTo(&cluster); err != nil {
		return nil, fmt.Errorf("cannot unmarshal container cluster proto: %w", err)
	}
	return &cluster, nil
}

// GetName returns this cluster's name.
func (t *TestCluster) GetName() string {
return t.cluster.GetCluster().GetName()
cluster, err := t.ContainerCluster()
if err != nil {
return fmt.Sprintf("[error:%v]", err)
}
return cluster.GetName()
}

// GetGVisorRuntimeLabelMap returns the gVisor runtime key-value pair used
Expand Down Expand Up @@ -424,12 +435,16 @@ func (t *TestCluster) ConfigurePodForTertiaryNodepool(pod *v13.Pod) (*v13.Pod, e
}

func (t *TestCluster) getNodePoolByName(name string) (*cspb.NodePool, error) {
for _, np := range t.cluster.GetCluster().GetNodePools() {
cluster, err := t.ContainerCluster()
if err != nil {
return nil, err
}
for _, np := range cluster.GetNodePools() {
if np.GetName() == name {
return np, nil
}
}
return nil, fmt.Errorf("failed to find nodepool %q: %+v", name, t.cluster.GetCluster().GetNodePools())
return nil, fmt.Errorf("failed to find nodepool %q: %+v", name, cluster.GetNodePools())
}

func (t *TestCluster) applyCommonPodConfigurations(np *cspb.NodePool, podSpec *v13.PodSpec) error {
Expand Down
Loading
Loading