Merge pull request #134 from vardhaman22/k8s-v1.28.10
[v1.28] Release v1.28.10
kinarashah authored May 24, 2024
2 parents 5c56e26 + cddabbf commit 447cc96
Showing 38 changed files with 952 additions and 188 deletions.
324 changes: 231 additions & 93 deletions CHANGELOG/CHANGELOG-1.28.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cluster/gce/config-default.sh
@@ -90,7 +90,7 @@ fi
# By default, the latest image from the image family will be used unless an
# explicit image will be set.
GCI_VERSION=${KUBE_GCI_VERSION:-}
IMAGE_FAMILY=${KUBE_IMAGE_FAMILY:-cos-97-lts}
IMAGE_FAMILY=${KUBE_IMAGE_FAMILY:-cos-109-lts}
export MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-}
export MASTER_IMAGE_FAMILY=${KUBE_GCE_MASTER_IMAGE_FAMILY:-${IMAGE_FAMILY}}
export MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud}
2 changes: 1 addition & 1 deletion cluster/gce/config-test.sh
@@ -103,7 +103,7 @@ ALLOWED_NOTREADY_NODES=${ALLOWED_NOTREADY_NODES:-$(($(get-num-nodes) / 100))}
# By default, the latest image from the image family will be used unless an
# explicit image will be set.
GCI_VERSION=${KUBE_GCI_VERSION:-}
IMAGE_FAMILY=${KUBE_IMAGE_FAMILY:-cos-97-lts}
IMAGE_FAMILY=${KUBE_IMAGE_FAMILY:-cos-109-lts}
export MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-}
export MASTER_IMAGE_FAMILY=${KUBE_GCE_MASTER_IMAGE_FAMILY:-${IMAGE_FAMILY}}
export MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-cos-cloud}
6 changes: 3 additions & 3 deletions pkg/proxy/topology.go
@@ -156,7 +156,7 @@ func canUseTopology(endpoints []Endpoint, svcInfo ServicePort, nodeLabels map[st

zone, ok := nodeLabels[v1.LabelTopologyZone]
if !ok || zone == "" {
klog.InfoS("Skipping topology aware endpoint filtering since node is missing label", "label", v1.LabelTopologyZone)
klog.V(2).InfoS("Skipping topology aware endpoint filtering since node is missing label", "label", v1.LabelTopologyZone)
return false
}

@@ -166,7 +166,7 @@ func canUseTopology(endpoints []Endpoint, svcInfo ServicePort, nodeLabels map[st
continue
}
if endpoint.GetZoneHints().Len() == 0 {
klog.InfoS("Skipping topology aware endpoint filtering since one or more endpoints is missing a zone hint")
klog.V(2).InfoS("Skipping topology aware endpoint filtering since one or more endpoints is missing a zone hint", "endpoint", endpoint)
return false
}

@@ -176,7 +176,7 @@ func canUseTopology(endpoints []Endpoint, svcInfo ServicePort, nodeLabels map[st
}

if !hasEndpointForZone {
klog.InfoS("Skipping topology aware endpoint filtering since no hints were provided for zone", "zone", zone)
klog.V(2).InfoS("Skipping topology aware endpoint filtering since no hints were provided for zone", "zone", zone)
return false
}

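The three hunks above only move these messages behind a verbosity gate. A minimal standalone sketch of the effect, assuming a program using k8s.io/klog/v2 directly (the flag setup and empty zone value are illustrative, not from this PR):

```go
package main

import (
	"flag"

	"k8s.io/klog/v2"
)

func main() {
	// klog.V(2).InfoS only emits when the process runs with -v=2 or higher,
	// so after this change the "skipping topology aware endpoint filtering"
	// messages no longer appear at the default log level.
	klog.InitFlags(nil)
	_ = flag.Set("v", "2") // illustrative: pretend the binary was started with -v=2
	flag.Parse()

	zone := "" // hypothetical node with no topology.kubernetes.io/zone label
	if zone == "" {
		klog.V(2).InfoS("Skipping topology aware endpoint filtering since node is missing label",
			"label", "topology.kubernetes.io/zone")
	}
	klog.Flush()
}
```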
26 changes: 16 additions & 10 deletions pkg/scheduler/schedule_one.go
@@ -422,14 +422,18 @@ func (sched *Scheduler) schedulePod(ctx context.Context, fwk framework.Framework
// filter plugins and filter extenders.
func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.Framework, state *framework.CycleState, pod *v1.Pod) ([]*v1.Node, framework.Diagnosis, error) {
logger := klog.FromContext(ctx)
diagnosis := framework.Diagnosis{
NodeToStatusMap: make(framework.NodeToStatusMap),
UnschedulablePlugins: sets.New[string](),
}

allNodes, err := sched.nodeInfoSnapshot.NodeInfos().List()
if err != nil {
return nil, diagnosis, err
return nil, framework.Diagnosis{
NodeToStatusMap: make(framework.NodeToStatusMap),
UnschedulablePlugins: sets.New[string](),
}, err
}

diagnosis := framework.Diagnosis{
NodeToStatusMap: make(framework.NodeToStatusMap, len(allNodes)),
UnschedulablePlugins: sets.New[string](),
}
// Run "prefilter" plugins.
preRes, s := fwk.RunPreFilterPlugins(ctx, state, pod)
@@ -470,12 +474,14 @@ func (sched *Scheduler) findNodesThatFitPod(ctx context.Context, fwk framework.F
nodes := allNodes
if !preRes.AllNodes() {
nodes = make([]*framework.NodeInfo, 0, len(preRes.NodeNames))
for n := range preRes.NodeNames {
nInfo, err := sched.nodeInfoSnapshot.NodeInfos().Get(n)
if err != nil {
return nil, diagnosis, err
for _, n := range allNodes {
if !preRes.NodeNames.Has(n.Node().Name) {
// We consider Nodes that are filtered out by PreFilterResult as rejected via UnschedulableAndUnresolvable.
// We have to record them in NodeToStatusMap so that they won't be considered as candidates in the preemption.
diagnosis.NodeToStatusMap[n.Node().Name] = framework.NewStatus(framework.UnschedulableAndUnresolvable, "node is filtered out by the prefilter result")
continue
}
nodes = append(nodes, nInfo)
nodes = append(nodes, n)
}
}
feasibleNodes, err := sched.findNodesThatPassFilters(ctx, fwk, state, pod, diagnosis, nodes)
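The rewritten loop above records every node excluded by the PreFilterResult as UnschedulableAndUnresolvable, so preemption never considers it, instead of looking up only the allowed nodes in the snapshot. A simplified, self-contained sketch of that bookkeeping, using stand-in types rather than the scheduler framework's real NodeToStatusMap and NodeInfo:

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/sets"
)

// nodeStatus stands in for the scheduler framework's NodeToStatusMap.
type nodeStatus map[string]string

// filterByPreFilterResult keeps only the nodes allowed by the pre-filter
// result and records every excluded node so preemption will skip it.
func filterByPreFilterResult(allNodes []string, allowed sets.Set[string], statuses nodeStatus) []string {
	nodes := make([]string, 0, allowed.Len())
	for _, n := range allNodes {
		if !allowed.Has(n) {
			// Excluded by PreFilterResult: treated as rejected via UnschedulableAndUnresolvable.
			statuses[n] = "UnschedulableAndUnresolvable: node is filtered out by the prefilter result"
			continue
		}
		nodes = append(nodes, n)
	}
	return nodes
}

func main() {
	statuses := nodeStatus{}
	feasible := filterByPreFilterResult([]string{"node1", "node2", "node3"}, sets.New("node2"), statuses)
	fmt.Println(feasible) // [node2]
	fmt.Println(statuses) // node1 and node3 now carry a status, which is why wantEvaluatedNodes moves from 1 to 3 below
}
```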
2 changes: 1 addition & 1 deletion pkg/scheduler/schedule_one_test.go
@@ -2213,7 +2213,7 @@ func TestSchedulerSchedulePod(t *testing.T) {
nodes: []string{"node1", "node2", "node3"},
pod: st.MakePod().Name("test-prefilter").UID("test-prefilter").Obj(),
wantNodes: sets.New("node2"),
wantEvaluatedNodes: pointer.Int32(1),
wantEvaluatedNodes: pointer.Int32(3),
},
{
name: "test prefilter plugin returning non-intersecting nodes",
14 changes: 14 additions & 0 deletions pkg/scheduler/testing/wrappers.go
@@ -845,6 +845,20 @@ func (p *PersistentVolumeWrapper) HostPathVolumeSource(src *v1.HostPathVolumeSou
return p
}

// NodeAffinityIn creates a HARD node affinity (with the operator In)
// and injects into the pv.
func (p *PersistentVolumeWrapper) NodeAffinityIn(key string, vals []string) *PersistentVolumeWrapper {
if p.Spec.NodeAffinity == nil {
p.Spec.NodeAffinity = &v1.VolumeNodeAffinity{}
}
if p.Spec.NodeAffinity.Required == nil {
p.Spec.NodeAffinity.Required = &v1.NodeSelector{}
}
nodeSelector := MakeNodeSelector().In(key, vals).Obj()
p.Spec.NodeAffinity.Required.NodeSelectorTerms = append(p.Spec.NodeAffinity.Required.NodeSelectorTerms, nodeSelector.NodeSelectorTerms...)
return p
}

// ResourceClaimWrapper wraps a ResourceClaim inside.
type ResourceClaimWrapper struct{ resourcev1alpha2.ResourceClaim }

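A hedged usage sketch for the new NodeAffinityIn helper, assuming the package's existing MakePersistentVolume, Name, and Obj wrappers (imported here as st); the label key and zone value are illustrative:

```go
package example

import (
	v1 "k8s.io/api/core/v1"
	st "k8s.io/kubernetes/pkg/scheduler/testing"
)

// buildZonalPV builds a test PV whose required (hard) node affinity only
// matches nodes whose zone label is "zone-a", via the In operator.
func buildZonalPV() *v1.PersistentVolume {
	return st.MakePersistentVolume().
		Name("test-pv").
		NodeAffinityIn("topology.kubernetes.io/zone", []string{"zone-a"}).
		Obj()
}
```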
@@ -84,7 +84,7 @@ var (
},
[]string{"endpoint"},
)
storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"cluster"}, nil, compbasemetrics.ALPHA, "")
storageSizeDescription = compbasemetrics.NewDesc("apiserver_storage_size_bytes", "Size of the storage database file physically allocated in bytes.", []string{"storage_cluster_id"}, nil, compbasemetrics.ALPHA, "")
storageMonitor = &monitorCollector{monitorGetter: func() ([]Monitor, error) { return nil, nil }}
etcdEventsReceivedCounts = compbasemetrics.NewCounterVec(
&compbasemetrics.CounterOpts{
@@ -274,21 +274,21 @@ func (c *monitorCollector) CollectWithStability(ch chan<- compbasemetrics.Metric
}

for i, m := range monitors {
cluster := fmt.Sprintf("etcd-%d", i)
storageClusterID := fmt.Sprintf("etcd-%d", i)

klog.V(4).InfoS("Start collecting storage metrics", "cluster", cluster)
klog.V(4).InfoS("Start collecting storage metrics", "storage_cluster_id", storageClusterID)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
metrics, err := m.Monitor(ctx)
cancel()
m.Close()
if err != nil {
klog.InfoS("Failed to get storage metrics", "cluster", cluster, "err", err)
klog.InfoS("Failed to get storage metrics", "storage_cluster_id", storageClusterID, "err", err)
continue
}

metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), cluster)
metric, err := compbasemetrics.NewConstMetric(storageSizeDescription, compbasemetrics.GaugeValue, float64(metrics.Size), storageClusterID)
if err != nil {
klog.ErrorS(err, "Failed to create metric", "cluster", cluster)
klog.ErrorS(err, "Failed to create metric", "storage_cluster_id", storageClusterID)
}
ch <- metric
}
@@ -199,7 +199,7 @@ func TestStorageSizeCollector(t *testing.T) {
err: nil,
want: `# HELP apiserver_storage_size_bytes [ALPHA] Size of the storage database file physically allocated in bytes.
# TYPE apiserver_storage_size_bytes gauge
apiserver_storage_size_bytes{cluster="etcd-0"} 1e+09
apiserver_storage_size_bytes{storage_cluster_id="etcd-0"} 1e+09
`,
},
{
7 changes: 6 additions & 1 deletion staging/src/k8s.io/cloud-provider/cloud.go
@@ -98,6 +98,8 @@ func DefaultLoadBalancerName(service *v1.Service) string {
}

// GetInstanceProviderID builds a ProviderID for a node in a cloud.
// Note that if the instance does not exist, we must return ("", cloudprovider.InstanceNotFound)
// cloudprovider.InstanceNotFound should NOT be returned for instances that exist but are stopped/sleeping
func GetInstanceProviderID(ctx context.Context, cloud Interface, nodeName types.NodeName) (string, error) {
instances, ok := cloud.Instances()
if !ok {
@@ -108,8 +110,11 @@ func GetInstanceProviderID(ctx context.Context, cloud Interface, nodeName types.
if err == NotImplemented {
return "", err
}
if err == InstanceNotFound {
return "", err
}

return "", fmt.Errorf("failed to get instance ID from cloud provider: %v", err)
return "", fmt.Errorf("failed to get instance ID from cloud provider: %w", err)
}
return cloud.ProviderName() + "://" + instanceID, nil
}
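Two small behavior changes above: the InstanceNotFound sentinel is now passed through unwrapped, and other lookup failures are wrapped with %w instead of %v. A caller-side sketch (the helper name and handling policy are illustrative, not part of this PR) showing that errors.Is detects the sentinel either way:

```go
package example

import (
	"context"
	"errors"
	"fmt"

	"k8s.io/apimachinery/pkg/types"
	cloudprovider "k8s.io/cloud-provider"
)

// providerIDOrEmpty is an illustrative caller: a truly missing instance maps
// to an empty provider ID, while every other failure is surfaced with context.
func providerIDOrEmpty(ctx context.Context, cloud cloudprovider.Interface, name types.NodeName) (string, error) {
	id, err := cloudprovider.GetInstanceProviderID(ctx, cloud, name)
	switch {
	case err == nil:
		return id, nil
	case errors.Is(err, cloudprovider.InstanceNotFound):
		// The instance does not exist (not merely stopped or sleeping).
		return "", nil
	default:
		return "", fmt.Errorf("looking up provider ID for node %q: %w", name, err)
	}
}
```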
@@ -152,7 +152,7 @@ func (c *CloudNodeLifecycleController) MonitorNodes(ctx context.Context) {

// At this point the node has NotReady status, we need to check if the node has been removed
// from the cloud provider. If node cannot be found in cloudprovider, then delete the node
exists, err := ensureNodeExistsByProviderID(ctx, c.cloud, node)
exists, err := c.ensureNodeExistsByProviderID(ctx, node)
if err != nil {
klog.Errorf("error checking if node %s exists: %v", node.Name, err)
continue
@@ -180,7 +180,7 @@ func (c *CloudNodeLifecycleController) MonitorNodes(ctx context.Context) {
// Node exists. We need to check this to get taint working in similar in all cloudproviders
// current problem is that shutdown nodes are not working in similar way ie. all cloudproviders
// does not delete node from kubernetes cluster when instance it is shutdown see issue #46442
shutdown, err := shutdownInCloudProvider(ctx, c.cloud, node)
shutdown, err := c.shutdownInCloudProvider(ctx, node)
if err != nil {
klog.Errorf("error checking if node %s is shutdown: %v", node.Name, err)
}
@@ -196,18 +196,49 @@ func (c *CloudNodeLifecycleController) MonitorNodes(ctx context.Context) {
}
}

// getProviderID returns the provider ID for the node. If Node CR has no provider ID,
// it will be the one from the cloud provider.
func (c *CloudNodeLifecycleController) getProviderID(ctx context.Context, node *v1.Node) (string, error) {
if node.Spec.ProviderID != "" {
return node.Spec.ProviderID, nil
}

if instanceV2, ok := c.cloud.InstancesV2(); ok {
metadata, err := instanceV2.InstanceMetadata(ctx, node)
if err != nil {
return "", err
}
return metadata.ProviderID, nil
}

providerID, err := cloudprovider.GetInstanceProviderID(ctx, c.cloud, types.NodeName(node.Name))
if err != nil {
return "", err
}

return providerID, nil
}

// shutdownInCloudProvider returns true if the node is shutdown on the cloud provider
func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
if instanceV2, ok := cloud.InstancesV2(); ok {
func (c *CloudNodeLifecycleController) shutdownInCloudProvider(ctx context.Context, node *v1.Node) (bool, error) {
if instanceV2, ok := c.cloud.InstancesV2(); ok {
return instanceV2.InstanceShutdown(ctx, node)
}

instances, ok := cloud.Instances()
instances, ok := c.cloud.Instances()
if !ok {
return false, errors.New("cloud provider does not support instances")
}

shutdown, err := instances.InstanceShutdownByProviderID(ctx, node.Spec.ProviderID)
providerID, err := c.getProviderID(ctx, node)
if err != nil {
if err == cloudprovider.InstanceNotFound {
return false, nil
}
return false, err
}

shutdown, err := instances.InstanceShutdownByProviderID(ctx, providerID)
if err == cloudprovider.NotImplemented {
return false, nil
}
@@ -216,32 +247,22 @@ func shutdownInCloudProvider(ctx context.Context, cloud cloudprovider.Interface,
}

// ensureNodeExistsByProviderID checks if the instance exists by the provider id,
// If provider id in spec is empty it calls instanceId with node name to get provider id
func ensureNodeExistsByProviderID(ctx context.Context, cloud cloudprovider.Interface, node *v1.Node) (bool, error) {
if instanceV2, ok := cloud.InstancesV2(); ok {
func (c *CloudNodeLifecycleController) ensureNodeExistsByProviderID(ctx context.Context, node *v1.Node) (bool, error) {
if instanceV2, ok := c.cloud.InstancesV2(); ok {
return instanceV2.InstanceExists(ctx, node)
}

instances, ok := cloud.Instances()
instances, ok := c.cloud.Instances()
if !ok {
return false, errors.New("instances interface not supported in the cloud provider")
}

providerID := node.Spec.ProviderID
if providerID == "" {
var err error
providerID, err = instances.InstanceID(ctx, types.NodeName(node.Name))
if err != nil {
if err == cloudprovider.InstanceNotFound {
return false, nil
}
return false, err
}

if providerID == "" {
klog.Warningf("Cannot find valid providerID for node name %q, assuming non existence", node.Name)
providerID, err := c.getProviderID(ctx, node)
if err != nil {
if err == cloudprovider.InstanceNotFound {
return false, nil
}
return false, err
}

return instances.InstanceExistsByProviderID(ctx, providerID)
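The refactor above routes both the shutdown and the existence checks through a single getProviderID helper, which prefers the ID already recorded on the Node, then InstancesV2 metadata, then the legacy name-based lookup. A simplified decision-flow sketch with stand-in types (not the controller's real interfaces):

```go
package example

import "context"

// metadataLookup stands in for a cloud-provider call that resolves a node
// name to a provider ID.
type metadataLookup func(ctx context.Context, nodeName string) (string, error)

// resolveProviderID mirrors the precedence of the new getProviderID helper:
// use the ID already set on the Node, otherwise ask InstancesV2, otherwise
// fall back to the legacy name-based lookup.
func resolveProviderID(
	ctx context.Context,
	specProviderID string, // node.Spec.ProviderID
	nodeName string,
	fromInstancesV2 metadataLookup, // nil when the cloud offers no InstancesV2
	fromLegacyInstances metadataLookup,
) (string, error) {
	if specProviderID != "" {
		return specProviderID, nil
	}
	if fromInstancesV2 != nil {
		return fromInstancesV2(ctx, nodeName)
	}
	return fromLegacyInstances(ctx, nodeName)
}
```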