Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added new option enableProvidedByTopology #780

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions chart/templates/controller/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ spec:
key: {{ .Values.controller.hcloudToken.existingSecret.key }}
{{- end }}
{{- end }}
{{- if .Values.global.enableProvidedByTopology}}
- name: ENABLE_PROVIDED_BY_TOPOLOGY
value: "t"
{{ end }}
{{- if .Values.controller.extraEnvVars }}
{{- include "common.tplvalues.render" (dict "value" .Values.controller.extraEnvVars "context" $) | nindent 12 }}
{{- end }}
Expand Down
8 changes: 8 additions & 0 deletions chart/templates/core/storageclass.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{ $global := .Values.global }}
{{- if .Values.storageClasses }}
{{- range $key, $val := .Values.storageClasses }}
kind: StorageClass
Expand All @@ -10,6 +11,13 @@ provisioner: csi.hetzner.cloud
volumeBindingMode: WaitForFirstConsumer
allowVolumeExpansion: true
reclaimPolicy: {{ $val.reclaimPolicy | quote }}
{{- if $global.enableProvidedByTopology }}
allowedTopologies:
- matchLabelExpressions:
- key: instance.hetzner.cloud/provided-by
values:
- "cloud"
{{- end}}
---
{{- end }}
{{- end }}
4 changes: 4 additions & 0 deletions chart/templates/node/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ spec:
{{- end }}
- name: ENABLE_METRICS
value: {{if .Values.metrics.enabled}}"true"{{ else }}"false"{{end}}
{{- if .Values.global.enableProvidedByTopology}}
- name: ENABLE_PROVIDED_BY_TOPOLOGY
value: "t"
{{ end }}
{{- if .Values.node.extraEnvVars }}
{{- include "common.tplvalues.render" (dict "value" .Values.node.extraEnvVars "context" $) | nindent 12 }}
{{- end }}
Expand Down
4 changes: 4 additions & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ global:
## - myRegistryKeySecretName
##
imagePullSecrets: []
## @param global.enableProvidedByTopology Enables workaround for upstream Kubernetes issue where nodes without the CSI plugin are still considered for scheduling.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Can you link the upstream kubernetes issue here?
  • Can you explain that this setting should not be modified for existing clusters (and why)

##
enableProvidedByTopology: false

## @section Common parameters
##
Expand Down Expand Up @@ -648,6 +651,7 @@ node:
## kubeletDir: /var/lib/k0s/kubelet
## For microk8s:
## kubeletDir: /var/snap/microk8s/common/var/lib/kubelet
##
kubeletDir: /var/lib/kubelet

## @section Other Parameters
Expand Down
4 changes: 4 additions & 0 deletions cmd/aio/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,13 @@ func main() {
volumeResizeService := volumes.NewLinuxResizeService(logger.With("component", "linux-resize-service"))
volumeStatsService := volumes.NewLinuxStatsService(logger.With("component", "linux-stats-service"))

enableProvidedByTopology := app.GetEnableProvidedByTopology()

nodeService := driver.NewNodeService(
logger.With("component", "driver-node-service"),
strconv.FormatInt(serverID, 10),
serverLocation,
enableProvidedByTopology,
volumeMountService,
volumeResizeService,
volumeStatsService,
Expand All @@ -84,6 +87,7 @@ func main() {
logger.With("component", "driver-controller-service"),
volumeService,
serverLocation,
enableProvidedByTopology,
)

// common
Expand Down
3 changes: 3 additions & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func main() {
location = server.Datacenter.Location.Name
}

enableProvidedByTopology := app.GetEnableProvidedByTopology()

volumeService := volumes.NewIdempotentService(
logger.With("component", "idempotent-volume-service"),
api.NewVolumeService(
Expand All @@ -65,6 +67,7 @@ func main() {
logger.With("component", "driver-controller-service"),
volumeService,
location,
enableProvidedByTopology,
)

identityService := driver.NewIdentityService(
Expand Down
3 changes: 3 additions & 0 deletions cmd/node/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ func main() {
volumeStatsService := volumes.NewLinuxStatsService(logger.With("component", "linux-stats-service"))
identityService := driver.NewIdentityService(logger.With("component", "driver-identity-service"))

enableProvidedByTopology := app.GetEnableProvidedByTopology()

nodeService := driver.NewNodeService(
logger.With("component", "driver-node-service"),
strconv.FormatInt(serverID, 10),
serverLocation,
enableProvidedByTopology,
volumeMountService,
volumeResizeService,
volumeStatsService,
Expand Down
12 changes: 10 additions & 2 deletions docs/kubernetes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,17 @@ $ kubectl apply -f https://raw.githubusercontent.com/hetznercloud/csi-driver/v2.

## Integration with Root Servers

Root servers can be part of the cluster, but the CSI plugin doesn't work there.
Root servers can be part of the cluster, but the CSI plugin doesn't work there and the current behaviour of the scheduler can cause Pods to be stuck in `Pending`.

Labels are needed to indicate whether a node is a cloud VM or a dedicated server from Robot. If you are using the `hcloud-cloud-controller-manager` version 1.21.0 or later, these labels are added automatically. Otherwise, you will need to label the nodes manually.
To address this behavior, you can set `enableProvidedByTopology` to `true` in the Helm Chart configuration. This setting prevents pods from being scheduled on nodes — specifically, Robot servers — where Hetzner volumes are unavailable. Enabling this option adds the `instance.hetzner.cloud/provided-by` label to the [allowed topologies](https://kubernetes.io/docs/concepts/storage/storage-classes/#allowed-topologies) section of the storage classes that are created. Additionally, this label is included in the `topologyKeys` section of `csinode` objects, and a node affinity is set up for each persistent volume.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a section that explains the backwards-incompatibility of this change and why users should not modify the setting in existing clusters.



```yaml
global:
enableProvidedByTopology: true
```

To ensure proper topology evaluation, labels are needed to indicate whether a node is a cloud VM or a dedicated server from Robot. If you are using the `hcloud-cloud-controller-manager` version 1.21.0 or later, these labels are added automatically. Otherwise, you will need to label the nodes manually.

### Adding labels manually

Expand Down
9 changes: 9 additions & 0 deletions internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ func CreateLogger() *slog.Logger {
return logger
}

// GetEnableProvidedByTopology parses the ENABLE_PROVIDED_BY_TOPOLOGY environment
// variable and reports whether the provided-by topology feature is enabled.
//
// It returns false when the variable is unset. Values are parsed with
// strconv.ParseBool (accepting 1, t, T, TRUE, true, True, 0, f, F, FALSE,
// false, False); an unparsable value is treated the same as "disabled"
// rather than aborting start-up, keeping the feature off by default.
func GetEnableProvidedByTopology() bool {
	featFlag, exists := os.LookupEnv("ENABLE_PROVIDED_BY_TOPOLOGY")
	if !exists {
		return false
	}
	enabled, err := strconv.ParseBool(featFlag)
	if err != nil {
		// Unparsable input: fall back to the default (disabled) explicitly
		// instead of silently discarding the error.
		return false
	}
	return enabled
}

// CreateListener creates and binds the unix socket in location specified by the CSI_ENDPOINT environment variable.
func CreateListener() (net.Listener, error) {
endpoint := os.Getenv("CSI_ENDPOINT")
Expand Down
31 changes: 20 additions & 11 deletions internal/driver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,23 @@ import (
// ControllerService implements the CSI controller gRPC service. The embedded
// proto.UnimplementedControllerServer supplies forward-compatible stubs for
// any RPCs not implemented here.
//
// NOTE: this span contained both the pre- and post-diff field lists merged
// together (duplicate fields, invalid Go); reconstructed as the post-diff
// version of the struct.
type ControllerService struct {
	proto.UnimplementedControllerServer

	logger        *slog.Logger
	volumeService volumes.Service
	// location is the Hetzner Cloud location volumes are created in.
	location string
	// enableProvidedByTopology adds the instance.hetzner.cloud/provided-by
	// segment to reported volume topologies when true.
	enableProvidedByTopology bool
}

// NewControllerService constructs a ControllerService with its dependencies.
// enableProvidedByTopology controls whether the provided-by topology segment
// is attached to created volumes.
//
// NOTE: this span contained both the pre- and post-diff struct-literal keys
// merged together (duplicate keys, invalid Go); reconstructed as the
// post-diff version of the constructor.
func NewControllerService(
	logger *slog.Logger,
	volumeService volumes.Service,
	location string,
	enableProvidedByTopology bool,
) *ControllerService {
	return &ControllerService{
		logger:                   logger,
		volumeService:            volumeService,
		location:                 location,
		enableProvidedByTopology: enableProvidedByTopology,
	}
}

Expand Down Expand Up @@ -88,16 +91,22 @@ func (s *ControllerService) CreateVolume(ctx context.Context, req *proto.CreateV
"volume-name", volume.Name,
)

topology := &proto.Topology{
Segments: map[string]string{
TopologySegmentLocation: volume.Location,
},
}

if s.enableProvidedByTopology {
topology.Segments[ProvidedByLabel] = "cloud"
}

resp := &proto.CreateVolumeResponse{
Volume: &proto.Volume{
VolumeId: strconv.FormatInt(volume.ID, 10),
CapacityBytes: volume.SizeBytes(),
AccessibleTopology: []*proto.Topology{
{
Segments: map[string]string{
TopologySegmentLocation: volume.Location,
},
},
topology,
},
VolumeContext: map[string]string{
"fsFormatOptions": req.Parameters["fsFormatOptions"],
Expand Down
8 changes: 8 additions & 0 deletions internal/driver/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func newControllerServiceTestEnv() *controllerServiceTestEnv {
logger,
volumeService,
"testloc",
false,
),
volumeService: volumeService,
}
Expand All @@ -41,6 +42,8 @@ func newControllerServiceTestEnv() *controllerServiceTestEnv {
func TestControllerServiceCreateVolume(t *testing.T) {
env := newControllerServiceTestEnv()

env.service.enableProvidedByTopology = true

env.volumeService.CreateFunc = func(ctx context.Context, opts volumes.CreateOpts) (*csi.Volume, error) {
if opts.Name != "testvol" {
t.Errorf("unexpected name passed to volume service: %s", opts.Name)
Expand Down Expand Up @@ -94,6 +97,11 @@ func TestControllerServiceCreateVolume(t *testing.T) {
if loc := top.Segments[TopologySegmentLocation]; loc != "testloc" {
t.Errorf("unexpected location segment in topology: %s", loc)
}
if env.service.enableProvidedByTopology {
if provider := top.Segments[ProvidedByLabel]; provider != "cloud" {
t.Errorf("unexpected provider segment in topology: %s", provider)
}
}
} else {
t.Errorf("unexpected number of topologies: %d", len(resp.Volume.AccessibleTopology))
}
Expand Down
1 change: 1 addition & 0 deletions internal/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ const (
DefaultVolumeSize = MinVolumeSize

TopologySegmentLocation = PluginName + "/location"
ProvidedByLabel = "instance.hetzner.cloud/provided-by"
)
32 changes: 20 additions & 12 deletions internal/driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,32 @@ import (
// NodeService implements the CSI node gRPC service for a single server,
// identified by serverID and serverLocation. Mount, resize, and stats work is
// delegated to the injected volume services. The embedded
// proto.UnimplementedNodeServer supplies forward-compatible stubs.
//
// NOTE: this span contained both the pre- and post-diff field lists merged
// together (duplicate fields, invalid Go); reconstructed as the post-diff
// version of the struct.
type NodeService struct {
	proto.UnimplementedNodeServer

	logger         *slog.Logger
	serverID       string
	serverLocation string
	// enableProvidedByTopology adds the instance.hetzner.cloud/provided-by
	// segment to the node's accessible topology when true.
	enableProvidedByTopology bool
	volumeMountService       volumes.MountService
	volumeResizeService      volumes.ResizeService
	volumeStatsService       volumes.StatsService
}

// NewNodeService constructs a NodeService with its dependencies.
// enableProvidedByTopology controls whether the provided-by topology segment
// is reported for this node.
//
// NOTE: this span contained both the pre- and post-diff struct-literal keys
// merged together (duplicate keys, invalid Go); reconstructed as the
// post-diff version of the constructor.
func NewNodeService(
	logger *slog.Logger,
	serverID string,
	serverLocation string,
	enableProvidedByTopology bool,
	volumeMountService volumes.MountService,
	volumeResizeService volumes.ResizeService,
	volumeStatsService volumes.StatsService,
) *NodeService {
	return &NodeService{
		logger:                   logger,
		serverID:                 serverID,
		serverLocation:           serverLocation,
		enableProvidedByTopology: enableProvidedByTopology,
		volumeMountService:       volumeMountService,
		volumeResizeService:      volumeResizeService,
		volumeStatsService:       volumeStatsService,
	}
}

Expand Down Expand Up @@ -181,6 +184,11 @@ func (s *NodeService) NodeGetInfo(_ context.Context, _ *proto.NodeGetInfoRequest
},
},
}

if s.enableProvidedByTopology {
resp.AccessibleTopology.Segments[ProvidedByLabel] = "cloud"
}

return resp, nil
}

Expand Down
1 change: 1 addition & 0 deletions internal/driver/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func newNodeServerTestEnv() nodeServiceTestEnv {
testutil.NewNopLogger(),
"1",
"loc",
false,
volumeMountService,
volumeResizeService,
volumeStatsService,
Expand Down
2 changes: 2 additions & 0 deletions internal/driver/sanity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func TestSanity(t *testing.T) {
logger.With("component", "driver-controller-service"),
volumeService,
"testloc",
false,
)

identityService := NewIdentityService(
Expand All @@ -56,6 +57,7 @@ func TestSanity(t *testing.T) {
logger.With("component", "driver-node-service"),
"123456",
"loc",
false,
volumeMountService,
volumeResizeService,
volumeStatsService,
Expand Down