Fix/longhorn #1596

Merged 4 commits on Dec 6, 2024
16 changes: 8 additions & 8 deletions manifests/claudie/kustomization.yaml
@@ -57,18 +57,18 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
images:
- name: ghcr.io/berops/claudie/ansibler
newTag: 6928ace-3165
newTag: f95dd2b-3176
- name: ghcr.io/berops/claudie/autoscaler-adapter
newTag: 6928ace-3165
newTag: f95dd2b-3176
- name: ghcr.io/berops/claudie/builder
newTag: 6928ace-3165
newTag: f95dd2b-3176
- name: ghcr.io/berops/claudie/claudie-operator
newTag: 6928ace-3165
newTag: f95dd2b-3176
- name: ghcr.io/berops/claudie/kube-eleven
newTag: 6928ace-3165
newTag: f95dd2b-3176
- name: ghcr.io/berops/claudie/kuber
newTag: 6928ace-3165
newTag: f95dd2b-3176
- name: ghcr.io/berops/claudie/manager
newTag: 6928ace-3165
newTag: f95dd2b-3176
- name: ghcr.io/berops/claudie/terraformer
newTag: d06c57d-3175
newTag: f95dd2b-3176
2 changes: 1 addition & 1 deletion manifests/testing-framework/kustomization.yaml
@@ -91,4 +91,4 @@ secretGenerator:

images:
- name: ghcr.io/berops/claudie/testing-framework
newTag: d06c57d-3175
newTag: f95dd2b-3176
19 changes: 19 additions & 0 deletions proto/pb/spec/utils.go
@@ -163,3 +163,22 @@ func (r *TemplateRepository) MustExtractTargetPath() string {
r.Path,
)
}

func (n *NodePool) Zone() string {
var sn string

switch {
case n.GetDynamicNodePool() != nil:
sn = n.GetDynamicNodePool().Provider.SpecName
case n.GetStaticNodePool() != nil:
sn = StaticNodepoolInfo_STATIC_PROVIDER.String()
default:
panic("unsupported nodepool type")
}

if sn == "" {
panic("no zone specified")
}

return fmt.Sprintf("%s-zone", sn)
}
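
For illustration, here is a minimal, self-contained sketch of the zone-naming pattern the new NodePool.Zone() helper encodes: the provider spec name (or the static-provider constant) suffixed with "-zone", which longhorn.go then prefixes with "longhorn-" to build a storage class name. The sanitisation step and the example provider name below are assumptions for this sketch, not part of the diff.

package main

import (
	"fmt"
	"strings"
)

// zoneName mirrors NodePool.Zone(): the provider spec name suffixed with "-zone".
// The lower-casing/space replacement stands in for the sanitise.String call used
// in longhorn.go and is an assumption for this sketch.
func zoneName(providerSpecName string) string {
	s := strings.ToLower(strings.ReplaceAll(providerSpecName, " ", "-"))
	return fmt.Sprintf("%s-zone", s)
}

func main() {
	zone := zoneName("hetzner-1")                    // "hetzner-1-zone"
	storageClass := fmt.Sprintf("longhorn-%s", zone) // "longhorn-hetzner-1-zone"
	fmt.Println(zone, storageClass)
}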
10 changes: 5 additions & 5 deletions services/kuber/server/domain/usecases/patch_nodes.go
@@ -21,16 +21,16 @@ func (u *Usecases) PatchNodes(ctx context.Context, request *pb.PatchNodesRequest
return nil, fmt.Errorf("error while patching providerID on nodes for %s : %w", clusterID, err)
}

if err := patcher.PatchLabels(); err != nil {
logger.Err(err).Msgf("Error while patching node labels")
return nil, fmt.Errorf("error while patching labels on nodes for %s : %w", clusterID, err)
}

if err := patcher.PatchAnnotations(); err != nil {
logger.Err(err).Msgf("Error while patching node annotations")
return nil, fmt.Errorf("error while patching annotations on nodes for %s : %w", clusterID, err)
}

if err := patcher.PatchLabels(); err != nil {
logger.Err(err).Msgf("Error while patching node labels")
return nil, fmt.Errorf("error while patching labels on nodes for %s : %w", clusterID, err)
}

if err := patcher.PatchTaints(); err != nil {
logger.Err(err).Msgf("Error while patching node taints")
return nil, fmt.Errorf("error while patching taints on nodes for %s : %w", clusterID, err)
171 changes: 66 additions & 105 deletions services/kuber/server/domain/utils/longhorn/longhorn.go
@@ -18,10 +18,10 @@ import (
"github.com/rs/zerolog/log"
)

// Cluster - k8s cluster where longhorn will be set up
// Directory - directory where to create storage class manifest
type Longhorn struct {
Cluster *spec.K8Scluster
// Cluster where longhorn will be set up
Cluster *spec.K8Scluster
// Directory where to create storage class manifest
Directory string
}

@@ -37,191 +37,152 @@ type enableCA struct {
const (
longhornYaml = "services/kuber/server/manifests/longhorn.yaml"
longhornDefaultsYaml = "services/kuber/server/manifests/claudie-defaults.yaml"
longhornEnableCaTpl = "enable-ca.goyaml"
storageManifestTpl = "storage-class.goyaml"
defaultSC = "longhorn"
storageClassLabel = "claudie.io/provider-instance"
storageClassLabel = "claudie.io/storage-class"
)

// SetUp function will set up the longhorn on the k8s cluster saved in l.Longhorn
func (l *Longhorn) SetUp() error {
kubectl := kubectl.Kubectl{Kubeconfig: l.Cluster.GetKubeconfig(), MaxKubectlRetries: 3}
// apply longhorn.yaml and settings
kubectl.Stdout = comm.GetStdOut(l.Cluster.ClusterInfo.Id())
kubectl.Stderr = comm.GetStdErr(l.Cluster.ClusterInfo.Id())

// Apply longhorn manifests after nodes are annotated.
if err := l.applyManifests(kubectl); err != nil {
return fmt.Errorf("error while applying longhorn manifests in cluster %s : %w", l.Cluster.ClusterInfo.Name, err)
k := kubectl.Kubectl{
Kubeconfig: l.Cluster.GetKubeconfig(),
MaxKubectlRetries: 3,
}
k.Stdout = comm.GetStdOut(l.Cluster.ClusterInfo.Id())
k.Stderr = comm.GetStdErr(l.Cluster.ClusterInfo.Id())

//get existing sc so we can delete them if we do not need them any more
existingSC, err := l.getStorageClasses(kubectl)
current, err := l.currentClaudieStorageClasses(k)
if err != nil {
return fmt.Errorf("error while getting existing storage classes for %s : %w", l.Cluster.ClusterInfo.Name, err)
}
//save applied sc so we can find a difference with existing ones and remove the redundant ones
var appliedSC []string

//load the templates
template := templateUtils.Templates{Directory: l.Directory}
if err := k.KubectlApply(longhornYaml); err != nil {
return fmt.Errorf("error while applying longhorn.yaml in %s : %w", l.Directory, err)
}

if err := k.KubectlApply(longhornDefaultsYaml); err != nil {
return fmt.Errorf("error while applying claudie default settings for longhorn in %s : %w", l.Directory, err)
}

template := templateUtils.Templates{
Directory: l.Directory,
}

storageTpl, err := templateUtils.LoadTemplate(templates.StorageClassTemplate)
if err != nil {
return err
}

enableCATpl, err := templateUtils.LoadTemplate(templates.EnableClusterAutoscalerTemplate)
if err != nil {
return err
}

// Apply setting about CA
enableCa := enableCA{fmt.Sprintf("%v", l.Cluster.AnyAutoscaledNodePools())}
if setting, err := template.GenerateToString(enableCATpl, enableCa); err != nil {
return err
} else if err := kubectl.KubectlApplyString(setting); err != nil {
return fmt.Errorf("error while applying CA setting for longhorn in cluster %s : %w", l.Cluster.ClusterInfo.Name, err)
ca := enableCA{
fmt.Sprintf("%v", l.Cluster.AnyAutoscaledNodePools()),
}

// get real nodes names in a case when provider appends some string to the set name
realNodesInfo, err := kubectl.KubectlGetNodeNames()
setting, err := template.GenerateToString(enableCATpl, ca)
if err != nil {
return err
}
realNodeNames := strings.Split(string(realNodesInfo), "\n")
// tag nodes based on the zones
for providerInstance, nps := range nodepools.ByProviderSpecName(l.Cluster.ClusterInfo.NodePools) {
zoneName := sanitise.String(fmt.Sprintf("%s-zone", providerInstance))
storageClassName := fmt.Sprintf("longhorn-%s", zoneName)
//flag to determine whether we need to create storage class or not
isWorkerNodeProvider := false

if err := k.KubectlApplyString(setting); err != nil {
return fmt.Errorf("error while applying CA setting for longhorn in cluster %s: %w", l.Cluster.ClusterInfo.Name, err)
}

var desired []string
for provider, nps := range nodepools.ByProviderSpecName(l.Cluster.ClusterInfo.NodePools) {
wk := false
for _, np := range nps {
// tag worker nodes from nodepool based on the future zone
// NOTE: the master nodes are by default set to NoSchedule, therefore we do not need to annotate them
// If in the future, we add functionality to allow scheduling on master nodes, the longhorn will need add the annotation
if !np.IsControl {
isWorkerNodeProvider = true
for _, node := range np.GetNodes() {
nodeName := strings.TrimPrefix(node.Name, fmt.Sprintf("%s-", l.Cluster.ClusterInfo.Id()))
annotation := fmt.Sprintf("node.longhorn.io/default-node-tags='[\"%s\"]'", zoneName)

i := slices.Index(realNodeNames, nodeName)
if i < 0 {
log.Warn().Str("cluster", l.Cluster.ClusterInfo.Id()).Msgf("Node %s was not found in cluster %v", nodeName, realNodeNames)
continue
}
// Add tag to the node via kubectl annotate, use --overwrite to avoid getting error of already tagged node
if err := kubectl.KubectlAnnotate("node", realNodeNames[i], annotation, "--overwrite"); err != nil {
return fmt.Errorf("error while annotating the node %s from cluster %s via kubectl annotate : %w", realNodeNames[i], l.Cluster.ClusterInfo.Name, err)
}
}
wk = true
break
}
}
if isWorkerNodeProvider {
// create storage class manifest based on zones from templates
zoneData := zoneData{ZoneName: zoneName, StorageClassName: storageClassName}
manifest := fmt.Sprintf("%s.yaml", storageClassName)
err := template.Generate(storageTpl, manifest, zoneData)
if err != nil {

if wk {
zn := sanitise.String(fmt.Sprintf("%s-zone", provider))
sc := fmt.Sprintf("longhorn-%s", zn)
manifest := fmt.Sprintf("%s.yaml", sc)
data := zoneData{
ZoneName: zn,
StorageClassName: sc,
}

if err := template.Generate(storageTpl, manifest, data); err != nil {
return fmt.Errorf("error while generating %s manifest : %w", manifest, err)
}
//update the kubectl working directory
kubectl.Directory = l.Directory
// apply manifest
err = kubectl.KubectlApply(manifest, "")
if err != nil {

k.Directory = l.Directory
if err := k.KubectlApply(manifest, ""); err != nil {
return fmt.Errorf("error while applying %s manifest : %w", manifest, err)
}
appliedSC = append(appliedSC, storageClassName)
desired = append(desired, sc)
}
}

err = l.deleteOldStorageClasses(existingSC, appliedSC, kubectl)
if err != nil {
if err := l.deleteUnused(current, desired, k); err != nil {
return err
}

// Clean up
if err := os.RemoveAll(l.Directory); err != nil {
return fmt.Errorf("error while deleting files %s : %w", l.Directory, err)
}

return nil
}
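
To make the per-provider storage class generation in SetUp concrete, here is a self-contained sketch of the templating mechanism: a zoneData value is rendered into a StorageClass manifest carrying the claudie.io/storage-class label, which currentClaudieStorageClasses later uses to find Claudie-managed classes. The template body, provisioner, and parameters shown are assumptions for illustration only; the real templates.StorageClassTemplate is not part of this diff.

package main

import (
	"os"
	"text/template"
)

// zoneData mirrors the struct passed to the storage-class template in longhorn.go.
type zoneData struct {
	ZoneName         string
	StorageClassName string
}

// storageClassTpl is an illustrative stand-in for templates.StorageClassTemplate;
// only the claudie.io/storage-class label name is taken from this diff, the rest
// (provisioner, parameters, label value) is assumed.
const storageClassTpl = `apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: {{ .StorageClassName }}
  labels:
    claudie.io/storage-class: "true"
provisioner: driver.longhorn.io
parameters:
  diskSelector: "{{ .ZoneName }}"
`

func main() {
	tpl := template.Must(template.New("sc").Parse(storageClassTpl))
	data := zoneData{ZoneName: "hetzner-1-zone", StorageClassName: "longhorn-hetzner-1-zone"}
	if err := tpl.Execute(os.Stdout, data); err != nil {
		panic(err)
	}
}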

// getStorageClasses returns a slice of names of storage classes currently deployed in cluster
// returns slice of storage classes if successful, error otherwise
func (l *Longhorn) getStorageClasses(kc kubectl.Kubectl) (result []string, err error) {
//define output struct
// currentClaudieStorageClasses returns a slice of names of claudie related storage classes currently deployed in cluster
func (l *Longhorn) currentClaudieStorageClasses(kc kubectl.Kubectl) (result []string, err error) {
type KubectlOutputJSON struct {
APIVersion string `json:"apiVersion"`
Items []map[string]interface{} `json:"items"`
Kind string `json:"kind"`
Metadata map[string]interface{} `json:"metadata"`
}
//get existing storage classes

out, err := kc.KubectlGet("sc", "-o", "json")
if err != nil {
return nil, fmt.Errorf("error while getting storage classes from cluster %s : %w", l.Cluster.ClusterInfo.Name, err)
}
//no storage class defined yet

if strings.Contains(string(out), "No resources found") {
return result, nil
}
//parse output

var parsedJSON KubectlOutputJSON
err = json.Unmarshal(out, &parsedJSON)
if err != nil {
if err := json.Unmarshal(out, &parsedJSON); err != nil {
return nil, fmt.Errorf("error while unmarshalling kubectl output for cluster %s : %w", l.Cluster.ClusterInfo.Name, err)
}
//return name of the storage classes

for _, sc := range parsedJSON.Items {
metadata := sc["metadata"].(map[string]interface{})
name := metadata["name"].(string)
//check if storage class has a claudie label

if labels, ok := metadata["labels"]; ok {
labelsMap := labels.(map[string]interface{})
if _, ok := labelsMap[storageClassLabel]; ok {
result = append(result, name)
}
}
}

return result, nil
}
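
As a companion to currentClaudieStorageClasses, a runnable sketch of the same label filtering applied to a hand-written payload in the shape returned by kubectl get sc -o json; the storage class names and label value below are invented for illustration.

package main

import (
	"encoding/json"
	"fmt"
)

// sample approximates `kubectl get sc -o json` output; names and label value are made up.
const sample = `{
  "items": [
    {"metadata": {"name": "longhorn", "labels": {}}},
    {"metadata": {"name": "longhorn-hetzner-1-zone", "labels": {"claudie.io/storage-class": "true"}}}
  ]
}`

func main() {
	var parsed struct {
		Items []map[string]interface{} `json:"items"`
	}
	if err := json.Unmarshal([]byte(sample), &parsed); err != nil {
		panic(err)
	}

	// Keep only storage classes that carry the claudie.io/storage-class label.
	var claudieManaged []string
	for _, sc := range parsed.Items {
		metadata := sc["metadata"].(map[string]interface{})
		name := metadata["name"].(string)
		if labels, ok := metadata["labels"].(map[string]interface{}); ok {
			if _, ok := labels["claudie.io/storage-class"]; ok {
				claudieManaged = append(claudieManaged, name)
			}
		}
	}
	fmt.Println(claudieManaged) // [longhorn-hetzner-1-zone]
}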

// deleteOldStorageClasses deletes storage classes, which does not have a worker nodes behind it
func (l *Longhorn) deleteOldStorageClasses(existing, applied []string, kc kubectl.Kubectl) error {
// deleteUnused deletes unused storage classes previously created by claudie.
func (l *Longhorn) deleteUnused(existing, applied []string, kc kubectl.Kubectl) error {
for _, ex := range existing {
if ex == defaultSC {
//ignore the default sc
continue
}
found := false
for _, app := range applied {
if ex == app {
found = true
break
}
}
//if not found in applied, delete the sc
if !found {
err := kc.KubectlDeleteResource("sc", ex)
if !slices.Contains(applied, ex) {
log.Debug().Msgf("Deleting storage class %s", ex)
if err != nil {
if err := kc.KubectlDeleteResource("sc", ex); err != nil {
return fmt.Errorf("error while deleting storage class %s due to no nodes backing it : %w", ex, err)
}
}
}
return nil
}

// applyManifests applies longhorn manifests to the managed cluster.
func (l *Longhorn) applyManifests(kc kubectl.Kubectl) error {
// Apply longhorn.yaml
if err := kc.KubectlApply(longhornYaml); err != nil {
return fmt.Errorf("error while applying longhorn.yaml in %s : %w", l.Directory, err)
}
// Apply longhorn setting
if err := kc.KubectlApply(longhornDefaultsYaml); err != nil {
return fmt.Errorf("error while applying claudie default settings for longhorn in %s : %w", l.Directory, err)
}
return nil
}