Skip to content

Commit

Permalink
SKS-2345: Add support for system disk expansion (#169)
Browse files Browse the repository at this point in the history
  • Loading branch information
haijianyang authored Apr 22, 2024
1 parent 9ffaf7e commit 939f02d
Show file tree
Hide file tree
Showing 50 changed files with 4,142 additions and 45 deletions.
23 changes: 23 additions & 0 deletions api/v1beta1/conditions_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,29 @@ const (
// WaitingForAvailableHostWithEnoughGPUsReason (Severity=Info) documents an ElfMachine
// waiting for an available host with enough GPUs to create VM.
WaitingForAvailableHostWithEnoughGPUsReason = "WaitingForAvailableHostWithEnoughGPUs"

// ResourcesHotUpdatedCondition documents the status of the hot updating resources of a VM.
ResourcesHotUpdatedCondition = "ResourceHotUpdated"

// WaitingForResourcesHotUpdateReason (Severity=Info) documents an ElfMachine waiting for updating resources.
WaitingForResourcesHotUpdateReason = "WaitingForResourcesHotUpdate"

// ExpandingVMDiskReason (Severity=Info) documents an ElfMachine currently executing the expand disk operation.
ExpandingVMDiskReason = "ExpandingVMDisk"

// ExpandingVMDiskFailedReason (Severity=Warning) documents an ElfMachine controller detecting
// an error while expanding disk; these kinds of errors are usually transient and failed updates
// are automatically retried by the controller.
ExpandingVMDiskFailedReason = "ExpandingVMDiskFailed"

// ExpandingRootPartitionReason (Severity=Info) documents an ElfMachine currently executing the
// operation of adding new disk capacity to the root directory.
ExpandingRootPartitionReason = "ExpandingRootPartition"

// ExpandingRootPartitionFailedReason (Severity=Warning) documents an ElfMachine controller
// detecting an error while adding new disk capacity to root directory; these kinds of errors are
// usually transient and failed updates are automatically retried by the controller.
ExpandingRootPartitionFailedReason = "ExpandingRootPartitionFailed"
)

// Conditions and Reasons related to make connections to a Tower. Can currently be used by ElfCluster and ElfMachine
Expand Down
47 changes: 47 additions & 0 deletions api/v1beta1/elfmachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
capierrors "sigs.k8s.io/cluster-api/errors"
"sigs.k8s.io/cluster-api/util/conditions"
)

const (
Expand All @@ -37,6 +38,9 @@ const (

// VMDisconnectionTimestampAnnotation is the annotation identifying the VM of ElfMachine disconnection time.
VMDisconnectionTimestampAnnotation = "cape.infrastructure.cluster.x-k8s.io/vm-disconnection-timestamp"

// VMFirstBootTimestampAnnotation is the annotation identifying the VM of ElfMachine first power on time.
VMFirstBootTimestampAnnotation = "cape.infrastructure.cluster.x-k8s.io/vm-first-boot-timestamp"
)

// ElfMachineSpec defines the desired state of ElfMachine.
Expand Down Expand Up @@ -124,6 +128,10 @@ type ElfMachineStatus struct {
// +optional
GPUDevices []GPUStatus `json:"gpuDevices,omitempty"`

// Resources records the resources allocated for the machine.
// +optional
Resources ResourcesStatus `json:"resources,omitempty"`

// FailureReason will be set in the event that there is a terminal problem
// reconciling the Machine and will contain a succinct value suitable
// for machine interpretation.
Expand Down Expand Up @@ -241,6 +249,16 @@ func (m *ElfMachine) IsFailed() bool {
return m.Status.FailureReason != nil || m.Status.FailureMessage != nil
}

// IsHotUpdating reports whether the machine is being hot updated, i.e. the
// ResourcesHotUpdatedCondition is present on the machine and currently false.
func (m *ElfMachine) IsHotUpdating() bool {
	return conditions.Has(m, ResourcesHotUpdatedCondition) &&
		conditions.IsFalse(m, ResourcesHotUpdatedCondition)
}

func (m *ElfMachine) SetVMDisconnectionTimestamp(timestamp *metav1.Time) {
if m.Annotations == nil {
m.Annotations = make(map[string]string)
Expand Down Expand Up @@ -318,6 +336,35 @@ func (m *ElfMachine) GetVMDisconnectionTimestamp() *metav1.Time {
return nil
}

// SetVMFirstBootTimestamp stores timestamp, formatted as RFC 3339, in the
// VMFirstBootTimestampAnnotation annotation of the ElfMachine.
func (m *ElfMachine) SetVMFirstBootTimestamp(timestamp *metav1.Time) {
	annotations := m.GetAnnotations()
	if annotations == nil {
		annotations = map[string]string{}
	}
	// Write into the local map, which is guaranteed to be non-nil at this
	// point. Assigning through m.Annotations would panic when the machine
	// has no annotations yet, because writing to a nil map is a runtime error.
	annotations[VMFirstBootTimestampAnnotation] = timestamp.Format(time.RFC3339)
	m.SetAnnotations(annotations)
}

// GetVMFirstBootTimestamp returns the time stored in the
// VMFirstBootTimestampAnnotation annotation. It returns nil when the
// annotation is absent or its value is not a valid RFC 3339 timestamp.
func (m *ElfMachine) GetVMFirstBootTimestamp() *metav1.Time {
	// Reading from a nil map is safe and simply reports the key as missing,
	// so no separate nil check on m.Annotations is required.
	raw, found := m.Annotations[VMFirstBootTimestampAnnotation]
	if !found {
		return nil
	}

	parsed, err := time.Parse(time.RFC3339, raw)
	if err != nil {
		return nil
	}

	bootTime := metav1.NewTime(parsed)

	return &bootTime
}

// RequiresGPUDevices reports whether the machine requires pass-through GPU
// devices or vGPU devices.
func (m *ElfMachine) RequiresGPUDevices() bool {
	if m.RequiresPassThroughGPUDevices() {
		return true
	}

	return m.RequiresVGPUDevices()
}
Expand Down
5 changes: 5 additions & 0 deletions api/v1beta1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ type GPUStatus struct {
Name string `json:"name,omitempty"`
}

// ResourcesStatus records the resources allocated to the virtual machine.
type ResourcesStatus struct {
// Disk is the disk capacity recorded for the virtual machine.
// NOTE(review): the unit is not visible here (presumably GiB) — confirm against the controller.
Disk int32 `json:"disk,omitempty"`
}

//+kubebuilder:object:generate=false

// PatchStringValue is for patching resources.
Expand Down
16 changes: 16 additions & 0 deletions api/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,13 @@ spec:
ready:
description: Ready is true when the provider resource is ready.
type: boolean
resources:
description: Resources records the resources allocated for the machine.
properties:
disk:
format: int32
type: integer
type: object
taskRef:
description: TaskRef is a managed object reference to a Task related
to the machine. This value is set automatically at runtime and should
Expand Down
153 changes: 153 additions & 0 deletions config/crd/bases/kubesmart.smtx.io_hostconfigs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.12.0
name: hostconfigs.kubesmart.smtx.io
spec:
group: kubesmart.smtx.io
names:
categories:
- kubesmart
kind: HostConfig
listKind: HostConfigList
plural: hostconfigs
shortNames:
- hc
singular: hostconfig
scope: Namespaced
versions:
- additionalPrinterColumns:
- description: the current phase of HostConfig
jsonPath: .status.phase
name: Phase
type: string
- description: the last execution time
jsonPath: .status.lastExecutionTime
name: LastExecutionTime
type: string
- description: Time duration since creation of HostConfig
jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v1alpha1
schema:
openAPIV3Schema:
description: HostConfig is the Schema for the HostConfig API.
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
of an object. Servers should convert recognized schemas to the latest
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
type: string
kind:
description: 'Kind is a string value representing the REST resource this
object represents. Servers may infer this from the endpoint the client
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
metadata:
type: object
spec:
properties:
config:
properties:
ansible:
description: Ansible 通过 ansible playbook 完成配置
properties:
localPlaybook:
description: LocalPlaybook 本地的 playbook,单个 yaml 文件, secret
引用或者 yaml 字符串
properties:
content:
description: Content is the inline yaml text.
format: yaml
type: string
secretRef:
description: SecretRef specifies the secret which stores
yaml text.
properties:
name:
description: name is unique within a namespace to
reference a secret resource.
type: string
namespace:
description: namespace defines the space within which
the secret name must be unique.
type: string
type: object
x-kubernetes-map-type: atomic
type: object
remotePlaybook:
description: RemotePlaybook 在远端的 playbook,单个 .tar.gz 压缩包,内容可以是单个
yaml 文件,也可以符合 ansible 要求的目录
properties:
md5sum:
description: MD5sum 压缩包的 MD5,填写了会进行校验,已经下载过的 playbook
校验通过后跳过重复下载
type: string
name:
description: Name 要执行的 playbook 文件名,相对于压缩包顶层的位置
type: string
url:
description: URL playbook 在远端的地址,支持 https
type: string
required:
- name
- url
type: object
values:
description: Values 执行 playbook 的参数,yaml 格式,可以是 secret 引用或者
yaml 字符串
properties:
content:
description: Content is the inline yaml text.
format: yaml
type: string
secretRef:
description: SecretRef specifies the secret which stores
yaml text.
properties:
name:
description: name is unique within a namespace to
reference a secret resource.
type: string
namespace:
description: namespace defines the space within which
the secret name must be unique.
type: string
type: object
x-kubernetes-map-type: atomic
type: object
type: object
timeout:
description: Timeout 执行一次配置的超时时间
type: string
type: object
nodeName:
type: string
required:
- config
- nodeName
type: object
status:
properties:
failureMessage:
type: string
failureReason:
type: string
lastExecutionTime:
description: LastExecutionTime 最后执行的时间戳
format: date-time
type: string
phase:
description: Phase 当前状态
type: string
required:
- phase
type: object
type: object
served: true
storage: true
subresources:
status: {}
Loading

0 comments on commit 939f02d

Please sign in to comment.