Skip to content

Commit

Permalink
Enabled the property-based scheduling experience
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelawyu committed Apr 1, 2024
1 parent e18a9e0 commit ab8883d
Show file tree
Hide file tree
Showing 14 changed files with 1,022 additions and 40 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,36 @@ jobs:
env:
KUBECONFIG: '/home/runner/.kube/config'
HUB_SERVER_URL: 'https://172.19.0.2:6443'

e2e-tests-with-aks-property-provider:
runs-on: ubuntu-latest
needs: [
detect-noop,
]
if: needs.detect-noop.outputs.noop != 'true'
steps:
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: ${{ env.GO_VERSION }}

- name: Check out code into the Go module directory
uses: actions/checkout@v4

- name: Install Ginkgo CLI
run: |
go install github.com/onsi/ginkgo/v2/[email protected]
- name: Install Kind
# Before updating the kind version to use, verify that the current kind image
# is still supported by the version.
run: |
go install sigs.k8s.io/[email protected]
- name: Run e2e tests
run: |
make e2e-tests
env:
KUBECONFIG: '/home/runner/.kube/config'
HUB_SERVER_URL: 'https://172.19.0.2:6443'
PROPERTY_PROVIDER: 'aks'
2 changes: 2 additions & 0 deletions charts/member-agent/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,7 @@ helm upgrade member-agent member-agent/ --namespace fleet-system
| resources | The resource request/limits for the container image | limits: "2" CPU, 4Gi, requests: 100m CPU, 128Mi |
| namespace | Namespace that this Helm chart is installed on. | `fleet-system` |
| logVerbosity | Log level. Uses V logs (klog) | `3` |
| propertyProvider | The property provider to use with the member agent | `` |
| region | The region where the member cluster resides | `` |

## Contributing Changes
6 changes: 6 additions & 0 deletions charts/member-agent/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ spec:
- -add_dir_header
- --enable-v1alpha1-apis={{ .Values.enableV1Alpha1APIs }}
- --enable-v1beta1-apis={{ .Values.enableV1Beta1APIs }}
{{- if .Values.propertyProvider }}
- --property-provider={{ .Values.propertyProvider }}
{{- end }}
{{- if .Values.region }}
- --region={{ .Values.region }}
{{- end }}
env:
- name: HUB_SERVER_URL
value: "{{ .Values.config.hubURL }}"
Expand Down
35 changes: 30 additions & 5 deletions cmd/memberagent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ import (
"go.goms.io/fleet/pkg/controllers/work"
workv1alpha1controller "go.goms.io/fleet/pkg/controllers/workv1alpha1"
fleetmetrics "go.goms.io/fleet/pkg/metrics"
"go.goms.io/fleet/pkg/propertyprovider"
"go.goms.io/fleet/pkg/propertyprovider/aks"
"go.goms.io/fleet/pkg/utils"
"go.goms.io/fleet/pkg/utils/httpclient"
//+kubebuilder:scaffold:imports
Expand All @@ -63,6 +65,8 @@ var (
leaderElectionNamespace = flag.String("leader-election-namespace", "kube-system", "The namespace in which the leader election resource will be created.")
enableV1Alpha1APIs = flag.Bool("enable-v1alpha1-apis", true, "If set, the agents will watch for the v1alpha1 APIs.")
enableV1Beta1APIs = flag.Bool("enable-v1beta1-apis", false, "If set, the agents will watch for the v1beta1 APIs.")
propertyProvider = flag.String("property-provider", "none", "The property provider to use for the agent.")
region = flag.String("region", "", "The region where the member cluster resides.")
)

func init() {
Expand Down Expand Up @@ -339,14 +343,35 @@ func Start(ctx context.Context, hubCfg, memberConfig *rest.Config, hubOpts, memb
}

klog.Info("Setting up the internalMemberCluster v1beta1 controller")
imcReconciler, err := imcv1beta1.NewReconciler(ctx, hubMgr.GetClient(), memberMgr.GetConfig(), memberMgr.GetClient(), workController, nil)
// Set up a property provider (if applicable).
var pp propertyprovider.PropertyProvider
switch {
case propertyProvider != nil && *propertyProvider == "aks":
klog.V(2).Info("setting up the AKS property provider")
// Note that the property provider, though initialized here, is not started until
// the specific instance wins the leader election.
pp = aks.New(region)
default:
// Fall back to not using any property provider if the provided type is none or
// not recognizable.
klog.V(2).Info("no property provider is specified, or the given type is not recognizable; start with no property provider")
pp = nil
}

// Set up the IMC controller.
imcReconciler, err := imcv1beta1.NewReconciler(
ctx,
hubMgr.GetClient(),
memberMgr.GetConfig(), memberMgr.GetClient(),
workController,
pp)
if err != nil {
klog.ErrorS(err, "Failed to create v1beta1 controller", "controller", "internalMemberCluster")
return fmt.Errorf("unable to create internalMemberCluster v1beta1 controller: %w", err)
klog.ErrorS(err, "Failed to create internalMemberCluster v1beta1 reconciler")
return fmt.Errorf("failed to create internalMemberCluster v1beta1 reconciler: %w", err)
}
if err := imcReconciler.SetupWithManager(hubMgr); err != nil {
klog.ErrorS(err, "Failed to set up v1beta1 controller with controller manager", "controller", "internalMemberCluster")
return fmt.Errorf("unable to set up internalMemberCluster v1beta1 controller with controller manager: %w", err)
klog.ErrorS(err, "Failed to create v1beta1 controller", "controller", "internalMemberCluster")
return fmt.Errorf("failed to create internalMemberCluster v1beta1 controller: %w", err)
}
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/propertyprovider/aks/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (p *PropertyProvider) Start(ctx context.Context, config *rest.Config) error
return err
}

if p.region == nil {
if p.region == nil || len(*p.region) == 0 {
klog.V(2).Info("Auto-discover region as none has been specified")
// Note that an API reader is passed here for the purpose of auto-discovering region
// information from AKS nodes; at this time the cache from the controller manager
Expand All @@ -134,7 +134,7 @@ func (p *PropertyProvider) Start(ctx context.Context, config *rest.Config) error
}
p.region = discoveredRegion
}
klog.V(2).Infof("Starting with the specified region %s", *p.region)
klog.V(2).Infof("Starting with the region set to %s", *p.region)
pp := trackers.NewAKSKarpenterPricingClient(ctx, *p.region)
p.podTracker = trackers.NewPodTracker()
p.nodeTracker = trackers.NewNodeTracker(pp)
Expand Down
6 changes: 5 additions & 1 deletion test/e2e/framework/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"k8s.io/client-go/tools/clientcmd/api"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/apiutil"

"go.goms.io/fleet/pkg/propertyprovider/aks/trackers"
)

var (
Expand All @@ -33,13 +35,15 @@ type Cluster struct {
PresentingServiceAccountInHubClusterName string
HubURL string
RestMapper meta.RESTMapper
PricingProvider trackers.PricingProvider
}

func NewCluster(name, svcAccountName string, scheme *runtime.Scheme) *Cluster {
func NewCluster(name, svcAccountName string, scheme *runtime.Scheme, pp trackers.PricingProvider) *Cluster {
return &Cluster{
Scheme: scheme,
ClusterName: name,
PresentingServiceAccountInHubClusterName: svcAccountName,
PricingProvider: pp,
}
}

Expand Down
9 changes: 9 additions & 0 deletions test/e2e/kindconfigs/cluster-1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Important: due to kind limitations
# (kind node always has resource capacity == host resource capacity) and the way our planned test
# cases (resource and non-resource properties) are designed, modification of this setup might lead
# to test failures.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
10 changes: 10 additions & 0 deletions test/e2e/kindconfigs/cluster-2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Important: due to kind limitations
# (kind node always has resource capacity == host resource capacity) and the way our planned test
# cases (resource and non-resource properties) are designed, modification of this setup might lead
# to test failures.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
- role: worker
11 changes: 11 additions & 0 deletions test/e2e/kindconfigs/cluster-3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Important: due to kind limitations
# (kind node always has resource capacity == host resource capacity) and the way our planned test
# cases (resource and non-resource properties) are designed, modification of this setup might lead
# to test failures.
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
- role: worker
- role: worker
Loading

0 comments on commit ab8883d

Please sign in to comment.