Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: improve emulator cloud provider reliability #16

Merged
merged 1 commit into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-04-23T19:38:57Z by kres 0610b40-dirty.
# Generated on 2024-08-14T14:55:16Z by kres 7be2a05.

_out
hack/compose/docker-compose.override.yml
hack/compose/docker-compose-provider.override.yml
72 changes: 72 additions & 0 deletions .kres.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ kind: common.Build
spec:
ignoredPaths:
- "hack/compose/docker-compose.override.yml"
- "hack/compose/docker-compose-provider.override.yml"
---
kind: service.CodeCov
spec:
Expand All @@ -24,6 +25,10 @@ spec:
toplevel: true
- name: docker-compose-down
toplevel: true
- name: docker-compose-provider-up
toplevel: true
- name: docker-compose-provider-down
toplevel: true
---
kind: custom.Step
name: docker-compose-up
Expand Down Expand Up @@ -91,3 +96,70 @@ spec:
DOCKER_BUILDKIT=1
GO_LDFLAGS="$(GO_LDFLAGS)"
docker compose -p talemu --file ./hack/compose/docker-compose.yml --file ./hack/compose/docker-compose.override.yml down --rmi local --remove-orphans --volumes=$(REMOVE_VOLUMES)
---
# Custom kres step that emits the `docker-compose-provider-up` Makefile target.
# The target builds and starts the `talemu-cloud-provider` compose project.
kind: custom.Step
name: docker-compose-provider-up
spec:
  makefile:
    enabled: true
    phony: true
    # The folded scalar (`>-`) joins the lines below into ONE shell command line:
    # every VAR="..." assignment becomes an environment prefix of the final
    # `docker compose` invocation, so the values are available for interpolation
    # in the compose files. Do not put comments inside the scalar; they would
    # become part of the command.
    script:
      - >-
        ARTIFACTS="$(ARTIFACTS)"
        SHA="$(SHA)"
        TAG="$(TAG)"
        USERNAME="$(USERNAME)"
        REGISTRY="$(REGISTRY)"
        PROTOBUF_TS_VERSION="$(PROTOBUF_TS_VERSION)"
        NODE_BUILD_ARGS="$(NODE_BUILD_ARGS)"
        TOOLCHAIN="$(TOOLCHAIN)"
        CGO_ENABLED="$(CGO_ENABLED)"
        GO_BUILDFLAGS="$(GO_BUILDFLAGS)"
        GOLANGCILINT_VERSION="$(GOLANGCILINT_VERSION)"
        GOFUMPT_VERSION="$(GOFUMPT_VERSION)"
        GOIMPORTS_VERSION="$(GOIMPORTS_VERSION)"
        PROTOBUF_GO_VERSION="$(PROTOBUF_GO_VERSION)"
        GRPC_GO_VERSION="$(GRPC_GO_VERSION)"
        GRPC_GATEWAY_VERSION="$(GRPC_GATEWAY_VERSION)"
        VTPROTOBUF_VERSION="$(VTPROTOBUF_VERSION)"
        DEEPCOPY_VERSION="$(DEEPCOPY_VERSION)"
        TESTPKGS="$(TESTPKGS)"
        COMPOSE_DOCKER_CLI_BUILD=1
        DOCKER_BUILDKIT=1
        GO_LDFLAGS="$(GO_LDFLAGS)"
        docker compose -p talemu-cloud-provider --file ./hack/compose/docker-compose-provider.yml --file ./hack/compose/docker-compose-provider.override.yml up --build
---
# Custom kres step that emits the `docker-compose-provider-down` Makefile target.
# The target stops the `talemu-cloud-provider` compose project, removing locally
# built images and orphan containers.
kind: custom.Step
name: docker-compose-provider-down
spec:
  makefile:
    enabled: true
    phony: true
    # REMOVE_VOLUMES is substituted into `--volumes=` at the end of the command;
    # with the default the compose volumes are kept.
    variables:
      - name: REMOVE_VOLUMES
        defaultValue: false
    # The folded scalar (`>-`) joins the lines below into ONE shell command line:
    # every VAR="..." assignment becomes an environment prefix of the final
    # `docker compose` invocation. Do not put comments inside the scalar; they
    # would become part of the command.
    script:
      - >-
        ARTIFACTS="$(ARTIFACTS)"
        SHA="$(SHA)"
        TAG="$(TAG)"
        USERNAME="$(USERNAME)"
        REGISTRY="$(REGISTRY)"
        PROTOBUF_TS_VERSION="$(PROTOBUF_TS_VERSION)"
        NODE_BUILD_ARGS="$(NODE_BUILD_ARGS)"
        TOOLCHAIN="$(TOOLCHAIN)"
        CGO_ENABLED="$(CGO_ENABLED)"
        GO_BUILDFLAGS="$(GO_BUILDFLAGS)"
        GOLANGCILINT_VERSION="$(GOLANGCILINT_VERSION)"
        GOFUMPT_VERSION="$(GOFUMPT_VERSION)"
        GOIMPORTS_VERSION="$(GOIMPORTS_VERSION)"
        PROTOBUF_GO_VERSION="$(PROTOBUF_GO_VERSION)"
        GRPC_GO_VERSION="$(GRPC_GO_VERSION)"
        GRPC_GATEWAY_VERSION="$(GRPC_GATEWAY_VERSION)"
        VTPROTOBUF_VERSION="$(VTPROTOBUF_VERSION)"
        DEEPCOPY_VERSION="$(DEEPCOPY_VERSION)"
        TESTPKGS="$(TESTPKGS)"
        COMPOSE_DOCKER_CLI_BUILD=1
        DOCKER_BUILDKIT=1
        GO_LDFLAGS="$(GO_LDFLAGS)"
        docker compose -p talemu-cloud-provider --file ./hack/compose/docker-compose-provider.yml --file ./hack/compose/docker-compose-provider.override.yml down --rmi local --remove-orphans --volumes=$(REMOVE_VOLUMES)
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-07-31T12:19:21Z by kres faf91e3.
# Generated on 2024-08-14T14:55:16Z by kres 7be2a05.

ARG TOOLCHAIN

Expand All @@ -11,7 +11,7 @@ FROM ghcr.io/siderolabs/ca-certificates:v1.7.0 AS image-ca-certificates
FROM ghcr.io/siderolabs/fhs:v1.7.0 AS image-fhs

# runs markdownlint
FROM docker.io/oven/bun:1.1.20-alpine AS lint-markdown
FROM docker.io/oven/bun:1.1.22-alpine AS lint-markdown
WORKDIR /src
RUN bun i markdownlint-cli@0.41.0 sentences-per-line@0.2.1
COPY .markdownlint.json .
Expand Down
24 changes: 16 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
#
# Generated on 2024-07-31T12:19:21Z by kres faf91e3.
# Generated on 2024-08-14T14:58:43Z by kres 7be2a05.

# common variables

Expand All @@ -18,14 +18,14 @@ REGISTRY ?= ghcr.io
USERNAME ?= siderolabs
REGISTRY_AND_USERNAME ?= $(REGISTRY)/$(USERNAME)
PROTOBUF_GO_VERSION ?= 1.34.2
GRPC_GO_VERSION ?= 1.4.0
GRPC_GATEWAY_VERSION ?= 2.20.0
GRPC_GO_VERSION ?= 1.5.1
GRPC_GATEWAY_VERSION ?= 2.21.0
VTPROTOBUF_VERSION ?= 0.6.0
GOIMPORTS_VERSION ?= 0.23.0
GOIMPORTS_VERSION ?= 0.24.0
DEEPCOPY_VERSION ?= v0.5.6
GOLANGCILINT_VERSION ?= v1.59.1
GOLANGCILINT_VERSION ?= v1.60.1
GOFUMPT_VERSION ?= v0.6.0
GO_VERSION ?= 1.22.5
GO_VERSION ?= 1.23.0
GO_BUILDFLAGS ?=
GO_LDFLAGS ?=
CGO_ENABLED ?= 0
Expand Down Expand Up @@ -67,7 +67,7 @@ COMMON_ARGS += --build-arg=DEEPCOPY_VERSION="$(DEEPCOPY_VERSION)"
COMMON_ARGS += --build-arg=GOLANGCILINT_VERSION="$(GOLANGCILINT_VERSION)"
COMMON_ARGS += --build-arg=GOFUMPT_VERSION="$(GOFUMPT_VERSION)"
COMMON_ARGS += --build-arg=TESTPKGS="$(TESTPKGS)"
TOOLCHAIN ?= docker.io/golang:1.22-alpine
TOOLCHAIN ?= docker.io/golang:1.23-alpine

# extra variables

Expand Down Expand Up @@ -135,7 +135,7 @@ else
GO_LDFLAGS += -s
endif

all: unit-tests talemu image-talemu talemu-cloud-provider image-talemu-cloud-provider docker-compose-up docker-compose-down lint
all: unit-tests talemu image-talemu talemu-cloud-provider image-talemu-cloud-provider docker-compose-up docker-compose-down docker-compose-provider-up docker-compose-provider-down lint

$(ARTIFACTS): ## Creates artifacts directory.
@mkdir -p $(ARTIFACTS)
Expand Down Expand Up @@ -225,6 +225,14 @@ docker-compose-up:
docker-compose-down:
ARTIFACTS="$(ARTIFACTS)" SHA="$(SHA)" TAG="$(TAG)" USERNAME="$(USERNAME)" REGISTRY="$(REGISTRY)" PROTOBUF_TS_VERSION="$(PROTOBUF_TS_VERSION)" NODE_BUILD_ARGS="$(NODE_BUILD_ARGS)" TOOLCHAIN="$(TOOLCHAIN)" CGO_ENABLED="$(CGO_ENABLED)" GO_BUILDFLAGS="$(GO_BUILDFLAGS)" GOLANGCILINT_VERSION="$(GOLANGCILINT_VERSION)" GOFUMPT_VERSION="$(GOFUMPT_VERSION)" GOIMPORTS_VERSION="$(GOIMPORTS_VERSION)" PROTOBUF_GO_VERSION="$(PROTOBUF_GO_VERSION)" GRPC_GO_VERSION="$(GRPC_GO_VERSION)" GRPC_GATEWAY_VERSION="$(GRPC_GATEWAY_VERSION)" VTPROTOBUF_VERSION="$(VTPROTOBUF_VERSION)" DEEPCOPY_VERSION="$(DEEPCOPY_VERSION)" TESTPKGS="$(TESTPKGS)" COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 GO_LDFLAGS="$(GO_LDFLAGS)" docker compose -p talemu --file ./hack/compose/docker-compose.yml --file ./hack/compose/docker-compose.override.yml down --rmi local --remove-orphans --volumes=$(REMOVE_VOLUMES)

# Builds and starts the talemu-cloud-provider compose project.
# The VAR="..." prefixes export the build settings to `docker compose` for this one command.
# NOTE: recipe lines must begin with a TAB character.
.PHONY: docker-compose-provider-up
docker-compose-provider-up:
	ARTIFACTS="$(ARTIFACTS)" SHA="$(SHA)" TAG="$(TAG)" USERNAME="$(USERNAME)" REGISTRY="$(REGISTRY)" PROTOBUF_TS_VERSION="$(PROTOBUF_TS_VERSION)" NODE_BUILD_ARGS="$(NODE_BUILD_ARGS)" TOOLCHAIN="$(TOOLCHAIN)" CGO_ENABLED="$(CGO_ENABLED)" GO_BUILDFLAGS="$(GO_BUILDFLAGS)" GOLANGCILINT_VERSION="$(GOLANGCILINT_VERSION)" GOFUMPT_VERSION="$(GOFUMPT_VERSION)" GOIMPORTS_VERSION="$(GOIMPORTS_VERSION)" PROTOBUF_GO_VERSION="$(PROTOBUF_GO_VERSION)" GRPC_GO_VERSION="$(GRPC_GO_VERSION)" GRPC_GATEWAY_VERSION="$(GRPC_GATEWAY_VERSION)" VTPROTOBUF_VERSION="$(VTPROTOBUF_VERSION)" DEEPCOPY_VERSION="$(DEEPCOPY_VERSION)" TESTPKGS="$(TESTPKGS)" COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 GO_LDFLAGS="$(GO_LDFLAGS)" docker compose -p talemu-cloud-provider --file ./hack/compose/docker-compose-provider.yml --file ./hack/compose/docker-compose-provider.override.yml up --build

# Stops the talemu-cloud-provider compose project, removing locally built images and orphans.
# Volumes are removed only when REMOVE_VOLUMES is set to true.
.PHONY: docker-compose-provider-down
docker-compose-provider-down:
	ARTIFACTS="$(ARTIFACTS)" SHA="$(SHA)" TAG="$(TAG)" USERNAME="$(USERNAME)" REGISTRY="$(REGISTRY)" PROTOBUF_TS_VERSION="$(PROTOBUF_TS_VERSION)" NODE_BUILD_ARGS="$(NODE_BUILD_ARGS)" TOOLCHAIN="$(TOOLCHAIN)" CGO_ENABLED="$(CGO_ENABLED)" GO_BUILDFLAGS="$(GO_BUILDFLAGS)" GOLANGCILINT_VERSION="$(GOLANGCILINT_VERSION)" GOFUMPT_VERSION="$(GOFUMPT_VERSION)" GOIMPORTS_VERSION="$(GOIMPORTS_VERSION)" PROTOBUF_GO_VERSION="$(PROTOBUF_GO_VERSION)" GRPC_GO_VERSION="$(GRPC_GO_VERSION)" GRPC_GATEWAY_VERSION="$(GRPC_GATEWAY_VERSION)" VTPROTOBUF_VERSION="$(VTPROTOBUF_VERSION)" DEEPCOPY_VERSION="$(DEEPCOPY_VERSION)" TESTPKGS="$(TESTPKGS)" COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 GO_LDFLAGS="$(GO_LDFLAGS)" docker compose -p talemu-cloud-provider --file ./hack/compose/docker-compose-provider.yml --file ./hack/compose/docker-compose-provider.override.yml down --rmi local --remove-orphans --volumes=$(REMOVE_VOLUMES)

.PHONY: rekres
rekres:
@docker pull $(KRES_IMAGE)
Expand Down
29 changes: 25 additions & 4 deletions cmd/talemu-cloud-provider/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

emuruntime "github.com/siderolabs/talemu/internal/pkg/emu"
"github.com/siderolabs/talemu/internal/pkg/kubefactory"
"github.com/siderolabs/talemu/internal/pkg/machine/network"
"github.com/siderolabs/talemu/internal/pkg/machine/runtime"
"github.com/siderolabs/talemu/internal/pkg/machine/runtime/resources/emu"
"github.com/siderolabs/talemu/internal/pkg/provider"
Expand All @@ -50,9 +51,19 @@ var rootCmd = &cobra.Command{
}

if cfg.createServiceAccount {
err = createServiceAccount(cmd.Context())
if err != nil {
return err
for {
err = createServiceAccount(cmd.Context())
if err == nil {
break
}

logger.Error("failed to create service account", zap.Error(err))

select {
case <-cmd.Context().Done():
return err
case <-time.After(time.Second * 5):
}
}
}

Expand Down Expand Up @@ -97,7 +108,15 @@ var rootCmd = &cobra.Command{
return err
}

if err = provider.RegisterControllers(runtime, kubernetes); err != nil {
nc := network.NewClient()

if err = nc.Run(cmd.Context()); err != nil {
return err
}

defer nc.Close() //nolint:errcheck

if err = provider.RegisterControllers(runtime, kubernetes, nc); err != nil {
return err
}

Expand All @@ -113,6 +132,8 @@ func createServiceAccount(ctx context.Context) error {
return err
}

defer rootClient.Close() //nolint:errcheck

name := access.CloudProviderServiceAccountPrefix + meta.ProviderID

sa := access.ParseServiceAccountFromName(name)
Expand Down
15 changes: 12 additions & 3 deletions cmd/talemu/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
emuruntime "github.com/siderolabs/talemu/internal/pkg/emu"
"github.com/siderolabs/talemu/internal/pkg/kubefactory"
"github.com/siderolabs/talemu/internal/pkg/machine"
"github.com/siderolabs/talemu/internal/pkg/machine/network"
"github.com/siderolabs/talemu/internal/pkg/machine/runtime"
"github.com/siderolabs/talemu/internal/pkg/machine/runtime/resources/emu"
)
Expand Down Expand Up @@ -81,17 +82,25 @@ var rootCmd = &cobra.Command{
return runtime.Run(ctx)
})

nc := network.NewClient()

if err = nc.Run(cmd.Context()); err != nil {
return err
}

defer nc.Close() //nolint:errcheck

for i := range cfg.machinesCount {
machine, err := machine.NewMachine(fmt.Sprintf("%04d1802-c798-4da7-a410-f09abb48c8d8", i+1000), logger, emulatorState)
m, err := machine.NewMachine(fmt.Sprintf("%04d1802-c798-4da7-a410-f09abb48c8d8", i+1000), logger, emulatorState)
if err != nil {
return err
}

eg.Go(func() error {
return machine.Run(ctx, params, i+1000, kubernetes)
return m.Run(ctx, params, i+1000, kubernetes, machine.WithNetworkClient(nc))
})

machines = append(machines, machine)
machines = append(machines, m)
}

var errors error
Expand Down
36 changes: 36 additions & 0 deletions hack/compose/docker-compose-provider.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Compose project for running the talemu cloud provider locally.
# All ${VAR} values are interpolated from the environment of the
# `docker compose` invocation.
version: '3.8'
services:
  talemu-cloud-provider:
    volumes:
      # Named volume (declared at the bottom) mounted at /_out/provider.
      - state:/_out/provider
    container_name: talemu-cloud-provider
    restart: on-failure
    # NOTE(review): NET_ADMIN is presumably required by the emulator's network
    # setup inside the container - confirm before removing.
    cap_add:
      - NET_ADMIN
    build:
      target: image-talemu-cloud-provider
      context: ../../
      dockerfile: Dockerfile
      # `${VAR:?error}` aborts interpolation when VAR is unset or empty;
      # plain `${VAR}` entries are allowed to be empty.
      args:
        - ARTIFACTS=${ARTIFACTS:?error}
        - SHA=${SHA:?error}
        - TAG=${TAG:?error}
        - USERNAME=${USERNAME:?error}
        - REGISTRY=${REGISTRY:?error}
        - NODE_BUILD_ARGS=${NODE_BUILD_ARGS}
        - TOOLCHAIN=${TOOLCHAIN:?error}
        - CGO_ENABLED=${CGO_ENABLED:?error}
        - GO_BUILDFLAGS=${GO_BUILDFLAGS}
        - GOLANGCILINT_VERSION=${GOLANGCILINT_VERSION:?error}
        - GOFUMPT_VERSION=${GOFUMPT_VERSION:?error}
        - GOIMPORTS_VERSION=${GOIMPORTS_VERSION:?error}
        - PROTOBUF_GO_VERSION=${PROTOBUF_GO_VERSION:?error}
        - GRPC_GO_VERSION=${GRPC_GO_VERSION:?error}
        - GRPC_GATEWAY_VERSION=${GRPC_GATEWAY_VERSION:?error}
        - VTPROTOBUF_VERSION=${VTPROTOBUF_VERSION:?error}
        - DEEPCOPY_VERSION=${DEEPCOPY_VERSION:?error}
        - TESTPKGS=${TESTPKGS:?error}
        - GO_LDFLAGS=${GO_LDFLAGS}

volumes:
  # Named volume with default settings, written as an explicit empty mapping.
  state: {}
17 changes: 16 additions & 1 deletion internal/pkg/emu/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package emu

import (
"context"
"time"

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/controller/runtime"
Expand Down Expand Up @@ -70,5 +71,19 @@ func (rt *Runtime) RegisterController(ctrl controller.Controller) error {
// Run starts the global runtime and restarts it whenever it fails.
//
// It returns nil as soon as the underlying runtime returns nil. When the
// underlying runtime returns an error, the error is logged and the runtime is
// started again after a 10 second pause; if ctx is done during that pause, the
// last runtime error is returned instead.
func (rt *Runtime) Run(ctx context.Context) error {
	rt.logger.Info("starting global runtime")

	for {
		err := rt.runtime.Run(ctx)
		if err == nil {
			return nil
		}

		rt.logger.Error("global runtime crashed", zap.Error(err))

		// Back off before restarting, but stop retrying once the context is cancelled.
		select {
		case <-ctx.Done():
			return err
		case <-time.After(time.Second * 10):
		}
	}
}
Loading