From 8b9759f8071fdf89fb499ce3e3744be55594bcaa Mon Sep 17 00:00:00 2001 From: CMGS Date: Tue, 24 Aug 2021 14:22:25 +0800 Subject: [PATCH 1/6] revise selfmon context --- agent.go | 30 +++---------- engine/engine.go | 10 ++--- engine/health_check.go | 12 ++--- engine/health_check_test.go | 3 +- engine/helper.go | 8 ++-- engine/load.go | 6 ++- engine/load_test.go | 3 +- engine/monitor.go | 8 ++-- engine/stat.go | 13 +++--- engine/status/event.go | 16 +++---- go.mod | 4 +- go.sum | 1 - selfmon/selfmon.go | 87 +++++++++++++++---------------------- selfmon/selfmon_test.go | 8 ++-- types/config.go | 22 +++++++++- watcher/log.go | 4 +- 16 files changed, 113 insertions(+), 122 deletions(-) diff --git a/agent.go b/agent.go index c0cf540..d7510f6 100644 --- a/agent.go +++ b/agent.go @@ -1,10 +1,10 @@ package main import ( - "bufio" - "bytes" "fmt" "os" + "os/signal" + "syscall" _ "go.uber.org/automaxprocs" @@ -17,10 +17,8 @@ import ( "github.com/projecteru2/agent/watcher" "github.com/jinzhu/configor" - "github.com/sethvargo/go-signalcontext" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" - "gopkg.in/yaml.v3" ) func setupLogLevel(l string) error { @@ -40,25 +38,11 @@ func initConfig(c *cli.Context) *types.Config { log.Fatalf("[main] load config failed %v", err) } - config.PrepareConfig(c) - printConfig(config) + config.Prepare(c) + config.Print() return config } -func printConfig(c *types.Config) { - bs, err := yaml.Marshal(c) - if err != nil { - log.Fatalf("[main] print config failed %v", err) - } - - log.Info("---- current config ----") - scanner := bufio.NewScanner(bytes.NewBuffer(bs)) - for scanner.Scan() { - log.Info(scanner.Text()) - } - log.Info("------------------------") -} - func serve(c *cli.Context) error { if err := setupLogLevel(c.String("log-level")); err != nil { log.Fatal(err) @@ -69,16 +53,16 @@ func serve(c *cli.Context) error { defer os.Remove(config.PidFile) if c.Bool("selfmon") { - return selfmon.Monitor(config) + return selfmon.Monitor(c.Context, config) } - ctx, cancel := signalcontext.OnInterrupt() + ctx, cancel := signal.NotifyContext(c.Context, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) defer cancel() watcher.InitMonitor() go watcher.LogMonitor.Serve(ctx) - agent, err := engine.NewEngine(c.Context, config) + agent, err := engine.NewEngine(ctx, config) if err != nil { return err } diff --git a/engine/engine.go b/engine/engine.go index 46cc1e2..4af299c 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -67,7 +67,7 @@ func NewEngine(ctx context.Context, config *types.Config) (*Engine, error) { engine.store = store engine.docker = docker engine.node = node - engine.nodeIP = dockerengine.GetIP(context.TODO(), node.Endpoint) + engine.nodeIP = dockerengine.GetIP(ctx, node.Endpoint) if engine.nodeIP == "" { engine.nodeIP = common.LocalIP } @@ -98,7 +98,7 @@ func NewEngine(ctx context.Context, config *types.Config) (*Engine, error) { // either call this in a separated goroutine, or used in main to block main goroutine func (e *Engine) Run(ctx context.Context) error { // load container - if err := e.load(); err != nil { + if err := e.load(ctx); err != nil { return err } // start status watcher @@ -119,21 +119,21 @@ func (e *Engine) Run(ctx context.Context) error { log.Info("[Engine] Agent caught system signal, exiting") return nil case err := <-errChan: - if err := e.crash(); err != nil { + if err := e.crash(ctx); err != nil { log.Infof("[Engine] Mark node crash failed %v", err) } return err } } -func (e *Engine) crash() error { +func (e *Engine) crash(ctx 
context.Context) error {
 	log.Info("[crash] mark all containers unhealthy")
 	containers, err := e.listContainers(false, nil)
 	if err != nil {
 		return err
 	}
 	for _, c := range containers {
-		container, err := e.detectContainer(c.ID)
+		container, err := e.detectContainer(ctx, c.ID)
 		if err != nil {
 			return err
 		}
diff --git a/engine/health_check.go b/engine/health_check.go
index 17302e8..04a1239 100644
--- a/engine/health_check.go
+++ b/engine/health_check.go
@@ -21,7 +21,7 @@ func (e *Engine) healthCheck(ctx context.Context) {
 	for {
 		select {
 		case <-tick.C:
-			go e.checkAllContainers()
+			go e.checkAllContainers(ctx)
 		case <-ctx.Done():
 			return
 		}
@@ -32,7 +32,7 @@
 // we need to list all containers here: when monitor catches a die event the container is already marked false
 // but a health check may have just returned true and written it to core
 // so we also check here to guarantee eventual consistency
-func (e *Engine) checkAllContainers() {
+func (e *Engine) checkAllContainers(ctx context.Context) {
 	log.Debug("[checkAllContainers] health check begin")
 	containers, err := e.listContainers(true, nil)
 	if err != nil {
@@ -45,7 +45,7 @@
 		// ContainerList returns enginetypes.Container
 		// ContainerInspect returns enginetypes.ContainerJSON
 		// it is absurd that the two cannot share one data structure...
-		container, err := e.detectContainer(c.ID)
+		container, err := e.detectContainer(ctx, c.ID)
 		if err != nil {
 			log.Errorf("[checkAllContainers] detect container failed %v", err)
 			continue
@@ -115,9 +115,9 @@ func checkSingleContainerHealthy(container *types.Container, timeout time.Durati
 		httpChecker = append(httpChecker, fmt.Sprintf("http://%s:%s%s", container.LocalIP, container.HealthCheck.HTTPPort, container.HealthCheck.HTTPURL))
 	}
 
-	id := coreutils.ShortID(container.ID)
-	f1 := checkHTTP(id, httpChecker, container.HealthCheck.HTTPCode, timeout)
-	f2 := checkTCP(id, tcpChecker, timeout)
+	ID := coreutils.ShortID(container.ID)
+	f1 := checkHTTP(ID, httpChecker, container.HealthCheck.HTTPCode, timeout)
+	f2 := checkTCP(ID, tcpChecker, timeout)
 	return f1 && f2
 }
diff --git a/engine/health_check_test.go b/engine/health_check_test.go
index c53a834..f3b536a 100644
--- a/engine/health_check_test.go
+++ b/engine/health_check_test.go
@@ -1,6 +1,7 @@
 package engine
 
 import (
+	"context"
 	"net/http"
 	"os"
 	"testing"
@@ -46,7 +47,7 @@ func TestCheckAllContainers(t *testing.T) {
 	e := mockNewEngine()
 	mockStore := e.store.(*mocks.Store)
 	mockStore.On("SetContainerStatus", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil)
-	e.checkAllContainers()
+	e.checkAllContainers(context.TODO())
 	time.Sleep(1 * time.Second)
 }
diff --git a/engine/helper.go b/engine/helper.go
index e900f9d..b2c0407 100644
--- a/engine/helper.go
+++ b/engine/helper.go
@@ -70,18 +70,18 @@ func checkHostname(env []string, hostname string) bool {
 	return false
 }
 
-func (e *Engine) detectContainer(id string) (*types.Container, error) {
+func (e *Engine) detectContainer(ctx context.Context, ID string) (*types.Container, error) {
 	// normalize to the inspect data
-	ctx, cancel := context.WithTimeout(context.Background(), e.config.GlobalConnectionTimeout)
+	ctx, cancel := context.WithTimeout(ctx, e.config.GlobalConnectionTimeout)
 	defer cancel()
-	c, err := e.docker.ContainerInspect(ctx, id)
+	c, err := e.docker.ContainerInspect(ctx, ID)
 	if err != nil {
 		return nil, err
 	}
 	label := c.Config.Labels
 
 	if _, ok := label[cluster.ERUMark]; !ok {
-		return nil, fmt.Errorf("not a eru container %s", coreutils.ShortID(id))
+		return nil, fmt.Errorf("not a eru container %s", coreutils.ShortID(ID))
 	}
 
 	// TODO should be removed in the future
diff 
--git a/engine/load.go b/engine/load.go index 628d703..9cdd94b 100644 --- a/engine/load.go +++ b/engine/load.go @@ -1,11 +1,13 @@ package engine import ( + "context" + coreutils "github.com/projecteru2/core/utils" log "github.com/sirupsen/logrus" ) -func (e *Engine) load() error { +func (e *Engine) load(ctx context.Context) error { log.Info("[load] Load containers") containers, err := e.listContainers(true, nil) if err != nil { @@ -14,7 +16,7 @@ func (e *Engine) load() error { for _, container := range containers { log.Debugf("[load] detect container %s", coreutils.ShortID(container.ID)) - c, err := e.detectContainer(container.ID) + c, err := e.detectContainer(ctx, container.ID) if err != nil { log.Errorf("[load] detect container failed %v", err) continue diff --git a/engine/load_test.go b/engine/load_test.go index 2093752..0c7d7e6 100644 --- a/engine/load_test.go +++ b/engine/load_test.go @@ -1,6 +1,7 @@ package engine import ( + "context" "os" "testing" "time" @@ -22,7 +23,7 @@ func TestLoad(t *testing.T) { mockStore.On("UpdateNode", mock.Anything).Return(nil) mockStore.On("SetContainerStatus", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - err := e.load() + err := e.load(context.TODO()) assert.NoError(t, err) time.Sleep(1 * time.Second) } diff --git a/engine/monitor.go b/engine/monitor.go index 5890a86..380554f 100644 --- a/engine/monitor.go +++ b/engine/monitor.go @@ -30,9 +30,9 @@ func (e *Engine) monitor(eventChan <-chan eventtypes.Message) { eventHandler.Watch(eventChan) } -func (e *Engine) handleContainerStart(event eventtypes.Message) { +func (e *Engine) handleContainerStart(ctx context.Context, event eventtypes.Message) { log.Debugf("[handleContainerStart] container %s start", coreutils.ShortID(event.ID)) - container, err := e.detectContainer(event.ID) + container, err := e.detectContainer(ctx, event.ID) if err != nil { log.Errorf("[handleContainerStart] detect container failed %v", err) return @@ -53,9 +53,9 @@ func (e *Engine) handleContainerStart(event eventtypes.Message) { } } -func (e *Engine) handleContainerDie(event eventtypes.Message) { +func (e *Engine) handleContainerDie(ctx context.Context, event eventtypes.Message) { log.Debugf("[handleContainerDie] container %s die", coreutils.ShortID(event.ID)) - container, err := e.detectContainer(event.ID) + container, err := e.detectContainer(ctx, event.ID) if err != nil { log.Errorf("[handleContainerDie] detect container failed %v", err) } else if err := e.setContainerStatus(container); err != nil { diff --git a/engine/stat.go b/engine/stat.go index 2bec01c..5695e54 100644 --- a/engine/stat.go +++ b/engine/stat.go @@ -11,7 +11,7 @@ import ( log "github.com/sirupsen/logrus" ) -func (e *Engine) stat(parentCtx context.Context, container *types.Container) { +func (e *Engine) stat(ctx context.Context, container *types.Container) { // TODO // FIXME fuck internal pkg proc := "/proc" @@ -19,7 +19,7 @@ func (e *Engine) stat(parentCtx context.Context, container *types.Container) { proc = "/hostProc" } // init stats - containerCPUStats, systemCPUStats, containerNetStats, err := getStats(parentCtx, container, proc) + containerCPUStats, systemCPUStats, containerNetStats, err := getStats(ctx, container, proc) if err != nil { log.Errorf("[stat] get %s stats failed %v", coreutils.ShortID(container.ID), err) return @@ -43,14 +43,13 @@ func (e *Engine) stat(parentCtx context.Context, container *types.Container) { log.Infof("[stat] container %s %s metric report start", container.Name, coreutils.ShortID(container.ID)) 
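 	// A sketch of the ratio computed inside updateMetrics below, assuming two
 	// getStats samples taken one metrics step (`period` seconds) apart:
 	//
 	//	cpuUsage = (newUsage - oldUsage) / (cpuCount * period)
 	//
 	// which is why counts like containerCPUCount are pre-multiplied by period.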
updateMetrics := func() { - id := container.ID - container, err = e.detectContainer(id) + container, err = e.detectContainer(ctx, container.ID) if err != nil { - log.Errorf("[stat] can not refresh container meta %s", id) + log.Errorf("[stat] can not refresh container meta %s", container.ID) return } containerCPUCount := container.CPUNum * period - timeoutCtx, cancel := context.WithTimeout(parentCtx, timeout) + timeoutCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() newContainrCPUStats, newSystemCPUStats, newContainerNetStats, err := getStats(timeoutCtx, container, proc) if err != nil { @@ -131,7 +130,7 @@ func (e *Engine) stat(parentCtx context.Context, container *types.Container) { select { case <-tick.C: updateMetrics() - case <-parentCtx.Done(): + case <-ctx.Done(): mClient.Unregister() return } diff --git a/engine/status/event.go b/engine/status/event.go index e1ff3d8..d1a5a1e 100644 --- a/engine/status/event.go +++ b/engine/status/event.go @@ -1,6 +1,7 @@ package status import ( + "context" "sync" eventtypes "github.com/docker/docker/api/types/events" @@ -11,16 +12,16 @@ import ( // EventHandler define event handler type EventHandler struct { sync.Mutex - handlers map[string]func(eventtypes.Message) + handlers map[string]func(context.Context, eventtypes.Message) } // NewEventHandler new a event handler func NewEventHandler() *EventHandler { - return &EventHandler{handlers: make(map[string]func(eventtypes.Message))} + return &EventHandler{handlers: make(map[string]func(context.Context, eventtypes.Message))} } // Handle hand a event -func (e *EventHandler) Handle(action string, h func(eventtypes.Message)) { +func (e *EventHandler) Handle(action string, h func(context.Context, eventtypes.Message)) { e.Lock() defer e.Unlock() e.handlers[action] = h @@ -31,11 +32,10 @@ func (e *EventHandler) Watch(c <-chan eventtypes.Message) { for ev := range c { log.Infof("[Watch] Monitor: cid %s action %s", coreutils.ShortID(ev.ID), ev.Action) e.Lock() - h, exists := e.handlers[ev.Action] - e.Unlock() - if !exists { - continue + h := e.handlers[ev.Action] + if h != nil { + go h(context.TODO(), ev) } - go h(ev) + e.Unlock() } } diff --git a/go.mod b/go.mod index 7195cbf..26284be 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,6 @@ require ( github.com/pkg/errors v0.9.1 github.com/projecteru2/core v0.0.0-20210618045145-314d3a292929 github.com/prometheus/client_golang v1.11.0 - github.com/sethvargo/go-signalcontext v0.1.0 github.com/shirou/gopsutil v3.20.11+incompatible github.com/sirupsen/logrus v1.7.0 github.com/stretchr/objx v0.2.0 // indirect @@ -26,6 +25,7 @@ require ( go.opencensus.io v0.22.2 // indirect go.uber.org/automaxprocs v1.3.0 google.golang.org/grpc v1.38.0 - gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b + gopkg.in/yaml.v2 v2.4.0 + gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect gotest.tools/v3 v3.0.3 // indirect ) diff --git a/go.sum b/go.sum index 3c5e53a..e1848f1 100644 --- a/go.sum +++ b/go.sum @@ -462,7 +462,6 @@ github.com/schollz/closestmatch v2.1.0+incompatible/go.mod h1:RtP1ddjLong6gTkbtm github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= -github.com/sethvargo/go-signalcontext v0.1.0 h1:3IU7HOlmRXF0PSDf85C4nJ/zjYDjF+DS+LufcKfLvyk= github.com/sethvargo/go-signalcontext v0.1.0/go.mod 
h1:PXu9UmR2f7mmp8kEwgkKmaDbxq/PbqixkiC66WIkkWE= github.com/shirou/gopsutil v3.20.11+incompatible h1:LJr4ZQK4mPpIV5gOa4jCOKOGb4ty4DZO54I4FGqIpto= github.com/shirou/gopsutil v3.20.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= diff --git a/selfmon/selfmon.go b/selfmon/selfmon.go index 0bbc1d3..baa58d2 100644 --- a/selfmon/selfmon.go +++ b/selfmon/selfmon.go @@ -3,7 +3,6 @@ package selfmon import ( "context" "io" - "os" "os/signal" "sync" "syscall" @@ -40,7 +39,7 @@ type Selfmon struct { } // New . -func New(config *types.Config) (mon *Selfmon, err error) { +func New(ctx context.Context, config *types.Config) (mon *Selfmon, err error) { mon = &Selfmon{} mon.config = config mon.exit.C = make(chan struct{}, 1) @@ -48,7 +47,7 @@ func New(config *types.Config) (mon *Selfmon, err error) { return } - if mon.rpc, err = corestore.NewCoreRPCClientPool(context.TODO(), mon.config); err != nil { + if mon.rpc, err = corestore.NewCoreRPCClientPool(ctx, mon.config); err != nil { log.Errorf("[selfmon] no core rpc connection") return } @@ -74,8 +73,8 @@ func (m *Selfmon) Reload() error { } // Run . -func (m *Selfmon) Run() { - ctx, cancel := context.WithCancel(context.Background()) +func (m *Selfmon) Run(ctx context.Context) { + ctx, cancel := context.WithCancel(ctx) defer cancel() go m.watchNodeStatus(ctx) @@ -110,12 +109,8 @@ func (m *Selfmon) initNodeStatus(ctx context.Context) { Nodename: n.Name, Podname: n.Podname, } - if err != nil || status == nil { - fakeMessage.Alive = false - } else { - fakeMessage.Alive = status.Alive - } - m.dealNodeStatusMessage(fakeMessage) + fakeMessage.Alive = !(err != nil || status == nil) && status.Alive + m.dealNodeStatusMessage(ctx, fakeMessage) } } @@ -154,11 +149,11 @@ func (m *Selfmon) watch(ctx context.Context) error { log.Errorf("[selfmon] read node events failed %v", err) return err } - go m.dealNodeStatusMessage(message) + go m.dealNodeStatusMessage(ctx, message) } } -func (m *Selfmon) dealNodeStatusMessage(message *pb.NodeStatusStreamMessage) { +func (m *Selfmon) dealNodeStatusMessage(ctx context.Context, message *pb.NodeStatusStreamMessage) { if message.Error != "" { log.Errorf("[selfmon] deal with node status stream message failed %v", message.Error) return @@ -180,7 +175,7 @@ func (m *Selfmon) dealNodeStatusMessage(message *pb.NodeStatusStreamMessage) { } // TODO maybe we need a distributed lock to control concurrency - ctx, cancel := context.WithTimeout(context.Background(), m.config.GlobalConnectionTimeout) + ctx, cancel := context.WithTimeout(ctx, m.config.GlobalConnectionTimeout) defer cancel() if _, err := m.rpc.GetClient().SetNode(ctx, &pb.SetNodeOptions{ Nodename: message.Nodename, @@ -195,8 +190,8 @@ func (m *Selfmon) dealNodeStatusMessage(message *pb.NodeStatusStreamMessage) { } // Register . 
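 // Register campaigns for the active selfmon role by holding an ephemeral etcd
 // key (ActiveKey); when the key is lost it re-registers until closed. A rough
 // usage sketch, assuming a prepared *Selfmon (this mirrors what Monitor does
 // below):
 //
 //	unregister, err := m.Register(ctx)
 //	if err != nil {
 //		return err
 //	}
 //	defer unregister()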
-func (m *Selfmon) Register() (func(), error) { - ctx, cancel := context.WithCancel(context.Background()) +func (m *Selfmon) Register(ctx context.Context) (func(), error) { + ctx, cancel := context.WithCancel(ctx) del := make(chan struct{}, 1) var wg sync.WaitGroup @@ -263,7 +258,7 @@ func (m *Selfmon) Register() (func(), error) { default: } - if ne, un, err := m.register(); err != nil { + if ne, un, err := m.register(ctx); err != nil { if !errors.Is(err, coretypes.ErrKeyExists) { log.Errorf("[Register] failed to re-register: %v", err) time.Sleep(time.Second) @@ -298,36 +293,35 @@ func (m *Selfmon) Register() (func(), error) { }, nil } -func (m *Selfmon) register() (<-chan struct{}, func(), error) { - ctx, cancel := context.WithTimeout(context.Background(), m.config.GlobalConnectionTimeout*2) +func (m *Selfmon) register(ctx context.Context) (<-chan struct{}, func(), error) { + ctx, cancel := context.WithTimeout(ctx, m.config.GlobalConnectionTimeout*2) defer cancel() return m.etcd.StartEphemeral(ctx, ActiveKey, time.Second*16) } // Monitor . -func Monitor(config *types.Config) error { - mon, err := New(config) +func Monitor(ctx context.Context, config *types.Config) error { + mon, err := New(ctx, config) if err != nil { return err } - unregister, err := mon.Register() + unregister, err := mon.Register(ctx) if err != nil { return err } defer unregister() var wg sync.WaitGroup - wg.Add(1) + wg.Add(2) go func() { defer wg.Done() - handleSignals(mon) + handleSignals(ctx, mon) }() - wg.Add(1) go func() { defer wg.Done() - mon.Run() + mon.Run(ctx) }() log.Infof("[selfmon] selfmon %p is running", mon) @@ -338,42 +332,33 @@ func Monitor(config *types.Config) error { } // handleSignals . -func handleSignals(mon *Selfmon) { +func handleSignals(ctx context.Context, mon *Selfmon) { + var reloadCtx context.Context + var cancel1 context.CancelFunc defer func() { log.Warnf("[selfmon] %p signals handler exit", mon) + cancel1() mon.Close() }() - sch := make(chan os.Signal, 1) - signal.Notify(sch, []os.Signal{ - syscall.SIGHUP, - syscall.SIGINT, - syscall.SIGTERM, - syscall.SIGQUIT, - syscall.SIGUSR2, - }...) 
+	reloadCtx, cancel1 = signal.NotifyContext(ctx, syscall.SIGHUP, syscall.SIGUSR2)
+	exitCtx, cancel2 := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)
+	defer cancel2()
 
 	for {
 		select {
-		case sign := <-sch:
-			switch sign {
-			case syscall.SIGHUP, syscall.SIGUSR2:
-				log.Warnf("[selfmon] recv signal %d to reload", sign)
-				if err := mon.Reload(); err != nil {
-					log.Errorf("[selfmon] reload %p failed %v", mon, err)
-				}
-
-			case syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT:
-				log.Warnf("[selfmon] recv signal %d to exit", sign)
-				return
-
-			default:
-				log.Warnf("[selfmon] recv signal %d to ignore", sign)
-			}
-
 		case <-mon.Exit():
 			log.Warnf("[selfmon] recv from mon %p exit ch", mon)
 			return
+		case <-exitCtx.Done():
+			log.Warn("[selfmon] recv signal to exit")
+			return
+		case <-reloadCtx.Done():
+			log.Warn("[selfmon] recv signal to reload")
+			if err := mon.Reload(); err != nil {
+				log.Errorf("[selfmon] reload %p failed %v", mon, err)
+			}
+			reloadCtx, cancel1 = signal.NotifyContext(ctx, syscall.SIGHUP, syscall.SIGUSR2)
 		}
 	}
 }
diff --git a/selfmon/selfmon_test.go b/selfmon/selfmon_test.go
index 01b7d2d..aa72f7c 100644
--- a/selfmon/selfmon_test.go
+++ b/selfmon/selfmon_test.go
@@ -1,6 +1,7 @@
 package selfmon
 
 import (
+	"context"
 	"fmt"
 	"sync"
 	"testing"
@@ -47,7 +48,7 @@ func TestRun(t *testing.T) {
 	wg.Add(1)
 	go func() {
 		defer wg.Done()
-		m.Run()
+		m.Run(context.TODO())
 	}()
 
 	// Makes it as an active selfmon.
@@ -61,12 +62,13 @@ func TestRun(t *testing.T) {
 func TestRegister(t *testing.T) {
 	m, cancel := newTestSelfmon(t)
 	defer cancel()
+	ctx := context.TODO()
 
-	unregister0, err := m.Register()
+	unregister0, err := m.Register(ctx)
 	require.NoError(t, err)
 	require.NotNil(t, unregister0)
 
-	unregister1, err := m.Register()
+	unregister1, err := m.Register(ctx)
 	require.NoError(t, err)
 	require.NotNil(t, unregister1)
 
diff --git a/types/config.go b/types/config.go
index 2770c1d..7aa75b4 100644
--- a/types/config.go
+++ b/types/config.go
@@ -1,12 +1,15 @@
 package types
 
 import (
+	"bufio"
+	"bytes"
 	"os"
 	"time"
 
 	coretypes "github.com/projecteru2/core/types"
 	log "github.com/sirupsen/logrus"
 	cli "github.com/urfave/cli/v2"
+	"gopkg.in/yaml.v2"
 )
 
 // DockerConfig contains endpoint
@@ -69,8 +72,8 @@ func (config *Config) GetHealthCheckStatusTTL() int64 {
 	return int64(2*config.HealthCheck.Interval + config.HealthCheck.Interval/2)
 }
 
-// PrepareConfig overrides the config from CLI flags and prepares it
-func (config *Config) PrepareConfig(c *cli.Context) {
+// Prepare overrides the config from CLI flags and prepares it
+func (config *Config) Prepare(c *cli.Context) {
 	if c.String("hostname") != "" {
 		config.HostName = c.String("hostname")
 	} else {
@@ -140,3 +143,18 @@ func (config *Config) PrepareConfig(c *cli.Context) {
 		config.HealthCheck.CacheTTL = 300
 	}
 }
+
+// Print config
+func (config *Config) Print() {
+	bs, err := yaml.Marshal(config)
+	if err != nil {
+		log.Fatalf("[config] print config failed %v", err)
+	}
+
+	log.Info("---- current config ----")
+	scanner := bufio.NewScanner(bytes.NewBuffer(bs))
+	for scanner.Scan() {
+		log.Info(scanner.Text())
+	}
+	log.Info("------------------------")
+}
diff --git a/watcher/log.go b/watcher/log.go
index ddecc01..57c50ec 100644
--- a/watcher/log.go
+++ b/watcher/log.go
@@ -44,12 +44,12 @@ func (w *Watcher) Serve(ctx context.Context) {
 				break
 			}
 			line := fmt.Sprintf("%X\r\n%s\r\n\r\n", len(data)+2, string(data))
-			for id, consumer := range consumers {
+			for ID, consumer := range consumers {
 				if _, err := consumer.Buf.WriteString(line); err != nil {
 					logrus.Error(err)
 					logrus.Infof("%s %s log detached", consumer.App, consumer.ID)
consumer.Conn.Close() - delete(consumers, id) + delete(consumers, ID) if len(w.consumer[log.Name]) == 0 { delete(w.consumer, log.Name) } From 29862207259df3341a832f223ed1af2cbe90c992 Mon Sep 17 00:00:00 2001 From: DuodenumL Date: Wed, 8 Sep 2021 15:00:47 +0800 Subject: [PATCH 2/6] Abstract "runtime" --- runtime/docker/client.go | 30 +++ runtime/docker/container.go | 49 ++++ runtime/docker/docker.go | 330 +++++++++++++++++++++++++++ runtime/docker/helpers.go | 70 ++++++ runtime/docker/metrics.go | 383 ++++++++++++++++++++++++++++++++ runtime/docker/stat.go | 146 ++++++++++++ runtime/docker/stat_linux.go | 47 ++++ runtime/docker/stat_notlinux.go | 31 +++ runtime/mocks/Runtime.go | 197 ++++++++++++++++ runtime/mocks/template.go | 141 ++++++++++++ runtime/runtime.go | 21 ++ types/config.go | 16 +- types/config_test.go | 4 + types/container.go | 53 ----- types/kv.go | 7 + types/message.go | 9 + types/node.go | 18 ++ types/workload.go | 13 ++ utils/check.go | 81 +++++++ utils/check_test.go | 23 ++ utils/retry_test.go | 2 +- utils/utils.go | 96 ++++++++ utils/utils_test.go | 97 ++++++++ 23 files changed, 1809 insertions(+), 55 deletions(-) create mode 100644 runtime/docker/client.go create mode 100644 runtime/docker/container.go create mode 100644 runtime/docker/docker.go create mode 100644 runtime/docker/helpers.go create mode 100644 runtime/docker/metrics.go create mode 100644 runtime/docker/stat.go create mode 100644 runtime/docker/stat_linux.go create mode 100644 runtime/docker/stat_notlinux.go create mode 100644 runtime/mocks/Runtime.go create mode 100644 runtime/mocks/template.go create mode 100644 runtime/runtime.go delete mode 100644 types/container.go create mode 100644 types/kv.go create mode 100644 types/message.go create mode 100644 types/node.go create mode 100644 types/workload.go create mode 100644 utils/check.go create mode 100644 utils/check_test.go diff --git a/runtime/docker/client.go b/runtime/docker/client.go new file mode 100644 index 0000000..9826492 --- /dev/null +++ b/runtime/docker/client.go @@ -0,0 +1,30 @@ +package docker + +import ( + "sync" + + "github.com/projecteru2/agent/types" + + log "github.com/sirupsen/logrus" +) + +var ( + once sync.Once + client *Docker +) + +// InitClient init docker client +func InitClient(config *types.Config, nodeIP string) { + once.Do(func() { + var err error + client, err = New(config, nodeIP) + if err != nil { + log.Errorf("[GetDockerClient] failed to make docker client, err: %s", err) + } + }) +} + +// GetClient . 
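+// A rough usage sketch, assuming InitClient already ran during agent startup
+// (GetClient returns nil if initialisation failed):
+//
+//	docker.InitClient(config, nodeIP)
+//	if d := docker.GetClient(); d != nil && d.IsDaemonRunning(ctx) {
+//		ids, _ := d.ListWorkloadIDs(ctx, true, nil)
+//		log.Infof("found %d workloads", len(ids))
+//	}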
+func GetClient() *Docker { + return client +} diff --git a/runtime/docker/container.go b/runtime/docker/container.go new file mode 100644 index 0000000..d567ee4 --- /dev/null +++ b/runtime/docker/container.go @@ -0,0 +1,49 @@ +package docker + +import ( + "context" + "fmt" + "time" + + "github.com/projecteru2/agent/utils" + coretypes "github.com/projecteru2/core/types" + coreutils "github.com/projecteru2/core/utils" +) + +// Container docker container +type Container struct { + coretypes.StatusMeta + Pid int + Name string + EntryPoint string + Ident string + CPUNum float64 + CPUQuota int64 + CPUPeriod int64 + Memory int64 + Labels map[string]string + Env map[string]string + HealthCheck *coretypes.HealthCheck + LocalIP string `json:"-"` +} + +// CheckHealth check container's health status +func (c *Container) CheckHealth(ctx context.Context, timeout time.Duration) bool { + if c.HealthCheck == nil { + return true + } + tcpChecker := []string{} + httpChecker := []string{} + + for _, port := range c.HealthCheck.TCPPorts { + tcpChecker = append(tcpChecker, fmt.Sprintf("%s:%s", c.LocalIP, port)) + } + if c.HealthCheck.HTTPPort != "" { + httpChecker = append(httpChecker, fmt.Sprintf("http://%s:%s%s", c.LocalIP, c.HealthCheck.HTTPPort, c.HealthCheck.HTTPURL)) + } + + ID := coreutils.ShortID(c.ID) + f1 := utils.CheckHTTP(ctx, ID, httpChecker, c.HealthCheck.HTTPCode, timeout) + f2 := utils.CheckTCP(ID, tcpChecker, timeout) + return f1 && f2 +} diff --git a/runtime/docker/docker.go b/runtime/docker/docker.go new file mode 100644 index 0000000..ba4b9aa --- /dev/null +++ b/runtime/docker/docker.go @@ -0,0 +1,330 @@ +package docker + +import ( + "context" + "encoding/json" + "fmt" + "io" + "math" + "net/http/httputil" + "os" + "time" + + "github.com/projecteru2/agent/common" + "github.com/projecteru2/agent/types" + "github.com/projecteru2/agent/utils" + "github.com/projecteru2/core/cluster" + coreutils "github.com/projecteru2/core/utils" + + enginetypes "github.com/docker/docker/api/types" + enginecontainer "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/events" + enginefilters "github.com/docker/docker/api/types/filters" + engineapi "github.com/docker/docker/client" + "github.com/docker/docker/pkg/stdcopy" + "github.com/docker/go-units" + "github.com/shirou/gopsutil/cpu" + "github.com/shirou/gopsutil/mem" + log "github.com/sirupsen/logrus" +) + +// Docker . 
+type Docker struct {
+	client *engineapi.Client
+	config *types.Config
+
+	nodeIP    string
+	cpuCore   float64 // kept as float64 because it will be multiplied with float64 values later
+	memory    int64
+	cas       utils.GroupCAS
+	transfers *utils.HashBackends
+}
+
+const (
+	fieldPodName         = "ERU_POD"
+	fieldNodeName        = "ERU_NODE_NAME"
+	fieldStoreIdentifier = "eru.coreid"
+)
+
+// New returns a wrapper of docker client
+func New(config *types.Config, nodeIP string) (*Docker, error) {
+	d := &Docker{}
+	d.config = config
+
+	var err error
+	d.client, err = utils.MakeDockerClient(config)
+	if err != nil {
+		log.Errorf("[NewDocker] failed to make docker client, err: %v", err)
+		return nil, err
+	}
+
+	d.nodeIP = nodeIP
+	log.Infof("[NewDocker] Host IP %s", d.nodeIP)
+
+	if utils.IsDockerized() {
+		os.Setenv("HOST_PROC", "/hostProc")
+	}
+
+	cpus, err := cpu.Info()
+	if err != nil {
+		return nil, err
+	}
+	log.Infof("[NewDocker] Host has %d cpus", len(cpus))
+
+	memory, err := mem.VirtualMemory()
+	if err != nil {
+		return nil, err
+	}
+	log.Infof("[NewDocker] Host has %d bytes of memory", memory.Total)
+
+	d.cpuCore = float64(len(cpus))
+	d.memory = int64(memory.Total)
+
+	d.transfers = utils.NewHashBackends(config.Metrics.Transfers)
+
+	return d, nil
+}
+
+func (d *Docker) getFilterArgs(filters []types.KV) enginefilters.Args {
+	f := enginefilters.NewArgs()
+
+	for _, kv := range filters {
+		f.Add(kv.Key, kv.Value)
+	}
+
+	return f
+}
+
+// ListWorkloadIDs lists workload IDs filtered by given condition
+func (d *Docker) ListWorkloadIDs(ctx context.Context, all bool, filters []types.KV) ([]string, error) {
+	f := d.getFilterArgs(filters)
+	opts := enginetypes.ContainerListOptions{Filters: f, All: all}
+
+	var containers []enginetypes.Container
+	var err error
+	utils.WithTimeout(ctx, d.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		containers, err = d.client.ContainerList(ctx, opts)
+	})
+	if err != nil {
+		log.Errorf("[ListWorkloadIDs] failed to list workloads, err: %v", err)
+		return nil, err
+	}
+
+	workloads := make([]string, 0, len(containers))
+	for _, c := range containers {
+		workloads = append(workloads, c.ID)
+	}
+	return workloads, nil
+}
+
+// AttachWorkload . 
+func (d *Docker) AttachWorkload(ctx context.Context, ID string) (io.Reader, io.Reader, error) {
+	resp, err := d.client.ContainerAttach(ctx, ID, enginetypes.ContainerAttachOptions{
+		Stream: true,
+		Stdin:  false,
+		Stdout: true,
+		Stderr: true,
+	})
+	if err != nil && err != httputil.ErrPersistEOF { // nolint
+		log.Errorf("[AttachWorkload] failed to attach workload %v, err: %v", ID, err)
+		return nil, nil, err
+	}
+
+	cap, _ := units.RAMInBytes("10M")
+	outr, outw := utils.NewBufPipe(cap)
+	errr, errw := utils.NewBufPipe(cap)
+
+	go func() {
+		defer func() {
+			resp.Close()
+			outw.Close()
+			errw.Close()
+		}()
+
+		_, err = stdcopy.StdCopy(outw, errw, resp.Reader)
+		if err != nil {
+			log.Errorf("[attach] attach get stream failed %s", err)
+		}
+		log.Infof("[attach] attach workload %s finished", coreutils.ShortID(ID))
+	}()
+
+	return outr, errr, nil
+}
+
+// detectWorkload detects a container by ID
+func (d *Docker) detectWorkload(ctx context.Context, ID string) (*Container, error) {
+	// normalize to the inspect data
+	var c enginetypes.ContainerJSON
+	var err error
+	utils.WithTimeout(ctx, d.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		c, err = d.client.ContainerInspect(ctx, ID)
+	})
+	if err != nil {
+		return nil, err
+	}
+	label := c.Config.Labels
+
+	if _, ok := label[cluster.ERUMark]; !ok {
+		return nil, fmt.Errorf("not a eru container %s", coreutils.ShortID(ID))
+	}
+
+	// TODO should be removed in the future
+	if d.config.CheckOnlyMine && !utils.UseLabelAsFilter() && !utils.CheckHostname(c.Config.Env, d.config.HostName) {
+		return nil, fmt.Errorf("should ignore this container")
+	}
+
+	// generate the base meta
+	meta := coreutils.DecodeMetaInLabel(ctx, label)
+
+	// check the eru pattern: a container carrying ERUMark plus a three-part name is an ERU container
+	container, err := generateContainerMeta(c, meta, label)
+	if err != nil {
+		return nil, err
+	}
+	// calculate how much CPU the container uses
+	container = calcuateCPUNum(container, c, d.cpuCore)
+	if container.Memory == 0 || container.Memory == math.MaxInt64 {
+		container.Memory = d.memory
+	}
+	// only a running container needs to be published
+	if c.NetworkSettings != nil && container.Running {
+		networks := map[string]string{}
+		for name, endpoint := range c.NetworkSettings.Networks {
+			networkmode := enginecontainer.NetworkMode(name)
+			if networkmode.IsHost() {
+				container.LocalIP = common.LocalIP
+				networks[name] = d.nodeIP
+			} else {
+				container.LocalIP = endpoint.IPAddress
+				networks[name] = endpoint.IPAddress
+			}
+			break
+		}
+		container.Networks = networks
+	}
+
+	return container, nil
+}
+
+// Events returns the events of workloads' changes
+func (d *Docker) Events(ctx context.Context, filters []types.KV) (<-chan *types.WorkloadEventMessage, <-chan error) {
+	eventChan := make(chan *types.WorkloadEventMessage)
+	errChan := make(chan error)
+
+	go func() {
+		defer close(eventChan)
+		defer close(errChan)
+
+		f := d.getFilterArgs(filters)
+		f.Add("type", events.ContainerEventType)
+		options := enginetypes.EventsOptions{Filters: f}
+		m, e := d.client.Events(ctx, options)
+		for {
+			select {
+			case message := <-m:
+				eventChan <- &types.WorkloadEventMessage{
+					ID:       message.ID,
+					Type:     message.Type,
+					Action:   message.Action,
+					TimeNano: message.TimeNano,
+				}
+			case err := <-e:
+				errChan <- err
+			case <-ctx.Done():
+				return
+			}
+		}
+	}()
+
+	return eventChan, errChan
+}
+
+// GetStatus checks the workload's status and returns it
+func (d *Docker) GetStatus(ctx context.Context, ID string, checkHealth bool) (*types.WorkloadStatus, error) {
+	container, err := d.detectWorkload(ctx, ID)
+	if err != nil {
+		log.Errorf("[GetStatus] 
failed to detect workload, err: %v", err) + return nil, err + } + + bytes, err := json.Marshal(container.Labels) + if err != nil { + log.Errorf("[GetStatus] failed to marshal labels, err: %v", err) + return nil, err + } + + status := &types.WorkloadStatus{ + ID: container.ID, + Running: container.Running, + Networks: container.Networks, + Extension: bytes, + Appname: container.Name, + Nodename: d.config.HostName, + Entrypoint: container.Entrypoint, + Healthy: container.Running && container.HealthCheck == nil, + } + + // only check the running containers + if checkHealth && container.Running { + free, acquired := d.cas.Acquire(container.ID) + if !acquired { + return nil, fmt.Errorf("[GetStatus] failed to get the lock") + } + defer free() + status.Healthy = container.CheckHealth(ctx, time.Duration(d.config.HealthCheck.Timeout)*time.Second) + } + + return status, nil +} + +// GetWorkloadName returns the name of workload +func (d *Docker) GetWorkloadName(ctx context.Context, ID string) (string, error) { + var containerJSON enginetypes.ContainerJSON + var err error + utils.WithTimeout(ctx, d.config.GlobalConnectionTimeout, func(ctx context.Context) { + containerJSON, err = d.client.ContainerInspect(ctx, ID) + }) + if err != nil { + log.Errorf("[GetWorkloadName] failed to get container by id %v, err: %v", ID, err) + return "", err + } + + return containerJSON.Name, nil +} + +// LogFieldsExtra . +func (d *Docker) LogFieldsExtra(ctx context.Context, ID string) (map[string]string, error) { + container, err := d.detectWorkload(ctx, ID) + if err != nil { + log.Errorf("[LogFieldsExtra] failed to detect container, err: %v", err) + return nil, err + } + + extra := map[string]string{ + "podname": container.Env[fieldPodName], + "nodename": container.Env[fieldNodeName], + "coreid": container.Labels[fieldStoreIdentifier], + } + for name, addr := range container.Networks { + extra[fmt.Sprintf("networks_%s", name)] = addr + } + return extra, nil +} + +// IsDaemonRunning returns if the runtime daemon is running. 
+func (d *Docker) IsDaemonRunning(ctx context.Context) bool {
+	var err error
+	utils.WithTimeout(ctx, d.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		_, err = d.client.Info(ctx)
+	})
+	if err != nil {
+		log.Debugf("[IsDaemonRunning] connect to docker daemon failed, err: %v", err)
+		return false
+	}
+	return true
+}
+
+// Name returns the name of runtime
+func (d *Docker) Name() string {
+	return "docker"
+}
diff --git a/runtime/docker/helpers.go b/runtime/docker/helpers.go
new file mode 100644
index 0000000..0a8bd7b
--- /dev/null
+++ b/runtime/docker/helpers.go
@@ -0,0 +1,70 @@
+package docker
+
+import (
+	"strings"
+
+	"github.com/projecteru2/agent/utils"
+	coretypes "github.com/projecteru2/core/types"
+
+	enginetypes "github.com/docker/docker/api/types"
+	log "github.com/sirupsen/logrus"
+)
+
+func normalizeEnv(env []string) map[string]string {
+	em := make(map[string]string)
+	for _, e := range env {
+		ps := strings.SplitN(e, "=", 2)
+		if len(ps) == 2 {
+			em[ps[0]] = ps[1]
+		} else {
+			em[ps[0]] = ""
+		}
+	}
+	return em
+}
+
+// generateContainerMeta makes the meta object
+func generateContainerMeta(c enginetypes.ContainerJSON, meta *coretypes.LabelMeta, labels map[string]string) (*Container, error) {
+	name, entrypoint, ident, err := utils.GetAppInfo(c.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	container := &Container{
+		StatusMeta:  coretypes.StatusMeta{ID: c.ID},
+		Name:        name,
+		EntryPoint:  entrypoint,
+		Ident:       ident,
+		Labels:      labels,
+		Env:         normalizeEnv(c.Config.Env),
+		HealthCheck: meta.HealthCheck,
+		CPUQuota:    c.HostConfig.Resources.CPUQuota,
+		CPUPeriod:   c.HostConfig.Resources.CPUPeriod,
+		Memory:      utils.Max(c.HostConfig.Memory, c.HostConfig.MemoryReservation),
+	}
+
+	if !c.State.Running || c.State.Pid == 0 {
+		container.Healthy = false
+		container.Running = false
+	} else {
+		// a container deployed for the first time may have no health check configured
+		// in that case we treat it as always healthy and skip checking
+		// we still have to report whether it is healthy on first deploy
+		container.Pid = c.State.Pid
+		container.Running = c.State.Running
+		container.Healthy = meta.HealthCheck == nil
+	}
+
+	log.Debugf("[generateContainerMeta] Generate container meta %v %v", container.Name, container.EntryPoint)
+	return container, nil
+}
+
+// calcuateCPUNum calculates how many CPUs the container uses
+func calcuateCPUNum(container *Container, containerJSON enginetypes.ContainerJSON, hostCPUNum float64) *Container {
+	cpuNum := hostCPUNum
+	if containerJSON.HostConfig.CPUPeriod != 0 && containerJSON.HostConfig.CPUQuota != 0 {
+		cpuNum = float64(containerJSON.HostConfig.CPUQuota) / float64(containerJSON.HostConfig.CPUPeriod)
+	}
+	container.CPUNum = cpuNum
+	return container
+}
diff --git a/runtime/docker/metrics.go b/runtime/docker/metrics.go
new file mode 100644
index 0000000..8bcea4e
--- /dev/null
+++ b/runtime/docker/metrics.go
@@ -0,0 +1,383 @@
+package docker
+
+import (
+	"fmt"
+	"strings"
+	"sync"
+
+	"github.com/projecteru2/core/cluster"
+	coreutils "github.com/projecteru2/core/utils"
+
+	statsdlib "github.com/CMGS/statsd"
+	"github.com/prometheus/client_golang/prometheus"
+	log "github.com/sirupsen/logrus"
+)
+
+// MetricsClient combines statsd and prometheus
+type MetricsClient struct {
+	statsd       string
+	statsdClient *statsdlib.Client
+	prefix       string
+	data         map[string]float64
+
+	cpuHostUsage     prometheus.Gauge
+	cpuHostSysUsage  prometheus.Gauge
+	cpuHostUserUsage prometheus.Gauge
+
+	cpuContainerUsage     prometheus.Gauge
+	cpuContainerSysUsage  prometheus.Gauge
+	cpuContainerUserUsage prometheus.Gauge
+
+	memUsage    prometheus.Gauge
+	memMaxUsage prometheus.Gauge
+	memRss 
prometheus.Gauge + memPercent prometheus.Gauge + memRSSPercent prometheus.Gauge + + bytesSent *prometheus.GaugeVec + bytesRecv *prometheus.GaugeVec + packetsSent *prometheus.GaugeVec + packetsRecv *prometheus.GaugeVec + errIn *prometheus.GaugeVec + errOut *prometheus.GaugeVec + dropIn *prometheus.GaugeVec + dropOut *prometheus.GaugeVec +} + +var clients sync.Map + +// NewMetricsClient new a metrics client +func NewMetricsClient(statsd, hostname string, container *Container) *MetricsClient { + if metricsClient, ok := clients.Load(container.ID); ok { + return metricsClient.(*MetricsClient) + } + + clables := []string{} + for k, v := range container.Labels { + if strings.HasPrefix(k, cluster.ERUMark) || strings.HasPrefix(k, cluster.LabelMeta) { + continue + } + clables = append(clables, fmt.Sprintf("%s=%s", k, v)) + } + labels := map[string]string{ + "containerID": container.ID, + "hostname": hostname, + "appname": container.Name, + "entrypoint": container.EntryPoint, + "orchestrator": cluster.ERUMark, + "labels": strings.Join(clables, ","), + } + + cpuHostUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cpu_host_usage", + Help: "cpu usage in host view.", + ConstLabels: labels, + }) + cpuHostSysUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cpu_host_sys_usage", + Help: "cpu sys usage in host view.", + ConstLabels: labels, + }) + cpuHostUserUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cpu_host_user_usage", + Help: "cpu user usage in host view.", + ConstLabels: labels, + }) + cpuContainerUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cpu_container_usage", + Help: "cpu usage in container view.", + ConstLabels: labels, + }) + cpuContainerSysUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cpu_container_sys_usage", + Help: "cpu sys usage in container view.", + ConstLabels: labels, + }) + cpuContainerUserUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "cpu_container_user_usage", + Help: "cpu user usage in container view.", + ConstLabels: labels, + }) + memUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "mem_usage", + Help: "memory usage.", + ConstLabels: labels, + }) + memMaxUsage := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "mem_max_usage", + Help: "memory max usage.", + ConstLabels: labels, + }) + memRss := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "mem_rss", + Help: "memory rss.", + ConstLabels: labels, + }) + memPercent := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "mem_percent", + Help: "memory percent.", + ConstLabels: labels, + }) + memRSSPercent := prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "mem_rss_percent", + Help: "memory rss percent.", + ConstLabels: labels, + }) + bytesSent := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "bytes_send", + Help: "bytes send.", + ConstLabels: labels, + }, []string{"nic"}) + bytesRecv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "bytes_recv", + Help: "bytes recv.", + ConstLabels: labels, + }, []string{"nic"}) + packetsSent := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "packets_send", + Help: "packets send.", + ConstLabels: labels, + }, []string{"nic"}) + packetsRecv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "packets_recv", + Help: "packets recv.", + ConstLabels: labels, + }, []string{"nic"}) + errIn := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "err_in", + Help: "err in.", + ConstLabels: labels, + }, []string{"nic"}) + errOut := prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "err_out", 
+		Help:        "err out.",
+		ConstLabels: labels,
+	}, []string{"nic"})
+	dropIn := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name:        "drop_in",
+		Help:        "drop in.",
+		ConstLabels: labels,
+	}, []string{"nic"})
+	dropOut := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+		Name:        "drop_out",
+		Help:        "drop out.",
+		ConstLabels: labels,
+	}, []string{"nic"})
+
+	// TODO the version is no longer part of the tag here
+	tag := fmt.Sprintf("%s.%s", hostname, coreutils.ShortID(container.ID))
+	endpoint := fmt.Sprintf("%s.%s", container.Name, container.EntryPoint)
+	prefix := fmt.Sprintf("%s.%s.%s", cluster.ERUMark, endpoint, tag)
+
+	prometheus.MustRegister(
+		cpuHostSysUsage, cpuHostUsage, cpuHostUserUsage,
+		cpuContainerSysUsage, cpuContainerUsage, cpuContainerUserUsage,
+		memMaxUsage, memRss, memUsage, memPercent, memRSSPercent,
+		bytesRecv, bytesSent, packetsRecv, packetsSent,
+		errIn, errOut, dropIn, dropOut,
+	)
+
+	metricsClient := &MetricsClient{
+		statsd: statsd,
+		prefix: prefix,
+		data:   map[string]float64{},
+
+		cpuHostUsage:     cpuHostUsage,
+		cpuHostSysUsage:  cpuHostSysUsage,
+		cpuHostUserUsage: cpuHostUserUsage,
+
+		cpuContainerUsage:     cpuContainerUsage,
+		cpuContainerSysUsage:  cpuContainerSysUsage,
+		cpuContainerUserUsage: cpuContainerUserUsage,
+
+		memUsage:      memUsage,
+		memMaxUsage:   memMaxUsage,
+		memRss:        memRss,
+		memPercent:    memPercent,
+		memRSSPercent: memRSSPercent,
+
+		bytesSent:   bytesSent,
+		bytesRecv:   bytesRecv,
+		packetsSent: packetsSent,
+		packetsRecv: packetsRecv,
+		errIn:       errIn,
+		errOut:      errOut,
+		dropIn:      dropIn,
+		dropOut:     dropOut,
+	}
+	clients.Store(container.ID, metricsClient)
+	return metricsClient
+}
+
+// Unregister unlinks all prometheus things
+func (m *MetricsClient) Unregister() {
+	prometheus.Unregister(m.cpuHostSysUsage)
+	prometheus.Unregister(m.cpuHostUsage)
+	prometheus.Unregister(m.cpuHostUserUsage)
+
+	prometheus.Unregister(m.cpuContainerUsage)
+	prometheus.Unregister(m.cpuContainerSysUsage)
+	prometheus.Unregister(m.cpuContainerUserUsage)
+
+	prometheus.Unregister(m.memUsage)
+	prometheus.Unregister(m.memMaxUsage)
+	prometheus.Unregister(m.memRss)
+	prometheus.Unregister(m.memPercent)
+	prometheus.Unregister(m.memRSSPercent)
+
+	prometheus.Unregister(m.bytesRecv)
+	prometheus.Unregister(m.bytesSent)
+	prometheus.Unregister(m.packetsRecv)
+	prometheus.Unregister(m.packetsSent)
+	prometheus.Unregister(m.errIn)
+	prometheus.Unregister(m.errOut)
+	prometheus.Unregister(m.dropIn)
+	prometheus.Unregister(m.dropOut)
+}
+
+// CPUHostUsage set cpu usage in host view
+func (m *MetricsClient) CPUHostUsage(i float64) {
+	m.data["cpu_host_usage"] = i
+	m.cpuHostUsage.Set(i)
+}
+
+// CPUHostSysUsage set cpu sys usage in host view
+func (m *MetricsClient) CPUHostSysUsage(i float64) {
+	m.data["cpu_host_sys_usage"] = i
+	m.cpuHostSysUsage.Set(i)
+}
+
+// CPUHostUserUsage set cpu user usage in host view
+func (m *MetricsClient) CPUHostUserUsage(i float64) {
+	m.data["cpu_host_user_usage"] = i
+	m.cpuHostUserUsage.Set(i)
+}
+
+// CPUContainerUsage set cpu usage in container view
+func (m *MetricsClient) CPUContainerUsage(i float64) {
+	m.data["cpu_container_usage"] = i
+	m.cpuContainerUsage.Set(i)
+}
+
+// CPUContainerSysUsage set cpu sys usage in container view
+func (m *MetricsClient) CPUContainerSysUsage(i float64) {
+	m.data["cpu_container_sys_usage"] = i
+	m.cpuContainerSysUsage.Set(i)
+}
+
+// CPUContainerUserUsage set cpu user usage in container view
+func (m *MetricsClient) CPUContainerUserUsage(i float64) {
+	m.data["cpu_container_user_usage"] = i
+	m.cpuContainerUserUsage.Set(i)
+}
+
+// MemUsage set 
memory usage +func (m *MetricsClient) MemUsage(i float64) { + m.data["mem_usage"] = i + m.memUsage.Set(i) +} + +// MemMaxUsage set memory max usage +func (m *MetricsClient) MemMaxUsage(i float64) { + m.data["mem_max_usage"] = i + m.memMaxUsage.Set(i) +} + +// MemRss set memory rss +func (m *MetricsClient) MemRss(i float64) { + m.data["mem_rss"] = i + m.memRss.Set(i) +} + +// MemPercent set memory percent +func (m *MetricsClient) MemPercent(i float64) { + m.data["mem_percent"] = i + m.memPercent.Set(i) +} + +// MemRSSPercent set memory percent +func (m *MetricsClient) MemRSSPercent(i float64) { + m.data["mem_rss_percent"] = i + m.memRSSPercent.Set(i) +} + +// BytesSent set bytes send +func (m *MetricsClient) BytesSent(nic string, i float64) { + m.data[nic+".bytes.sent"] = i + m.bytesSent.WithLabelValues(nic).Set(i) +} + +// BytesRecv set bytes recv +func (m *MetricsClient) BytesRecv(nic string, i float64) { + m.data[nic+".bytes.recv"] = i + m.bytesRecv.WithLabelValues(nic).Set(i) +} + +// PacketsSent set packets send +func (m *MetricsClient) PacketsSent(nic string, i float64) { + m.data[nic+".packets.sent"] = i + m.packetsSent.WithLabelValues(nic).Set(i) +} + +// PacketsRecv set packets recv +func (m *MetricsClient) PacketsRecv(nic string, i float64) { + m.data[nic+".packets.recv"] = i + m.packetsRecv.WithLabelValues(nic).Set(i) +} + +// ErrIn set inbound err count +func (m *MetricsClient) ErrIn(nic string, i float64) { + m.data[nic+".err.in"] = i + m.errIn.WithLabelValues(nic).Set(i) +} + +// ErrOut set outbound err count +func (m *MetricsClient) ErrOut(nic string, i float64) { + m.data[nic+".err.out"] = i + m.errOut.WithLabelValues(nic).Set(i) +} + +// DropIn set inbound drop count +func (m *MetricsClient) DropIn(nic string, i float64) { + m.data[nic+".drop.in"] = i + m.dropIn.WithLabelValues(nic).Set(i) +} + +// DropOut set outbound drop count +func (m *MetricsClient) DropOut(nic string, i float64) { + m.data[nic+".drop.out"] = i + m.dropOut.WithLabelValues(nic).Set(i) +} + +// Lazy connecting +func (m *MetricsClient) checkConn() error { + if m.statsdClient != nil { + return nil + } + // We needn't try to renew/reconnect because of only supporting UDP protocol now + // We should add an `errorCount` to reconnect when implementing TCP protocol + var err error + if m.statsdClient, err = statsdlib.New(m.statsd, statsdlib.WithErrorHandler(func(err error) { + log.Errorf("[statsd] Sending statsd failed: %v", err) + })); err != nil { + log.Errorf("[statsd] Connect statsd failed: %v", err) + return err + } + return nil +} + +// Send to statsd +func (m *MetricsClient) Send() error { + if m.statsd == "" { + return nil + } + if err := m.checkConn(); err != nil { + return err + } + for k, v := range m.data { + key := fmt.Sprintf("%s.%s", m.prefix, k) + m.statsdClient.Gauge(key, v) + delete(m.data, k) + } + return nil +} diff --git a/runtime/docker/stat.go b/runtime/docker/stat.go new file mode 100644 index 0000000..f56748b --- /dev/null +++ b/runtime/docker/stat.go @@ -0,0 +1,146 @@ +package docker + +import ( + "context" + "strings" + "time" + + "github.com/projecteru2/agent/utils" + coreutils "github.com/projecteru2/core/utils" + + "github.com/shirou/gopsutil/net" + log "github.com/sirupsen/logrus" +) + +// CollectWorkloadMetrics . 
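+// CollectWorkloadMetrics blocks until ctx is cancelled: it re-samples CPU,
+// memory and NIC counters every Metrics.Step seconds and pushes the gauges,
+// so callers would typically run it in its own goroutine, e.g.:
+//
+//	go d.CollectWorkloadMetrics(ctx, workloadID)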
+func (d *Docker) CollectWorkloadMetrics(ctx context.Context, ID string) {
+	// TODO
+	// FIXME fuck internal pkg
+	proc := "/proc"
+	if utils.IsDockerized() {
+		proc = "/hostProc"
+	}
+
+	container, err := d.detectWorkload(ctx, ID)
+	if err != nil {
+		log.Errorf("[CollectWorkloadMetrics] failed to detect container, err: %v", err)
+		return
+	}
+
+	// init stats
+	containerCPUStats, systemCPUStats, containerNetStats, err := getStats(ctx, container.ID, container.Pid, proc)
+	if err != nil {
+		log.Errorf("[stat] get %s stats failed %v", coreutils.ShortID(container.ID), err)
+		return
+	}
+
+	delta := float64(d.config.Metrics.Step)
+	timeout := time.Duration(d.config.Metrics.Step) * time.Second
+	tick := time.NewTicker(timeout)
+	defer tick.Stop()
+	hostname := strings.ReplaceAll(d.config.HostName, ".", "-")
+	addr := ""
+	if d.transfers.Len() > 0 {
+		addr = d.transfers.Get(container.ID, 0)
+	}
+
+	period := float64(d.config.Metrics.Step)
+	hostCPUCount := d.cpuCore * period
+
+	mClient := NewMetricsClient(addr, hostname, container)
+	defer log.Infof("[stat] container %s %s metric report stop", container.Name, coreutils.ShortID(container.ID))
+	log.Infof("[stat] container %s %s metric report start", container.Name, coreutils.ShortID(container.ID))
+
+	updateMetrics := func() {
+		newContainer, err := d.detectWorkload(ctx, container.ID)
+		if err != nil {
+			log.Errorf("[stat] cannot refresh container meta %s", container.ID)
+			return
+		}
+		containerCPUCount := newContainer.CPUNum * period
+		timeoutCtx, cancel := context.WithTimeout(ctx, timeout)
+		defer cancel()
+		newContainerCPUStats, newSystemCPUStats, newContainerNetStats, err := getStats(timeoutCtx, newContainer.ID, newContainer.Pid, proc)
+		if err != nil {
+			log.Errorf("[stat] get %s stats failed %v", coreutils.ShortID(newContainer.ID), err)
+			return
+		}
+		containerMemStats, err := getMemStats(timeoutCtx, newContainer.ID)
+		if err != nil {
+			log.Errorf("[stat] get %s mem stats failed %v", coreutils.ShortID(newContainer.ID), err)
+			return
+		}
+
+		deltaContainerCPUUsage := newContainerCPUStats.Usage - containerCPUStats.Usage       // CPU Usage in seconds
+		deltaContainerCPUSysUsage := newContainerCPUStats.System - containerCPUStats.System  // Sys Usage in jiffies / tick
+		deltaContainerCPUUserUsage := newContainerCPUStats.User - containerCPUStats.User     // User Usage in jiffies / tick
+
+		deltaSystemCPUSysUsage := newSystemCPUStats.System - systemCPUStats.System
+		deltaSystemCPUUserUsage := newSystemCPUStats.User - systemCPUStats.User
+
+		cpuHostUsage := deltaContainerCPUUsage / hostCPUCount
+		cpuHostSysUsage := 0.0
+		if deltaSystemCPUSysUsage > 0 {
+			cpuHostSysUsage = deltaContainerCPUSysUsage / deltaSystemCPUSysUsage
+		}
+		cpuHostUserUsage := 0.0
+		if deltaSystemCPUUserUsage > 0 {
+			cpuHostUserUsage = deltaContainerCPUUserUsage / deltaSystemCPUUserUsage
+		}
+		mClient.CPUHostUsage(cpuHostUsage)
+		mClient.CPUHostSysUsage(cpuHostSysUsage)
+		mClient.CPUHostUserUsage(cpuHostUserUsage)
+
+		cpuContainerUsage := deltaContainerCPUUsage / containerCPUCount // actual CPU seconds consumed / CPU seconds allowed
+		cpuContainerSysUsage := 0.0
+		if deltaContainerCPUUsage > 0 {
+			cpuContainerSysUsage = deltaContainerCPUSysUsage / deltaContainerCPUUsage
+		}
+		cpuContainerUserUsage := 0.0
+		if deltaContainerCPUUsage > 0 {
+			cpuContainerUserUsage = deltaContainerCPUUserUsage / deltaContainerCPUUsage
+		}
+		mClient.CPUContainerUsage(cpuContainerUsage)
+		mClient.CPUContainerSysUsage(cpuContainerSysUsage)
+		mClient.CPUContainerUserUsage(cpuContainerUserUsage)
+
+		
mClient.MemUsage(float64(containerMemStats.MemUsageInBytes)) + mClient.MemMaxUsage(float64(containerMemStats.MemMaxUsageInBytes)) + mClient.MemRss(float64(containerMemStats.RSS)) + if newContainer.Memory > 0 { + mClient.MemPercent(float64(containerMemStats.MemUsageInBytes) / float64(newContainer.Memory)) + mClient.MemRSSPercent(float64(containerMemStats.RSS) / float64(newContainer.Memory)) + } + nics := map[string]net.IOCountersStat{} + for _, nic := range containerNetStats { + nics[nic.Name] = nic + } + for _, nic := range newContainerNetStats { + if _, ok := nics[nic.Name]; !ok { + continue + } + oldNICStats := nics[nic.Name] + mClient.BytesSent(nic.Name, float64(nic.BytesSent-oldNICStats.BytesSent)/delta) + mClient.BytesRecv(nic.Name, float64(nic.BytesRecv-oldNICStats.BytesRecv)/delta) + mClient.PacketsSent(nic.Name, float64(nic.PacketsSent-oldNICStats.PacketsSent)/delta) + mClient.PacketsRecv(nic.Name, float64(nic.PacketsRecv-oldNICStats.PacketsRecv)/delta) + mClient.ErrIn(nic.Name, float64(nic.Errin-oldNICStats.Errin)/delta) + mClient.ErrOut(nic.Name, float64(nic.Errout-oldNICStats.Errout)/delta) + mClient.DropIn(nic.Name, float64(nic.Dropin-oldNICStats.Dropin)/delta) + mClient.DropOut(nic.Name, float64(nic.Dropout-oldNICStats.Dropout)/delta) + } + containerCPUStats, systemCPUStats, containerNetStats = newContainerCPUStats, newSystemCPUStats, newContainerNetStats + if err := mClient.Send(); err != nil { + log.Errorf("[stat] Send metrics failed %v", err) + } + } + for { + select { + case <-tick.C: + updateMetrics() + case <-ctx.Done(): + mClient.Unregister() + return + } + } +} diff --git a/runtime/docker/stat_linux.go b/runtime/docker/stat_linux.go new file mode 100644 index 0000000..ef5f99f --- /dev/null +++ b/runtime/docker/stat_linux.go @@ -0,0 +1,47 @@ +//go:build linux +// +build linux + +package docker + +import ( + "context" + "fmt" + + "github.com/shirou/gopsutil/cpu" + "github.com/shirou/gopsutil/docker" + "github.com/shirou/gopsutil/net" +) + +func getStats(ctx context.Context, ID string, pid int, proc string) (*docker.CgroupCPUStat, cpu.TimesStat, []net.IOCountersStat, error) { + // get container cpu stats + containerCPUStatsWithoutUsage, err := docker.CgroupCPUDockerWithContext(ctx, ID) + if err != nil { + return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err + } + containerCPUStatsUsage, err := docker.CgroupCPUDockerUsageWithContext(ctx, ID) + if err != nil { + return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err + } + containerCPUStats := &docker.CgroupCPUStat{ + TimesStat: *containerCPUStatsWithoutUsage, + Usage: containerCPUStatsUsage, + } + // get system cpu stats + systemCPUsStats, err := cpu.TimesWithContext(ctx, false) + if err != nil { + return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err + } + // 0 means all cpu + systemCPUStats := systemCPUsStats[0] + // get container nic stats + netFilePath := fmt.Sprintf("%s/%d/net/dev", proc, pid) + containerNetStats, err := net.IOCountersByFileWithContext(ctx, true, netFilePath) + if err != nil { + return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err + } + return containerCPUStats, systemCPUStats, containerNetStats, nil +} + +func getMemStats(ctx context.Context, ID string) (*docker.CgroupMemStat, error) { + return docker.CgroupMemDockerWithContext(ctx, ID) +} diff --git a/runtime/docker/stat_notlinux.go b/runtime/docker/stat_notlinux.go new file mode 100644 index 0000000..10ca4d2 --- /dev/null +++ b/runtime/docker/stat_notlinux.go @@ -0,0 +1,31 @@ +//go:build !linux +// +build !linux + +package docker + 
+import ( + "context" + + "github.com/shirou/gopsutil/cpu" + "github.com/shirou/gopsutil/docker" + "github.com/shirou/gopsutil/net" +) + +func getStats(ctx context.Context, ID string, pid int, proc string) (*docker.CgroupCPUStat, cpu.TimesStat, []net.IOCountersStat, error) { + containerCPUStats := &docker.CgroupCPUStat{ + TimesStat: cpu.TimesStat{}, + Usage: 0.0, + } + // get system cpu stats + systemCPUsStats, err := cpu.TimesWithContext(ctx, false) + if err != nil { + return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err + } + // 0 means all cpu + systemCPUStats := systemCPUsStats[0] + return containerCPUStats, systemCPUStats, []net.IOCountersStat{}, nil +} + +func getMemStats(ctx context.Context, ID string) (*docker.CgroupMemStat, error) { + return &docker.CgroupMemStat{}, nil +} diff --git a/runtime/mocks/Runtime.go b/runtime/mocks/Runtime.go new file mode 100644 index 0000000..b3d3241 --- /dev/null +++ b/runtime/mocks/Runtime.go @@ -0,0 +1,197 @@ +// Code generated by mockery 2.9.0. DO NOT EDIT. + +package mocks + +import ( + context "context" + io "io" + + mock "github.com/stretchr/testify/mock" + + types "github.com/projecteru2/agent/types" +) + +// Runtime is an autogenerated mock type for the Runtime type +type Runtime struct { + mock.Mock +} + +// AttachWorkload provides a mock function with given fields: ctx, ID +func (_m *Runtime) AttachWorkload(ctx context.Context, ID string) (io.Reader, io.Reader, error) { + ret := _m.Called(ctx, ID) + + var r0 io.Reader + if rf, ok := ret.Get(0).(func(context.Context, string) io.Reader); ok { + r0 = rf(ctx, ID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(io.Reader) + } + } + + var r1 io.Reader + if rf, ok := ret.Get(1).(func(context.Context, string) io.Reader); ok { + r1 = rf(ctx, ID) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(io.Reader) + } + } + + var r2 error + if rf, ok := ret.Get(2).(func(context.Context, string) error); ok { + r2 = rf(ctx, ID) + } else { + r2 = ret.Error(2) + } + + return r0, r1, r2 +} + +// CollectWorkloadMetrics provides a mock function with given fields: ctx, ID +func (_m *Runtime) CollectWorkloadMetrics(ctx context.Context, ID string) { + _m.Called(ctx, ID) +} + +// Events provides a mock function with given fields: ctx, filters +func (_m *Runtime) Events(ctx context.Context, filters []types.KV) (<-chan *types.WorkloadEventMessage, <-chan error) { + ret := _m.Called(ctx, filters) + + var r0 <-chan *types.WorkloadEventMessage + if rf, ok := ret.Get(0).(func(context.Context, []types.KV) <-chan *types.WorkloadEventMessage); ok { + r0 = rf(ctx, filters) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan *types.WorkloadEventMessage) + } + } + + var r1 <-chan error + if rf, ok := ret.Get(1).(func(context.Context, []types.KV) <-chan error); ok { + r1 = rf(ctx, filters) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(<-chan error) + } + } + + return r0, r1 +} + +// GetStatus provides a mock function with given fields: ctx, ID, checkHealth +func (_m *Runtime) GetStatus(ctx context.Context, ID string, checkHealth bool) (*types.WorkloadStatus, error) { + ret := _m.Called(ctx, ID, checkHealth) + + var r0 *types.WorkloadStatus + if rf, ok := ret.Get(0).(func(context.Context, string, bool) *types.WorkloadStatus); ok { + r0 = rf(ctx, ID, checkHealth) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*types.WorkloadStatus) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, string, bool) error); ok { + r1 = rf(ctx, ID, checkHealth) + } else { + r1 = 
ret.Error(1) + } + + return r0, r1 +} + +// GetWorkloadName provides a mock function with given fields: ctx, ID +func (_m *Runtime) GetWorkloadName(ctx context.Context, ID string) (string, error) { + ret := _m.Called(ctx, ID) + + var r0 string + if rf, ok := ret.Get(0).(func(context.Context, string) string); ok { + r0 = rf(ctx, ID) + } else { + r0 = ret.Get(0).(string) + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = rf(ctx, ID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// IsDaemonRunning provides a mock function with given fields: ctx +func (_m *Runtime) IsDaemonRunning(ctx context.Context) bool { + ret := _m.Called(ctx) + + var r0 bool + if rf, ok := ret.Get(0).(func(context.Context) bool); ok { + r0 = rf(ctx) + } else { + r0 = ret.Get(0).(bool) + } + + return r0 +} + +// ListWorkloadIDs provides a mock function with given fields: ctx, all, filters +func (_m *Runtime) ListWorkloadIDs(ctx context.Context, all bool, filters []types.KV) ([]string, error) { + ret := _m.Called(ctx, all, filters) + + var r0 []string + if rf, ok := ret.Get(0).(func(context.Context, bool, []types.KV) []string); ok { + r0 = rf(ctx, all, filters) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]string) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, bool, []types.KV) error); ok { + r1 = rf(ctx, all, filters) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// LogFieldsExtra provides a mock function with given fields: ctx, ID +func (_m *Runtime) LogFieldsExtra(ctx context.Context, ID string) (map[string]string, error) { + ret := _m.Called(ctx, ID) + + var r0 map[string]string + if rf, ok := ret.Get(0).(func(context.Context, string) map[string]string); ok { + r0 = rf(ctx, ID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(map[string]string) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = rf(ctx, ID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// Name provides a mock function with given fields: +func (_m *Runtime) Name() string { + ret := _m.Called() + + var r0 string + if rf, ok := ret.Get(0).(func() string); ok { + r0 = rf() + } else { + r0 = ret.Get(0).(string) + } + + return r0 +} diff --git a/runtime/mocks/template.go b/runtime/mocks/template.go new file mode 100644 index 0000000..b3d9f54 --- /dev/null +++ b/runtime/mocks/template.go @@ -0,0 +1,141 @@ +package mocks + +import ( + "context" + "strings" + "time" + + "github.com/projecteru2/agent/common" + "github.com/projecteru2/agent/runtime" + "github.com/projecteru2/agent/types" + + "github.com/stretchr/testify/mock" +) + +// eva a fake workload +type eva struct { + ID string + Name string + EntryPoint string + Pid int + Running bool + Healthy bool +} + +// Nerv a fake runtime +type Nerv struct { + Runtime + workloads map[string]*eva + msgChan chan *types.WorkloadEventMessage + errChan chan error + daemonRunning bool +} + +func (n *Nerv) init() { + n.workloads = map[string]*eva{ + "Rei": { + ID: "Rei", + Name: "nerv_eva0_boiled", + EntryPoint: "eva0", + Pid: 12306, + Running: true, + Healthy: false, + }, + "Shinji": { + ID: "Shinji", + Name: "nerv_eva1_related", + EntryPoint: "eva1", + Pid: 12307, + Running: true, + Healthy: true, + }, + "Asuka": { + ID: "Asuka", + Name: "nerv_eva2_genius", + EntryPoint: "eva2", + Pid: 12308, + Running: false, // not yet + Healthy: false, + }, + } + + n.msgChan = make(chan *types.WorkloadEventMessage) + n.errChan = make(chan error) + 
n.daemonRunning = true +} + +// FromTemplate returns a mock runtime instance created from template +func FromTemplate() runtime.Runtime { + n := &Nerv{} + n.init() + n.On("AttachWorkload", mock.Anything, mock.Anything).Return(strings.NewReader("stdout\n"), strings.NewReader("stderr\n"), nil) + n.On("CollectWorkloadMetrics", mock.Anything, mock.Anything).Return() + n.On("ListWorkloadIDs", mock.Anything, mock.Anything, mock.Anything).Return(func(ctx context.Context, all bool, filters []types.KV) []string { + var IDs []string + for ID, workload := range n.workloads { + if all || workload.Running { + IDs = append(IDs, ID) + } + } + return IDs + }, nil) + n.On("Events", mock.Anything, mock.Anything).Return(func(ctx context.Context, filters []types.KV) <-chan *types.WorkloadEventMessage { + return n.msgChan + }, func(ctx context.Context, filters []types.KV) <-chan error { + return n.errChan + }) + n.On("GetStatus", mock.Anything, mock.Anything, mock.Anything).Return(func(ctx context.Context, ID string, checkHealth bool) *types.WorkloadStatus { + workload := n.workloads[ID] + return &types.WorkloadStatus{ + ID: workload.ID, + Running: workload.Running, + Healthy: workload.Healthy, + } + }, nil) + n.On("GetWorkloadName", mock.Anything, mock.Anything).Return(func(ctx context.Context, ID string) string { + return n.workloads[ID].Name + }, nil) + n.On("LogFieldsExtra", mock.Anything, mock.Anything).Return(map[string]string{}, nil) + n.On("IsDaemonRunning", mock.Anything).Return(func(ctx context.Context) bool { + return n.daemonRunning + }) + n.On("Name").Return("NERV") + + return n +} + +// StartEvents starts the events: Shinji 400%, Asuka starts, Asuka dies, Rei dies +func (n *Nerv) StartEvents() { + n.msgChan <- &types.WorkloadEventMessage{ + ID: "Shinji", + Action: "400%", + } + + n.workloads["Asuka"].Running = true + n.workloads["Asuka"].Healthy = true + n.msgChan <- &types.WorkloadEventMessage{ + ID: "Asuka", + Action: common.StatusStart, + } + time.Sleep(time.Second) + + n.workloads["Asuka"].Running = false + n.workloads["Asuka"].Healthy = false + n.msgChan <- &types.WorkloadEventMessage{ + ID: "Asuka", + Action: common.StatusDie, + } + time.Sleep(time.Second) + + n.workloads["Rei"].Running = false + n.workloads["Rei"].Healthy = false + n.msgChan <- &types.WorkloadEventMessage{ + ID: "Rei", + Action: common.StatusDie, + } +} + +// SetDaemonRunning set `daemonRunning` +func (n *Nerv) SetDaemonRunning(status bool) { + n.daemonRunning = status +} diff --git a/runtime/runtime.go b/runtime/runtime.go new file mode 100644 index 0000000..8ae9ea7 --- /dev/null +++ b/runtime/runtime.go @@ -0,0 +1,21 @@ +package runtime + +import ( + "context" + "io" + + "github.com/projecteru2/agent/types" +) + +// Runtime provides runtime-related functions +type Runtime interface { + AttachWorkload(ctx context.Context, ID string) (io.Reader, io.Reader, error) + CollectWorkloadMetrics(ctx context.Context, ID string) + ListWorkloadIDs(ctx context.Context, all bool, filters []types.KV) ([]string, error) + Events(ctx context.Context, filters []types.KV) (<-chan *types.WorkloadEventMessage, <-chan error) + GetStatus(ctx context.Context, ID string, checkHealth bool) (*types.WorkloadStatus, error) + GetWorkloadName(ctx context.Context, ID string) (string, error) + LogFieldsExtra(ctx context.Context, ID string) (map[string]string, error) + IsDaemonRunning(ctx context.Context) bool + Name() string +} diff --git a/types/config.go b/types/config.go index 7aa75b4..42ed778 100644 --- a/types/config.go +++ b/types/config.go @@ -7,6 
+7,7 @@ import ( "time" coretypes "github.com/projecteru2/core/types" + log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" "gopkg.in/yaml.v2" @@ -51,10 +52,14 @@ type Config struct { CheckOnlyMine bool `yaml:"check_only_mine" default:"false"` + Store string `yaml:"store" default:"grpc"` + Runtime string `yaml:"runtime" default:"docker"` + KV string `yaml:"kv" default:"etcd"` + Auth coretypes.AuthConfig `yaml:"auth"` Docker DockerConfig Metrics MetricsConfig - API APIConfig + API APIConfig `yaml:"api"` Log LogConfig HealthCheck HealthCheckConfig `yaml:"healthcheck"` Etcd coretypes.EtcdConfig `yaml:"etcd"` @@ -129,6 +134,15 @@ func (config *Config) Prepare(c *cli.Context) { if c.Bool("check-only-mine") { config.CheckOnlyMine = true } + if c.String("runtime") != "" { + config.Runtime = c.String("runtime") + } + if c.String("store") != "" { + config.Store = c.String("store") + } + if c.String("kv") != "" { + config.KV = c.String("kv") + } // validate if config.PidFile == "" { log.Fatal("need to set pidfile") diff --git a/types/config_test.go b/types/config_test.go index 47f122e..dfe909d 100644 --- a/types/config_test.go +++ b/types/config_test.go @@ -23,4 +23,8 @@ func TestLoadConfig(t *testing.T) { assert.Equal(config.HealthCheck.CacheTTL, 300) assert.False(config.HealthCheck.EnableSelfmon) assert.Equal(config.GetHealthCheckStatusTTL(), int64(300)) + + assert.Equal(config.Store, "grpc") + assert.Equal(config.Runtime, "docker") + assert.Equal(config.KV, "etcd") } diff --git a/types/container.go b/types/container.go deleted file mode 100644 index 8376ce4..0000000 --- a/types/container.go +++ /dev/null @@ -1,53 +0,0 @@ -package types - -import ( - "fmt" - - coretypes "github.com/projecteru2/core/types" -) - -const ( - fieldPodname = "ERU_POD" - fieldNodename = "ERU_NODE_NAME" - fieldCoreIdentifier = "eru.coreid" -) - -// Container define agent view container -type Container struct { - coretypes.StatusMeta - Pid int - Name string - EntryPoint string - Ident string - CPUNum float64 - CPUQuota int64 - CPUPeriod int64 - Memory int64 - Labels map[string]string - Env map[string]string - HealthCheck *coretypes.HealthCheck - LocalIP string `json:"-"` -} - -// LogFieldExtra returns the extra field of log line -// currently it contains podname, nodename, coreid, and networks -// which user can know where this container is -// a sample: -// { -// "podname": "testpod", -// "nodename": "testnode", -// "coreid": "b60d121b438a380c343d5ec3c2037564b82ffef3", -// "networks_test_calico_pool1": "10.243.122.1", -// "networks_test_calico_pool2": "10.233.0.1", -// } -func (c *Container) LogFieldExtra() map[string]string { - extra := map[string]string{ - "podname": c.Env[fieldPodname], - "nodename": c.Env[fieldNodename], - "coreid": c.Labels[fieldCoreIdentifier], - } - for name, addr := range c.Networks { - extra[fmt.Sprintf("networks_%s", name)] = addr - } - return extra -} diff --git a/types/kv.go b/types/kv.go new file mode 100644 index 0000000..eeb225e --- /dev/null +++ b/types/kv.go @@ -0,0 +1,7 @@ +package types + +// KV key value pair +type KV struct { + Key string + Value string +} diff --git a/types/message.go b/types/message.go new file mode 100644 index 0000000..e52b09d --- /dev/null +++ b/types/message.go @@ -0,0 +1,9 @@ +package types + +// WorkloadEventMessage . 
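+// it carries a runtime event (like docker's start/die) in a runtime-agnostic form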
+type WorkloadEventMessage struct {
+	ID       string
+	Type     string
+	Action   string
+	TimeNano int64
+}
diff --git a/types/node.go b/types/node.go
new file mode 100644
index 0000000..f0cbd65
--- /dev/null
+++ b/types/node.go
@@ -0,0 +1,18 @@
+package types
+
+// Node the node as the agent sees it
+type Node struct {
+	Name      string
+	Endpoint  string
+	Podname   string
+	Labels    map[string]string
+	Available bool
+}
+
+// NodeStatus the liveness of a node
+type NodeStatus struct {
+	Nodename string
+	Podname  string
+	Alive    bool
+	Error    error
+}
diff --git a/types/workload.go b/types/workload.go
new file mode 100644
index 0000000..a16ff67
--- /dev/null
+++ b/types/workload.go
@@ -0,0 +1,13 @@
+package types
+
+// WorkloadStatus the status of a workload
+type WorkloadStatus struct {
+	ID         string
+	Running    bool
+	Healthy    bool
+	Networks   map[string]string
+	Extension  []byte
+	Appname    string
+	Nodename   string
+	Entrypoint string
+}
diff --git a/utils/check.go b/utils/check.go
new file mode 100644
index 0000000..54790cf
--- /dev/null
+++ b/utils/check.go
@@ -0,0 +1,81 @@
+package utils
+
+import (
+	"context"
+	"net"
+	"net/http"
+	"time"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// CheckHTTP checks all the health-check URLs of a workload,
+// in practice there is usually only one
+func CheckHTTP(ctx context.Context, ID string, backends []string, code int, timeout time.Duration) bool {
+	for _, backend := range backends {
+		log.Debugf("[checkHTTP] Check health via http: container %s, url %s, expect code %d", ID, backend, code)
+		if !checkOneURL(ctx, backend, code, timeout) {
+			log.Infof("[checkHTTP] Check health failed via http: container %s, url %s, expect code %d", ID, backend, code)
+			return false
+		}
+	}
+	return true
+}
+
+// CheckTCP checks whether the TCP backends are connectable
+// note: no context support here, only a dial timeout
+func CheckTCP(ID string, backends []string, timeout time.Duration) bool {
+	for _, backend := range backends {
+		log.Debugf("[checkTCP] Check health via tcp: container %s, backend %s", ID, backend)
+		conn, err := net.DialTimeout("tcp", backend, timeout)
+		if err != nil {
+			return false
+		}
+		conn.Close()
+	}
+	return true
+}
+
+// a borrowed helper,
+// since the standard library offers no context-aware GET shortcut
+func get(ctx context.Context, client *http.Client, url string) (*http.Response, error) {
+	if client == nil {
+		client = http.DefaultClient
+	}
+
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	resp, err := client.Do(req.WithContext(ctx))
+	if err != nil {
+		select {
+		case <-ctx.Done():
+			err = ctx.Err()
+		default:
+		}
+	}
+	return resp, err
+}
+
+// when no expected code is given, treat any status code in [200, 500) as success
+func checkOneURL(ctx context.Context, url string, expectedCode int, timeout time.Duration) bool {
+	var resp *http.Response
+	var err error
+	WithTimeout(ctx, timeout, func(ctx context.Context) {
+		resp, err = get(ctx, nil, url) // nolint
+	})
+	if err != nil {
+		log.Warnf("[checkOneURL] Error when checking %s, %s", url, err.Error())
+		return false
+	}
+	defer resp.Body.Close()
+	if expectedCode == 0 {
+		return resp.StatusCode < 500 && resp.StatusCode >= 200
+	}
+	if resp.StatusCode != expectedCode {
+		log.Infof("[checkOneURL] Error when checking %s, expect %d, got %d", url, expectedCode, resp.StatusCode)
+	}
+	return resp.StatusCode == expectedCode
+}
diff --git a/utils/check_test.go b/utils/check_test.go
new file mode 100644
index 0000000..8bfab4e
--- /dev/null
+++ b/utils/check_test.go
@@ -0,0 +1,23 @@
+package utils
+
+import (
+	"context"
+	"net/http"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCheck(t *testing.T) {
+	go http.ListenAndServe(":12306", http.NotFoundHandler())
+	time.Sleep(time.Second)
+	ctx := context.Background()
+	assert.Equal(t, CheckHTTP(ctx, "", []string{"http://127.0.0.1:12306"}, 404, time.Second), true)
+	assert.Equal(t, CheckHTTP(ctx, "", []string{"http://127.0.0.1:12306"}, 0, time.Second), true)
+	assert.Equal(t, CheckHTTP(ctx, "", []string{"http://127.0.0.1:12306"}, 200, time.Second), false)
+	assert.Equal(t, CheckHTTP(ctx, "", []string{"http://127.0.0.1:12307"}, 200, time.Second), false)
+
+	assert.Equal(t, CheckTCP("", []string{"127.0.0.1:12306"}, time.Second), true)
+	assert.Equal(t, CheckTCP("", []string{"127.0.0.1:12307"}, time.Second), false)
+}
diff --git a/utils/retry_test.go b/utils/retry_test.go
index ec69c46..2761b9c 100644
--- a/utils/retry_test.go
+++ b/utils/retry_test.go
@@ -18,5 +18,5 @@ func TestBackoffRetry(t *testing.T) {
 		return nil
 	}
 	assert.Nil(t, BackoffRetry(context.Background(), 10, f))
-	assert.EqualValues(t, 4, i)
+	assert.Equal(t, 4, i)
 }
diff --git a/utils/utils.go b/utils/utils.go
index dd5c881..3d050c4 100644
--- a/utils/utils.go
+++ b/utils/utils.go
@@ -1,10 +1,18 @@
 package utils
 
 import (
+	"context"
 	"fmt"
 	"io/ioutil"
+	"math"
+	"net/url"
 	"os"
 	"strconv"
+	"strings"
+	"sync"
+	"time"
+	"unicode"
+	"unicode/utf8"
 
 	"github.com/projecteru2/agent/common"
 	"github.com/projecteru2/agent/types"
@@ -40,3 +48,91 @@ func Max(a, b int64) int64 {
 	}
 	return b
 }
+
+// UseLabelAsFilter returns whether labels should be used to filter workloads
+func UseLabelAsFilter() bool {
+	return os.Getenv("ERU_AGENT_EXPERIMENTAL_FILTER") == "label"
+}
+
+// CheckHostname checks whether the ERU_NODE_NAME env of a container matches the hostname of this agent
+// TODO should be removed in the future, should always use label to filter
+func CheckHostname(env []string, hostname string) bool {
+	for _, e := range env {
+		ps := strings.SplitN(e, "=", 2)
+		if len(ps) != 2 {
+			continue
+		}
+		if ps[0] == "ERU_NODE_NAME" && ps[1] == hostname {
+			return true
+		}
+	}
+	return false
+}
+
+// GetMaxAttemptsByTTL returns the largest n such that n attempts with
+// exponential backoff (sleeping 2^(n-1)-1 seconds in total) fit within the TTL
+func GetMaxAttemptsByTTL(ttl int64) int {
+	// if selfmon is enabled, retry 5 times
+	if ttl < 1 {
+		return 5
+	}
+	return int(math.Floor(math.Log2(float64(ttl)+1))) + 1
+}
+
+// ReplaceNonUtf8 replaces non-utf8 characters in \x format.
+func ReplaceNonUtf8(str string) string {
+	if str == "" {
+		return str
+	}
+
+	// a literal U+FFFD is legal utf8, escape it first so it is not confused with decode errors below
+	if strings.ContainsRune(str, utf8.RuneError) {
+		str = strings.ReplaceAll(str, string(utf8.RuneError), "\\xff\\xfd")
+	}
+
+	if utf8.ValidString(str) {
+		return str
+	}
+
+	v := make([]rune, 0, len(str))
+	for i, r := range str {
+		switch {
+		case r == utf8.RuneError:
+			_, size := utf8.DecodeRuneInString(str[i:])
+			if size > 0 {
+				v = append(v, []rune(fmt.Sprintf("\\x%02x", str[i:i+size]))...)
+			}
+		case unicode.IsControl(r) && r != '\r' && r != '\n':
+			v = append(v, []rune(fmt.Sprintf("\\x%02x", r))...)
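+		// valid printable runes pass through unchanged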
+ default: + v = append(v, r) + } + } + return string(v) +} + +var dockerized bool +var once sync.Once + +// IsDockerized returns if the agent is running in docker +func IsDockerized() bool { + once.Do(func() { + dockerized = os.Getenv(common.DOCKERIZED) != "" + }) + return dockerized +} + +// WithTimeout runs a function with given timeout +func WithTimeout(ctx context.Context, timeout time.Duration, f func(ctx2 context.Context)) { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + f(ctx) +} + +// GetIP Get hostIP +func GetIP(daemonHost string) string { + u, err := url.Parse(daemonHost) + if err != nil { + return "" + } + return u.Hostname() +} diff --git a/utils/utils_test.go b/utils/utils_test.go index ed9ea34..6ed3e94 100644 --- a/utils/utils_test.go +++ b/utils/utils_test.go @@ -1,6 +1,8 @@ package utils import ( + "context" + "fmt" "io" "io/ioutil" "math" @@ -107,3 +109,98 @@ func BenchmarkReadOnly(b *testing.B) { } } } + +func TestReplaceNonUtf8(t *testing.T) { + str := "test, 1\x00\xff\x01\xbb\xfd\xff\xfd\n" + assert.Equal(t, "test, 1\\x00\\xff\\x01\\xbb\\xfd\\xff\\xfd\n", ReplaceNonUtf8(str)) + + data := []byte{ + 0x7b, 0x0a, 0x20, 0x20, 0x22, 0x41, 0x44, 0x44, 0x52, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x30, 0x2e, + 0x31, 0x36, 0x38, 0x2e, 0x33, 0x39, 0x2e, 0x31, 0x39, 0x3a, 0x31, 0x30, 0x30, 0x30, 0x31, 0x22, + 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x43, 0x4d, 0x44, 0x22, 0x3a, 0x20, 0x22, 0x53, 0x45, 0x54, 0x20, + 0x74, 0x63, 0x69, 0x72, 0x5f, 0x53, 0x47, 0x5f, 0x5f, 0x34, 0x37, 0x31, 0x35, 0x5f, 0x70, 0x69, + 0x64, 0x20, 0x5c, 0x74, 0x2c, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x4e, 0xef, + 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x3f, 0x5c, 0x75, 0x30, 0x30, 0x31, 0x31, 0x36, 0x5c, 0x75, 0x30, + 0x30, 0x30, 0x37, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x5c, 0x75, 0x30, 0x30, + 0x31, 0x32, 0xef, 0xbf, 0xbd, 0x3f, 0x5c, 0x75, 0x30, 0x30, 0x31, 0x39, 0x41, 0x5c, 0x75, 0x30, + 0x30, 0x31, 0x37, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x55, 0x5b, 0xef, 0xbf, 0xbd, 0x3f, 0x21, + 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x48, 0xef, 0xbf, 0xbd, 0x27, 0x24, 0xef, 0xbf, 0xbd, 0x3f, + 0x28, 0xef, 0xbf, 0xbd, 0x3f, 0x7e, 0x48, 0xef, 0xbf, 0xbd, 0x5c, 0x75, 0x30, 0x30, 0x30, 0x36, + 0x30, 0xef, 0xbf, 0xbd, 0x29, 0x50, 0x3f, 0x7e, 0x40, 0x5c, 0x75, 0x30, 0x30, 0x30, 0x33, 0x20, + 0x45, 0x58, 0x20, 0x36, 0x30, 0x34, 0x38, 0x30, 0x30, 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x44, + 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x32, 0x31, 0x39, 0x30, + 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x49, 0x44, 0x22, 0x3a, 0x20, 0x22, 0x63, 0x6c, 0x75, 0x73, + 0x74, 0x65, 0x72, 0x3a, 0x68, 0x61, 0x6e, 0x71, 0x69, 0x61, 0x6e, 0x67, 0x2e, 0x74, 0x65, 0x73, + 0x74, 0x2e, 0x73, 0x67, 0x37, 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x4a, 0x52, 0x5f, 0x54, 0x49, + 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x36, 0x32, 0x37, 0x35, + 0x33, 0x30, 0x33, 0x36, 0x37, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x22, 0x2c, 0x0a, 0x20, 0x20, + 0x22, 0x50, 0x41, 0x43, 0x4b, 0x41, 0x47, 0x45, 0x22, 0x3a, 0x20, 0x22, 0x73, 0x6c, 0x6f, 0x77, + 0x6c, 0x6f, 0x67, 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x54, 0x49, 0x4d, 0x45, 0x53, 0x54, 0x41, + 0x4d, 0x50, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x36, 0x32, 0x37, 0x35, 0x33, 0x30, 0x33, 0x36, 0x37, + 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x54, 0x59, 0x50, 0x45, 0x22, 0x3a, 0x20, 0x22, 0x70, 0x72, + 0x6f, 0x78, 0x79, 0x22, 0x0a, 0x7d, 0x0a, + } + fmt.Println(ReplaceNonUtf8(string(data))) +} + +func TestMax(t *testing.T) { + 
assert.EqualValues(t, Max(-10086, 10086), 10086)
+}
+
+func TestUseLabelAsFilter(t *testing.T) {
+	currentValue := os.Getenv("ERU_AGENT_EXPERIMENTAL_FILTER")
+	defer os.Setenv("ERU_AGENT_EXPERIMENTAL_FILTER", currentValue)
+	os.Setenv("ERU_AGENT_EXPERIMENTAL_FILTER", "test")
+	assert.Equal(t, UseLabelAsFilter(), false)
+	os.Setenv("ERU_AGENT_EXPERIMENTAL_FILTER", "label")
+	assert.Equal(t, UseLabelAsFilter(), true)
+}
+
+func TestCheckHostname(t *testing.T) {
+	env := []string{"ERU_NODE_NAME=hostname", "LUNCH=free", "ANNUAL_LEAVE=unlimited"}
+	assert.Equal(t, CheckHostname(env, "hostname"), true)
+	assert.Equal(t, CheckHostname(env, ""), false)
+
+	env = []string{"LUNCH=free", "ANNUAL_LEAVE=unlimited"}
+	assert.Equal(t, CheckHostname(env, ""), false)
+}
+
+func TestGetMaxAttemptsByTTL(t *testing.T) {
+	assert.Equal(t, GetMaxAttemptsByTTL(0), 5) // selfmon enabled
+	assert.Equal(t, GetMaxAttemptsByTTL(1), 2)
+	assert.Equal(t, GetMaxAttemptsByTTL(8), 4) // 0+1+2+4
+}
+
+func TestGetIP(t *testing.T) {
+	host := "protocol://127.0.0.1:8888888888/some-api?param=none"
+	assert.Equal(t, GetIP(host), "127.0.0.1")
+	host = "invalid-string"
+	assert.Equal(t, GetIP(host), "")
+}
+
+func TestWithTimeout(t *testing.T) {
+	ctx := context.Background()
+	i := 0
+	WithTimeout(ctx, time.Second, func(ctx context.Context) {
+		select {
+		case <-ctx.Done():
+			return
+		default:
+			i = 1
+		}
+	})
+
+	assert.Equal(t, i, 1)
+
+	WithTimeout(ctx, time.Second, func(ctx context.Context) {
+		time.Sleep(2 * time.Second)
+		select {
+		case <-ctx.Done():
+			return
+		default:
+			i = 2
+		}
+	})
+
+	assert.NotEqual(t, i, 2)
+}

From 346acbfec7e524c435b6f5d6332c8019d44e40c1 Mon Sep 17 00:00:00 2001
From: DuodenumL
Date: Wed, 8 Sep 2021 15:01:47 +0800
Subject: [PATCH 3/6] Abstract "store"

---
 store/core/client.go         |  40 +++++++--
 store/core/container.go      |  59 ------------
 store/core/identifier.go     |  16 ++--
 store/core/node.go           | 169 +++++++++++++++++++++++++++++------
 store/core/rpcpool.go        |  42 +++++----
 store/core/workload.go       |  64 +++++++++++++
 store/mocks/RPCClientPool.go |  29 ------
 store/mocks/Store.go         | 143 ++++++++++++++++++++++-------
 store/mocks/template.go      | 119 ++++++++++++++++++++++++
 store/store.go               |  19 ++--
 10 files changed, 507 insertions(+), 193 deletions(-)
 delete mode 100644 store/core/container.go
 create mode 100644 store/core/workload.go
 delete mode 100644 store/mocks/RPCClientPool.go
 create mode 100644 store/mocks/template.go

diff --git a/store/core/client.go b/store/core/client.go
index 9f46104..deb155f 100644
--- a/store/core/client.go
+++ b/store/core/client.go
@@ -1,33 +1,55 @@
-package corestore
+package core
 
 import (
 	"context"
+	"sync"
 	"time"
 
 	"github.com/projecteru2/agent/types"
 	pb "github.com/projecteru2/core/rpc/gen"
 
 	"github.com/patrickmn/go-cache"
+	log "github.com/sirupsen/logrus"
 )
 
-// CoreStore use core to store meta
-type CoreStore struct {
-	clientPool RPCClientPool
+// Store uses core to store meta
+type Store struct {
+	clientPool *ClientPool
 	config     *types.Config
 	cache      *cache.Cache
 }
 
-// New new a CoreStore
-func New(ctx context.Context, config *types.Config) (*CoreStore, error) {
+var coreStore *Store
+var once sync.Once
+
+// New creates a Store
+func New(ctx context.Context, config *types.Config) (*Store, error) {
 	clientPool, err := NewCoreRPCClientPool(ctx, config)
 	if err != nil {
 		return nil, err
 	}
 	cache := cache.New(time.Duration(config.HealthCheck.CacheTTL)*time.Second, 24*time.Hour)
-	return &CoreStore{clientPool, config, cache}, nil
+	return &Store{clientPool, config, cache}, nil
 }
 
 // GetClient 
returns a gRPC client -func (c *CoreStore) GetClient() pb.CoreRPCClient { - return c.clientPool.GetClient() +func (c *Store) GetClient() pb.CoreRPCClient { + return c.clientPool.getClient() +} + +// Init inits the core store only once +func Init(ctx context.Context, config *types.Config) { + once.Do(func() { + var err error + coreStore, err = New(ctx, config) + if err != nil { + log.Errorf("[Init] failed to create core store, err: %v", err) + return + } + }) +} + +// Get returns the core store instance +func Get() *Store { + return coreStore } diff --git a/store/core/container.go b/store/core/container.go deleted file mode 100644 index bd6c3a7..0000000 --- a/store/core/container.go +++ /dev/null @@ -1,59 +0,0 @@ -package corestore - -import ( - "context" - "encoding/json" - "fmt" - "time" - - "github.com/projecteru2/agent/types" - pb "github.com/projecteru2/core/rpc/gen" - coretypes "github.com/projecteru2/core/types" -) - -// SetContainerStatus deploy containers -func (c *CoreStore) SetContainerStatus(ctx context.Context, container *types.Container, node *coretypes.Node, ttl int64) error { - status := fmt.Sprintf("%s|%v|%v", container.ID, container.Running, container.Healthy) - if ttl == 0 { - cached, ok := c.cache.Get(container.ID) - if ok { - str := cached.(string) - if str == status { - return nil - } - } - } - - bytes, err := json.Marshal(container.Labels) - if err != nil { - return err - } - containerStatus := &pb.WorkloadStatus{ - Id: container.ID, - Running: container.Running, - Healthy: container.Healthy, - Networks: container.Networks, - Extension: bytes, - Ttl: ttl, - - Appname: container.Name, - Entrypoint: container.EntryPoint, - Nodename: c.config.HostName, - } - - opts := &pb.SetWorkloadsStatusOptions{ - Status: []*pb.WorkloadStatus{containerStatus}, - } - - _, err = c.GetClient().SetWorkloadsStatus(ctx, opts) - - if ttl == 0 { - if err != nil { - c.cache.Delete(container.ID) - } else { - c.cache.Set(container.ID, status, time.Duration(c.config.HealthCheck.CacheTTL)*time.Second) - } - } - - return err -} diff --git a/store/core/identifier.go b/store/core/identifier.go index d80c5ab..232f750 100644 --- a/store/core/identifier.go +++ b/store/core/identifier.go @@ -1,17 +1,19 @@ -package corestore +package core import ( "context" + "github.com/projecteru2/agent/utils" pb "github.com/projecteru2/core/rpc/gen" ) -// GetCoreIdentifier returns the identifier of core -func (c *CoreStore) GetCoreIdentifier() string { - ctx, cancel := context.WithTimeout(context.Background(), c.config.GlobalConnectionTimeout) - defer cancel() - - resp, err := c.GetClient().Info(ctx, &pb.Empty{}) +// GetIdentifier returns the identifier of core +func (c *Store) GetIdentifier(ctx context.Context) string { + var resp *pb.CoreInfo + var err error + utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) { + resp, err = c.GetClient().Info(ctx, &pb.Empty{}) + }) if err != nil { return "" } diff --git a/store/core/node.go b/store/core/node.go index 9d88a9b..8929ef5 100644 --- a/store/core/node.go +++ b/store/core/node.go @@ -1,64 +1,177 @@ -package corestore +package core import ( "context" + "errors" + "github.com/projecteru2/agent/types" + "github.com/projecteru2/agent/utils" pb "github.com/projecteru2/core/rpc/gen" - "github.com/projecteru2/core/types" + coretypes "github.com/projecteru2/core/types" ) // GetNode return a node by core -func (c *CoreStore) GetNode(nodename string) (*types.Node, error) { - ctx, cancel := context.WithTimeout(context.Background(), 
c.config.GlobalConnectionTimeout)
-	defer cancel()
-	resp, err := c.GetClient().GetNode(ctx, &pb.GetNodeOptions{Nodename: nodename})
+func (c *Store) GetNode(ctx context.Context, nodename string) (*types.Node, error) {
+	var resp *pb.Node
+	var err error
+
+	utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		resp, err = c.GetClient().GetNode(ctx, &pb.GetNodeOptions{Nodename: nodename})
+	})
+
 	if err != nil {
 		return nil, err
 	}
-	cpus := types.CPUMap{}
-	for k, v := range resp.Cpu {
-		cpus[k] = int64(v)
-	}
-
 	node := &types.Node{
-		NodeMeta: types.NodeMeta{
-			Name:     resp.Name,
-			Podname:  resp.Podname,
-			Endpoint: resp.Endpoint,
-			CPU:      cpus,
-			MemCap:   resp.Memory,
-		},
+		Name:      resp.Name,
+		Podname:   resp.Podname,
+		Endpoint:  resp.Endpoint,
 		Available: resp.Available,
 	}
 	return node, nil
 }
 
 // UpdateNode update node status
-func (c *CoreStore) UpdateNode(node *types.Node) error {
+func (c *Store) UpdateNode(ctx context.Context, node *types.Node) error {
 	opts := &pb.SetNodeOptions{
-		Nodename: node.Name,
+		Nodename:  node.Name,
+		StatusOpt: coretypes.TriFalse,
 	}
 	if node.Available {
-		opts.StatusOpt = types.TriTrue
-	} else {
-		opts.StatusOpt = types.TriFalse
+		opts.StatusOpt = coretypes.TriTrue
 	}
-	ctx, cancel := context.WithTimeout(context.Background(), c.config.GlobalConnectionTimeout)
-	defer cancel()
-	_, err := c.GetClient().SetNode(ctx, opts)
+
+	var err error
+	utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		_, err = c.GetClient().SetNode(ctx, opts)
+	})
+
 	return err
 }
 
 // SetNodeStatus reports the status of node
 // SetNodeStatus always reports alive status,
 // when not alive, TTL will cause expiration of node
-func (c *CoreStore) SetNodeStatus(ctx context.Context, ttl int64) error {
+func (c *Store) SetNodeStatus(ctx context.Context, ttl int64) error {
 	opts := &pb.SetNodeStatusOptions{
 		Nodename: c.config.HostName,
 		Ttl:      ttl,
 	}
-	_, err := c.GetClient().SetNodeStatus(ctx, opts)
+
+	var err error
+	utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		_, err = c.GetClient().SetNodeStatus(ctx, opts)
+	})
+
 	return err
 }
+
+// SetNode sets the node availability; when status is false its workloads are marked down as well
+func (c *Store) SetNode(ctx context.Context, node string, status bool) error {
+	statusOpt := pb.TriOpt_TRUE
+	if !status {
+		statusOpt = pb.TriOpt_FALSE
+	}
+
+	var err error
+	utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		_, err = c.GetClient().SetNode(ctx, &pb.SetNodeOptions{
+			Nodename:      node,
+			StatusOpt:     statusOpt,
+			WorkloadsDown: !status,
+		})
+	})
+
+	return err
+}
+
+// GetNodeStatus gets the status of node
+func (c *Store) GetNodeStatus(ctx context.Context, nodename string) (*types.NodeStatus, error) {
+	var resp *pb.NodeStatusStreamMessage
+	var err error
+
+	utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		resp, err = c.GetClient().GetNodeStatus(ctx, &pb.GetNodeStatusOptions{Nodename: nodename})
+	})
+
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.Error != "" {
+		err = errors.New(resp.Error)
+	}
+
+	status := &types.NodeStatus{
+		Nodename: resp.Nodename,
+		Podname:  resp.Podname,
+		Alive:    resp.Alive,
+		Error:    err,
+	}
+	return status, nil
+}
+
+// NodeStatusStream watches the changes of node status
+func (c *Store) NodeStatusStream(ctx context.Context) (<-chan *types.NodeStatus, <-chan error) {
+	msgChan := make(chan *types.NodeStatus)
+	errChan := make(chan error)
+
+	go func() {
+		defer close(msgChan)
+		defer close(errChan)
+
+		client, err := c.GetClient().NodeStatusStream(ctx, &pb.Empty{})
+		if err != nil {
+			errChan <- err
+			return
+		}
+
+		for {
+			message, err := client.Recv()
+			if err != nil {
+				errChan <- err
+				return
+			}
+			nodeStatus := &types.NodeStatus{
+				Nodename: message.Nodename,
+				Podname:  message.Podname,
+				Alive:    message.Alive,
+				Error:    nil,
+			}
+			if message.Error != "" {
+				nodeStatus.Error = errors.New(message.Error)
+			}
+			msgChan <- nodeStatus
+		}
+	}()
+
+	return msgChan, errChan
+}
+
+// ListPodNodes lists nodes by given conditions, note that not all the fields are filled.
+func (c *Store) ListPodNodes(ctx context.Context, all bool, podname string, labels map[string]string) ([]*types.Node, error) {
+	var resp *pb.Nodes
+	var err error
+	utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		resp, err = c.GetClient().ListPodNodes(ctx, &pb.ListNodesOptions{
+			Podname: podname,
+			All:     all,
+			Labels:  labels,
+		})
+	})
+
+	if err != nil {
+		return nil, err
+	}
+
+	nodes := make([]*types.Node, 0, len(resp.Nodes))
+	for _, n := range resp.Nodes {
+		nodes = append(nodes, &types.Node{
+			Name:     n.Name,
+			Endpoint: n.Endpoint,
+			Podname:  n.Podname,
+			Labels:   n.Labels,
+		})
+	}
+	return nodes, nil
+}
diff --git a/store/core/rpcpool.go b/store/core/rpcpool.go
index 227cfb0..94f8554 100644
--- a/store/core/rpcpool.go
+++ b/store/core/rpcpool.go
@@ -1,4 +1,4 @@
-package corestore
+package core
 
 import (
 	"context"
@@ -7,51 +7,49 @@ import (
 	"time"
 
 	"github.com/projecteru2/agent/types"
+	"github.com/projecteru2/agent/utils"
 	"github.com/projecteru2/core/client"
 	pb "github.com/projecteru2/core/rpc/gen"
 
 	log "github.com/sirupsen/logrus"
 )
 
-// RPCClientPool pool of core rpc client
-type RPCClientPool interface {
-	GetClient() pb.CoreRPCClient
-}
-
 type clientWithStatus struct {
 	client pb.CoreRPCClient
 	addr   string
 	alive  bool
 }
 
-// CoreRPCClientPool implement of RPCClientPool
-type CoreRPCClientPool struct {
+// ClientPool a pool of core rpc clients
+type ClientPool struct {
 	rpcClients []*clientWithStatus
 }
 
 func checkAlive(ctx context.Context, rpc *clientWithStatus, timeout time.Duration) bool {
-	cctx, cancel := context.WithTimeout(ctx, timeout)
-	defer cancel()
-	_, err := rpc.client.Info(cctx, &pb.Empty{})
+	var err error
+	utils.WithTimeout(ctx, timeout, func(ctx context.Context) {
+		_, err = rpc.client.Info(ctx, &pb.Empty{})
+	})
 	if err != nil {
-		log.Errorf("[CoreRPCClientPool] connect to %s failed, err: %s", rpc.addr, err)
+		log.Errorf("[ClientPool] connect to %s failed, err: %s", rpc.addr, err)
 		return false
 	}
-	log.Debugf("[CoreRPCClientPool] connect to %s success", rpc.addr)
+	log.Debugf("[ClientPool] connect to %s success", rpc.addr)
 	return true
 }
 
 // NewCoreRPCClientPool .
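+// it dials every configured core address, skipping (but logging) addresses that fail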
-func NewCoreRPCClientPool(ctx context.Context, config *types.Config) (*CoreRPCClientPool, error) {
+func NewCoreRPCClientPool(ctx context.Context, config *types.Config) (*ClientPool, error) {
 	if len(config.Core) == 0 {
 		return nil, errors.New("core addr not set")
 	}
-	c := &CoreRPCClientPool{rpcClients: []*clientWithStatus{}}
+	c := &ClientPool{rpcClients: []*clientWithStatus{}}
 	for _, addr := range config.Core {
-		cctx, cancel := context.WithTimeout(ctx, config.GlobalConnectionTimeout)
-		defer cancel()
-
-		rpc, err := client.NewClient(cctx, addr, config.Auth)
+		var rpc *client.Client
+		var err error
+		utils.WithTimeout(ctx, config.GlobalConnectionTimeout, func(ctx context.Context) {
+			rpc, err = client.NewClient(ctx, addr, config.Auth)
+		})
 		if err != nil {
 			log.Errorf("[NewCoreRPCClientPool] connect to %s failed, err: %s", addr, err)
 			continue
@@ -90,7 +88,7 @@ func NewCoreRPCClientPool(ctx context.Context, config *types.Config) (*CoreRPCCl
 	return c, nil
 }
 
-func (c *CoreRPCClientPool) updateClientsStatus(ctx context.Context, timeout time.Duration) {
+func (c *ClientPool) updateClientsStatus(ctx context.Context, timeout time.Duration) {
 	wg := &sync.WaitGroup{}
 	for _, rpc := range c.rpcClients {
 		wg.Add(1)
@@ -102,8 +100,8 @@ func (c *CoreRPCClientPool) updateClientsStatus(ctx context.Context, timeout tim
 	wg.Wait()
 }
 
-// GetClient finds the first *client.Client instance with an active connection. If all connections are dead, returns the first one.
-func (c *CoreRPCClientPool) GetClient() pb.CoreRPCClient {
+// getClient finds the first *client.Client instance with an active connection. If all connections are dead, returns the first one.
+func (c *ClientPool) getClient() pb.CoreRPCClient {
 	for _, rpc := range c.rpcClients {
 		if rpc.alive {
 			return rpc.client
diff --git a/store/core/workload.go b/store/core/workload.go
new file mode 100644
index 0000000..b80c60c
--- /dev/null
+++ b/store/core/workload.go
@@ -0,0 +1,64 @@
+package core
+
+import (
+	"context"
+	"fmt"
+	"math/rand"
+	"time"
+
+	"github.com/projecteru2/agent/types"
+	"github.com/projecteru2/agent/utils"
+	pb "github.com/projecteru2/core/rpc/gen"
+)
+
+// getCacheTTL jitters the TTL into roughly [7/8, 9/8) of its value,
+// so that cached entries do not all expire at the same moment
+func getCacheTTL(ttl int) time.Duration {
+	delta := rand.Intn(ttl) / 4 // nolint
+	ttl = ttl - ttl/8 + delta
+	return time.Duration(ttl) * time.Second
+}
+
+// SetWorkloadStatus reports the status of a workload to core
+func (c *Store) SetWorkloadStatus(ctx context.Context, status *types.WorkloadStatus, ttl int64) error {
+	workloadStatus := fmt.Sprintf("%s|%v|%v", status.ID, status.Running, status.Healthy)
+	if ttl == 0 {
+		cached, ok := c.cache.Get(status.ID)
+		if ok {
+			str := cached.(string)
+			if str == workloadStatus {
+				return nil
+			}
+		}
+	}
+
+	statusPb := &pb.WorkloadStatus{
+		Id:        status.ID,
+		Running:   status.Running,
+		Healthy:   status.Healthy,
+		Networks:  status.Networks,
+		Extension: status.Extension,
+		Ttl:       ttl,
+
+		Appname:    status.Appname,
+		Entrypoint: status.Entrypoint,
+		Nodename:   c.config.HostName,
+	}
+
+	opts := &pb.SetWorkloadsStatusOptions{
+		Status: []*pb.WorkloadStatus{statusPb},
+	}
+
+	var err error
+	utils.WithTimeout(ctx, c.config.GlobalConnectionTimeout, func(ctx context.Context) {
+		_, err = c.GetClient().SetWorkloadsStatus(ctx, opts)
+	})
+
+	if ttl == 0 {
+		if err != nil {
+			c.cache.Delete(status.ID)
+		} else {
+			c.cache.Set(status.ID, workloadStatus, getCacheTTL(c.config.HealthCheck.CacheTTL))
+		}
+	}
+
+	return err
+}
diff --git a/store/mocks/RPCClientPool.go b/store/mocks/RPCClientPool.go
deleted file mode 100644
index 170025a..0000000
--- a/store/mocks/RPCClientPool.go
+++ 
/dev/null @@ -1,29 +0,0 @@ -// Code generated by mockery 2.9.0. DO NOT EDIT. - -package mocks - -import ( - pb "github.com/projecteru2/core/rpc/gen" - mock "github.com/stretchr/testify/mock" -) - -// RPCClientPool is an autogenerated mock type for the RPCClientPool type -type RPCClientPool struct { - mock.Mock -} - -// GetClient provides a mock function with given fields: -func (_m *RPCClientPool) GetClient() pb.CoreRPCClient { - ret := _m.Called() - - var r0 pb.CoreRPCClient - if rf, ok := ret.Get(0).(func() pb.CoreRPCClient); ok { - r0 = rf() - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPCClient) - } - } - - return r0 -} diff --git a/store/mocks/Store.go b/store/mocks/Store.go index 5568481..41131fe 100644 --- a/store/mocks/Store.go +++ b/store/mocks/Store.go @@ -1,15 +1,13 @@ -// Code generated by mockery 2.7.5. DO NOT EDIT. +// Code generated by mockery 2.9.0. DO NOT EDIT. package mocks import ( context "context" - agenttypes "github.com/projecteru2/agent/types" - mock "github.com/stretchr/testify/mock" - types "github.com/projecteru2/core/types" + types "github.com/projecteru2/agent/types" ) // Store is an autogenerated mock type for the Store type @@ -17,13 +15,13 @@ type Store struct { mock.Mock } -// GetCoreIdentifier provides a mock function with given fields: -func (_m *Store) GetCoreIdentifier() string { - ret := _m.Called() +// GetIdentifier provides a mock function with given fields: ctx +func (_m *Store) GetIdentifier(ctx context.Context) string { + ret := _m.Called(ctx) var r0 string - if rf, ok := ret.Get(0).(func() string); ok { - r0 = rf() + if rf, ok := ret.Get(0).(func(context.Context) string); ok { + r0 = rf(ctx) } else { r0 = ret.Get(0).(string) } @@ -31,13 +29,13 @@ func (_m *Store) GetCoreIdentifier() string { return r0 } -// GetNode provides a mock function with given fields: nodename -func (_m *Store) GetNode(nodename string) (*types.Node, error) { - ret := _m.Called(nodename) +// GetNode provides a mock function with given fields: ctx, nodename +func (_m *Store) GetNode(ctx context.Context, nodename string) (*types.Node, error) { + ret := _m.Called(ctx, nodename) var r0 *types.Node - if rf, ok := ret.Get(0).(func(string) *types.Node); ok { - r0 = rf(nodename) + if rf, ok := ret.Get(0).(func(context.Context, string) *types.Node); ok { + r0 = rf(ctx, nodename) } else { if ret.Get(0) != nil { r0 = ret.Get(0).(*types.Node) @@ -45,8 +43,31 @@ func (_m *Store) GetNode(nodename string) (*types.Node, error) { } var r1 error - if rf, ok := ret.Get(1).(func(string) error); ok { - r1 = rf(nodename) + if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = rf(ctx, nodename) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// GetNodeStatus provides a mock function with given fields: ctx, nodename +func (_m *Store) GetNodeStatus(ctx context.Context, nodename string) (*types.NodeStatus, error) { + ret := _m.Called(ctx, nodename) + + var r0 *types.NodeStatus + if rf, ok := ret.Get(0).(func(context.Context, string) *types.NodeStatus); ok { + r0 = rf(ctx, nodename) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*types.NodeStatus) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, string) error); ok { + r1 = rf(ctx, nodename) } else { r1 = ret.Error(1) } @@ -54,13 +75,61 @@ func (_m *Store) GetNode(nodename string) (*types.Node, error) { return r0, r1 } -// SetContainerStatus provides a mock function with given fields: _a0, _a1, _a2, _a3 -func (_m *Store) SetContainerStatus(_a0 context.Context, _a1 
*agenttypes.Container, _a2 *types.Node, _a3 int64) error { - ret := _m.Called(_a0, _a1, _a2, _a3) +// ListPodNodes provides a mock function with given fields: ctx, all, podname, labels +func (_m *Store) ListPodNodes(ctx context.Context, all bool, podname string, labels map[string]string) ([]*types.Node, error) { + ret := _m.Called(ctx, all, podname, labels) + + var r0 []*types.Node + if rf, ok := ret.Get(0).(func(context.Context, bool, string, map[string]string) []*types.Node); ok { + r0 = rf(ctx, all, podname, labels) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*types.Node) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, bool, string, map[string]string) error); ok { + r1 = rf(ctx, all, podname, labels) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// NodeStatusStream provides a mock function with given fields: ctx +func (_m *Store) NodeStatusStream(ctx context.Context) (<-chan *types.NodeStatus, <-chan error) { + ret := _m.Called(ctx) + + var r0 <-chan *types.NodeStatus + if rf, ok := ret.Get(0).(func(context.Context) <-chan *types.NodeStatus); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(<-chan *types.NodeStatus) + } + } + + var r1 <-chan error + if rf, ok := ret.Get(1).(func(context.Context) <-chan error); ok { + r1 = rf(ctx) + } else { + if ret.Get(1) != nil { + r1 = ret.Get(1).(<-chan error) + } + } + + return r0, r1 +} + +// SetNode provides a mock function with given fields: ctx, node, status +func (_m *Store) SetNode(ctx context.Context, node string, status bool) error { + ret := _m.Called(ctx, node, status) var r0 error - if rf, ok := ret.Get(0).(func(context.Context, *agenttypes.Container, *types.Node, int64) error); ok { - r0 = rf(_a0, _a1, _a2, _a3) + if rf, ok := ret.Get(0).(func(context.Context, string, bool) error); ok { + r0 = rf(ctx, node, status) } else { r0 = ret.Error(0) } @@ -68,13 +137,27 @@ func (_m *Store) SetContainerStatus(_a0 context.Context, _a1 *agenttypes.Contain return r0 } -// SetNodeStatus provides a mock function with given fields: _a0, _a1 -func (_m *Store) SetNodeStatus(_a0 context.Context, _a1 int64) error { - ret := _m.Called(_a0, _a1) +// SetNodeStatus provides a mock function with given fields: ctx, ttl +func (_m *Store) SetNodeStatus(ctx context.Context, ttl int64) error { + ret := _m.Called(ctx, ttl) var r0 error if rf, ok := ret.Get(0).(func(context.Context, int64) error); ok { - r0 = rf(_a0, _a1) + r0 = rf(ctx, ttl) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// SetWorkloadStatus provides a mock function with given fields: ctx, status, ttl +func (_m *Store) SetWorkloadStatus(ctx context.Context, status *types.WorkloadStatus, ttl int64) error { + ret := _m.Called(ctx, status, ttl) + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context, *types.WorkloadStatus, int64) error); ok { + r0 = rf(ctx, status, ttl) } else { r0 = ret.Error(0) } @@ -82,13 +165,13 @@ func (_m *Store) SetNodeStatus(_a0 context.Context, _a1 int64) error { return r0 } -// UpdateNode provides a mock function with given fields: node -func (_m *Store) UpdateNode(node *types.Node) error { - ret := _m.Called(node) +// UpdateNode provides a mock function with given fields: ctx, node +func (_m *Store) UpdateNode(ctx context.Context, node *types.Node) error { + ret := _m.Called(ctx, node) var r0 error - if rf, ok := ret.Get(0).(func(*types.Node) error); ok { - r0 = rf(node) + if rf, ok := ret.Get(0).(func(context.Context, *types.Node) error); ok { + r0 = rf(ctx, node) } else { r0 = 
ret.Error(0) } diff --git a/store/mocks/template.go b/store/mocks/template.go new file mode 100644 index 0000000..1ca35ff --- /dev/null +++ b/store/mocks/template.go @@ -0,0 +1,119 @@ +package mocks + +import ( + "context" + "fmt" + + "github.com/projecteru2/agent/store" + "github.com/projecteru2/agent/types" + "github.com/stretchr/testify/mock" +) + +// MockStore . +type MockStore struct { + Store + workloadStatus map[string]*types.WorkloadStatus + nodeStatus map[string]*types.NodeStatus + nodeInfo map[string]*types.Node + msgChan chan *types.NodeStatus + errChan chan error +} + +func (m *MockStore) init() { + m.workloadStatus = map[string]*types.WorkloadStatus{} + m.nodeStatus = map[string]*types.NodeStatus{} + m.msgChan = make(chan *types.NodeStatus) + m.errChan = make(chan error) + + m.nodeInfo = map[string]*types.Node{ + "fake": { + Name: "fake", + Endpoint: "eva://127.0.0.1:6666", + }, + "faker": { + Name: "faker", + Endpoint: "eva://127.0.0.1:6667", + }, + } +} + +// FromTemplate returns a mock store instance created from template +func FromTemplate() store.Store { + m := &MockStore{} + m.init() + m.On("GetNode", mock.Anything, mock.Anything).Return(func(ctx context.Context, nodename string) *types.Node { + return m.nodeInfo[nodename] + }, nil) + m.On("SetNodeStatus", mock.Anything, mock.Anything).Return(func(ctx context.Context, ttl int64) error { + fmt.Printf("[MockStore] set node status\n") + nodename := "fake" + if status, ok := m.nodeStatus[nodename]; ok { + status.Alive = true + } else { + m.nodeStatus[nodename] = &types.NodeStatus{ + Nodename: nodename, + Alive: true, + } + } + return nil + }) + m.On("GetNodeStatus", mock.Anything, mock.Anything).Return(func(ctx context.Context, nodename string) *types.NodeStatus { + if status, ok := m.nodeStatus[nodename]; ok { + return status + } + return &types.NodeStatus{ + Nodename: nodename, + Alive: false, + } + }, nil) + m.On("SetWorkloadStatus", mock.Anything, mock.Anything, mock.Anything).Return(func(ctx context.Context, status *types.WorkloadStatus, ttl int64) error { + fmt.Printf("[MockStore] set workload status: %+v\n", status) + m.workloadStatus[status.ID] = status + return nil + }) + m.On("GetIdentifier", mock.Anything).Return("fake-identifier") + m.On("SetNode", mock.Anything, mock.Anything, mock.Anything).Return(func(ctx context.Context, node string, status bool) error { + fmt.Printf("[MockStore] set node %s as status: %v\n", node, status) + if nodeInfo, ok := m.nodeInfo[node]; ok { + nodeInfo.Available = status + } else { + m.nodeInfo[node] = &types.Node{ + Name: node, + Available: status, + } + } + return nil + }) + m.On("ListPodNodes", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return([]*types.Node{ + { + Name: "fake", + }, + { + Name: "faker", + }, + }, nil) + m.On("NodeStatusStream", mock.Anything).Return(func(ctx context.Context) <-chan *types.NodeStatus { + return m.msgChan + }, func(ctx context.Context) <-chan error { + return m.errChan + }) + + return m +} + +// GetMockWorkloadStatus returns the mock workload status by ID +func (m *MockStore) GetMockWorkloadStatus(ID string) *types.WorkloadStatus { + return m.workloadStatus[ID] +} + +// StartNodeStatusStream "faker" up, "fake" down. 
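+// it pushes both updates through the channel returned by the mocked NodeStatusStream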
+func (m *MockStore) StartNodeStatusStream() { + m.msgChan <- &types.NodeStatus{ + Nodename: "faker", + Alive: true, + } + m.msgChan <- &types.NodeStatus{ + Nodename: "fake", + Alive: false, + } +} diff --git a/store/store.go b/store/store.go index c5d10a1..782c456 100644 --- a/store/store.go +++ b/store/store.go @@ -4,16 +4,17 @@ import ( "context" "github.com/projecteru2/agent/types" - coretypes "github.com/projecteru2/core/types" ) -// Store indicate store +// Store wrapper of remote calls type Store interface { - GetNode(nodename string) (*coretypes.Node, error) - UpdateNode(node *coretypes.Node) error - - SetNodeStatus(context.Context, int64) error - SetContainerStatus(context.Context, *types.Container, *coretypes.Node, int64) error - - GetCoreIdentifier() string + GetNode(ctx context.Context, nodename string) (*types.Node, error) + UpdateNode(ctx context.Context, node *types.Node) error + SetNodeStatus(ctx context.Context, ttl int64) error + GetNodeStatus(ctx context.Context, nodename string) (*types.NodeStatus, error) + SetWorkloadStatus(ctx context.Context, status *types.WorkloadStatus, ttl int64) error + SetNode(ctx context.Context, node string, status bool) error + GetIdentifier(ctx context.Context) string + NodeStatusStream(ctx context.Context) (<-chan *types.NodeStatus, <-chan error) + ListPodNodes(ctx context.Context, all bool, podname string, labels map[string]string) ([]*types.Node, error) } From 2f158f0335f3027776b30ac1a731b92e5685ae02 Mon Sep 17 00:00:00 2001 From: DuodenumL Date: Wed, 8 Sep 2021 15:04:41 +0800 Subject: [PATCH 4/6] Replace "Engine" with "WorkloadManager" and "NodeManager" --- agent.go | 66 +++- agent.yaml.sample | 12 + api/http.go | 37 +- common/common.go | 15 + engine/attach.go | 99 ----- engine/engine.go | 148 -------- engine/engine_test.go | 310 --------------- engine/health_check.go | 191 ---------- engine/health_check_test.go | 73 ---- engine/helper.go | 166 -------- engine/helper_test.go | 41 -- engine/load.go | 34 -- engine/load_test.go | 29 -- engine/metrics.go | 374 ------------------- engine/monitor.go | 64 ---- engine/monitor_test.go | 53 --- engine/stat.go | 138 ------- engine/stat_linux.go | 48 --- engine/stat_notlinux.go | 31 -- engine/status/container.go | 71 ---- engine/status_report.go | 45 --- go.mod | 7 +- go.sum | 29 +- {engine/logs => logs}/enc.go | 3 +- {engine/logs => logs}/writer.go | 1 + {engine/logs => logs}/writer_test.go | 4 +- manager/node/heartbeat.go | 53 +++ manager/node/heartbeat_test.go | 30 ++ manager/node/manager.go | 89 +++++ manager/node/manager_test.go | 36 ++ manager/workload/attach.go | 101 +++++ manager/workload/attach_test.go | 30 ++ {engine/status => manager/workload}/event.go | 17 +- manager/workload/event_test.go | 50 +++ manager/workload/filter.go | 26 ++ manager/workload/health_check.go | 85 +++++ manager/workload/health_check_test.go | 19 + manager/workload/load.go | 44 +++ manager/workload/load_test.go | 43 +++ manager/workload/log.go | 84 +++++ manager/workload/log_test.go | 78 ++++ manager/workload/manager.go | 124 ++++++ manager/workload/manager_test.go | 46 +++ manager/workload/monitor.go | 61 +++ 44 files changed, 1137 insertions(+), 1968 deletions(-) delete mode 100644 engine/attach.go delete mode 100644 engine/engine.go delete mode 100644 engine/engine_test.go delete mode 100644 engine/health_check.go delete mode 100644 engine/health_check_test.go delete mode 100644 engine/helper.go delete mode 100644 engine/helper_test.go delete mode 100644 engine/load.go delete mode 100644 engine/load_test.go 
delete mode 100644 engine/metrics.go delete mode 100644 engine/monitor.go delete mode 100644 engine/monitor_test.go delete mode 100644 engine/stat.go delete mode 100644 engine/stat_linux.go delete mode 100644 engine/stat_notlinux.go delete mode 100644 engine/status/container.go delete mode 100644 engine/status_report.go rename {engine/logs => logs}/enc.go (99%) rename {engine/logs => logs}/writer.go (99%) rename {engine/logs => logs}/writer_test.go (100%) create mode 100644 manager/node/heartbeat.go create mode 100644 manager/node/heartbeat_test.go create mode 100644 manager/node/manager.go create mode 100644 manager/node/manager_test.go create mode 100644 manager/workload/attach.go create mode 100644 manager/workload/attach_test.go rename {engine/status => manager/workload}/event.go (59%) create mode 100644 manager/workload/event_test.go create mode 100644 manager/workload/filter.go create mode 100644 manager/workload/health_check.go create mode 100644 manager/workload/health_check_test.go create mode 100644 manager/workload/load.go create mode 100644 manager/workload/load_test.go create mode 100644 manager/workload/log.go create mode 100644 manager/workload/log_test.go create mode 100644 manager/workload/manager.go create mode 100644 manager/workload/manager_test.go create mode 100644 manager/workload/monitor.go diff --git a/agent.go b/agent.go index d7510f6..40a4138 100644 --- a/agent.go +++ b/agent.go @@ -2,23 +2,24 @@ package main import ( "fmt" + "math/rand" "os" "os/signal" "syscall" - - _ "go.uber.org/automaxprocs" + "time" "github.com/projecteru2/agent/api" - "github.com/projecteru2/agent/engine" + "github.com/projecteru2/agent/manager/node" + "github.com/projecteru2/agent/manager/workload" "github.com/projecteru2/agent/selfmon" "github.com/projecteru2/agent/types" "github.com/projecteru2/agent/utils" "github.com/projecteru2/agent/version" - "github.com/projecteru2/agent/watcher" "github.com/jinzhu/configor" log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" + _ "go.uber.org/automaxprocs" ) func setupLogLevel(l string) error { @@ -44,6 +45,8 @@ func initConfig(c *cli.Context) *types.Config { } func serve(c *cli.Context) error { + rand.Seed(time.Now().UnixNano()) + if err := setupLogLevel(c.String("log-level")); err != nil { log.Fatal(err) } @@ -53,22 +56,45 @@ func serve(c *cli.Context) error { defer os.Remove(config.PidFile) if c.Bool("selfmon") { - return selfmon.Monitor(c.Context, config) + mon, err := selfmon.New(c.Context, config) + if err != nil { + return err + } + return mon.Run(c.Context) } ctx, cancel := signal.NotifyContext(c.Context, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) defer cancel() - watcher.InitMonitor() - go watcher.LogMonitor.Serve(ctx) + errChan := make(chan error, 1) - agent, err := engine.NewEngine(ctx, config) + workloadManager, err := workload.NewManager(ctx, config) if err != nil { return err } + go func() { + errChan <- workloadManager.Run(ctx) + }() + + nodeManager, err := node.NewManager(ctx, config) + if err != nil { + return err + } + go func() { + errChan <- nodeManager.Run(ctx) + }() + + apiHandler := api.NewHandler(config, workloadManager) + go apiHandler.Serve() - go api.Serve(config.API.Addr) - return agent.Run(ctx) + select { + case err := <-errChan: + log.Debugf("[agent] err: %v", err) + return err + case <-ctx.Done(): + log.Info("[agent] Agent caught system signal, exiting") + return nil + } } func main() { @@ -93,6 +119,12 @@ func main() { Usage: "set log level", EnvVars: []string{"ERU_AGENT_LOG_LEVEL"}, }, + 
&cli.StringFlag{ + Name: "store", + Value: "grpc", + Usage: "store type", + EnvVars: []string{"ERU_AGENT_STORE"}, + }, &cli.StringFlag{ Name: "core-endpoint", Value: "", @@ -111,6 +143,12 @@ func main() { Usage: "core password", EnvVars: []string{"ERU_AGENT_CORE_PASSWORD"}, }, + &cli.StringFlag{ + Name: "runtime", + Value: "docker", + Usage: "runtime type", + EnvVars: []string{"ERU_AGENT_RUNTIME"}, + }, &cli.StringFlag{ Name: "docker-endpoint", Value: "", @@ -132,7 +170,7 @@ func main() { &cli.StringFlag{ Name: "api-addr", Value: "", - Usage: "agent API serving address", + Usage: "agent api serving address", EnvVars: []string{"ERU_AGENT_API_ADDR"}, }, &cli.StringSliceFlag{ @@ -184,6 +222,12 @@ func main() { Value: false, Usage: "run this agent as a selfmon daemon", }, + &cli.StringFlag{ + Name: "kv", + Value: "etcd", + Usage: "kv type", + EnvVars: []string{"ERU_AGENT_KV"}, + }, &cli.BoolFlag{ Name: "check-only-mine", Value: false, diff --git a/agent.yaml.sample b/agent.yaml.sample index 26cfc1d..8eb92e5 100644 --- a/agent.yaml.sample +++ b/agent.yaml.sample @@ -4,6 +4,18 @@ # This option is not required as the default value is "/tmp/agent.pid". pid: /tmp/agent.pid +# store defines the type of core service. +# This option is not required as the default value is "grpc". +store: grpc + +# runtime defines the type of runtime. +# This option is not required as the default value is "docker". +runtime: docker + +# kv defines the type of kv store. +# This option is not required as the default value is "etcd". +kv: etcd + # core defines the address of eru-core component. # This option is not required as the default value is "127.0.0.1:5001". core: diff --git a/api/http.go b/api/http.go index b9acfb2..c0d04ee 100644 --- a/api/http.go +++ b/api/http.go @@ -3,19 +3,17 @@ package api import ( "encoding/json" "net/http" - "runtime/pprof" - + "runtime/pprof" // nolint // enable profile _ "net/http/pprof" // nolint + "github.com/projecteru2/agent/manager/workload" "github.com/projecteru2/agent/types" "github.com/projecteru2/agent/version" - "github.com/projecteru2/agent/watcher" - coreutils "github.com/projecteru2/core/utils" - "github.com/prometheus/client_golang/prometheus/promhttp" - log "github.com/sirupsen/logrus" "github.com/bmizerany/pat" + "github.com/prometheus/client_golang/prometheus/promhttp" + log "github.com/sirupsen/logrus" ) // JSON define a json @@ -23,6 +21,8 @@ type JSON map[string]interface{} // Handler define handler type Handler struct { + config *types.Config + workloadManager *workload.Manager } // URL /version/ @@ -58,24 +58,27 @@ func (h *Handler) log(w http.ResponseWriter, req *http.Request) { log.Errorf("[apiLog] connect failed %v", err) return } - logConsumer := &types.LogConsumer{ - ID: coreutils.RandomString(8), - App: app, Conn: conn, Buf: buf, - } - watcher.LogMonitor.ConsumerC <- logConsumer - log.Infof("[apiLog] %s %s log attached", app, logConsumer.ID) + defer conn.Close() + h.workloadManager.Subscribe(app, buf) + } +} + +// NewHandler new api http handler +func NewHandler(config *types.Config, workloadManager *workload.Manager) *Handler { + return &Handler{ + config: config, + workloadManager: workloadManager, } } // Serve start a api service // blocks by http.ListenAndServe // run this in a separated goroutine -func Serve(addr string) { - if addr == "" { +func (h *Handler) Serve() { + if h.config.API.Addr == "" { return } - h := &Handler{} restfulAPIServer := pat.New() handlers := map[string]map[string]func(http.ResponseWriter, *http.Request){ "GET": { @@ -93,9 +96,9 
@@ func Serve(addr string) { http.Handle("/", restfulAPIServer) http.Handle("/metrics", promhttp.Handler()) - log.Infof("[apiServe] http api started %s", addr) + log.Infof("[apiServe] http api started %s", h.config.API.Addr) - err := http.ListenAndServe(addr, nil) + err := http.ListenAndServe(h.config.API.Addr, nil) if err != nil { log.Panicf("http api failed %s", err) } diff --git a/common/common.go b/common/common.go index 2433812..3d177ed 100644 --- a/common/common.go +++ b/common/common.go @@ -17,4 +17,19 @@ const ( // LocalIP . LocalIP = "127.0.0.1" + + // DockerRuntime use docker as runtime + DockerRuntime = "docker" + // MocksRuntime use the mock runtime + MocksRuntime = "mocks" + + // GRPCStore use gRPC as store + GRPCStore = "grpc" + // MocksStore use the mock store + MocksStore = "mocks" + + // ETCDKV use ETCD as KV + ETCDKV = "etcd" + // MocksKV use the mock KV + MocksKV = "mocks" ) diff --git a/engine/attach.go b/engine/attach.go deleted file mode 100644 index c6f7d81..0000000 --- a/engine/attach.go +++ /dev/null @@ -1,99 +0,0 @@ -package engine - -import ( - "bufio" - "context" - "io" - "net/http/httputil" - "strings" - "time" - - "github.com/docker/docker/pkg/stdcopy" - - dockertypes "github.com/docker/docker/api/types" - coreutils "github.com/projecteru2/core/utils" - log "github.com/sirupsen/logrus" - - "github.com/docker/go-units" - "github.com/projecteru2/agent/common" - "github.com/projecteru2/agent/engine/logs" - "github.com/projecteru2/agent/types" - "github.com/projecteru2/agent/utils" - "github.com/projecteru2/agent/watcher" -) - -func (e *Engine) attach(container *types.Container) { - transfer := e.forwards.Get(container.ID, 0) - if transfer == "" { - transfer = logs.Discard - } - writer, err := logs.NewWriter(transfer, e.config.Log.Stdout) - if err != nil { - log.Errorf("[attach] Create log forward failed %s", err) - return - } - - cap, _ := units.RAMInBytes("10M") - outr, outw := utils.NewBufPipe(cap) - errr, errw := utils.NewBufPipe(cap) - ctx := context.Background() - cancelCtx, cancel := context.WithCancel(ctx) - go func() { - options := dockertypes.ContainerAttachOptions{ - Stream: true, - Stdin: false, - Stdout: true, - Stderr: true, - } - resp, err := e.docker.ContainerAttach(ctx, container.ID, options) - if err != nil && err != httputil.ErrPersistEOF { // nolint - log.Errorf("[attach] attach %s container %s failed %s", container.Name, coreutils.ShortID(container.ID), err) - return - } - defer resp.Close() - defer outw.Close() - defer errw.Close() - defer cancel() - _, err = stdcopy.StdCopy(outw, errw, resp.Reader) - if err != nil { - log.Errorf("[attach] attach get stream failed %s", err) - } - log.Infof("[attach] attach %s container %s finished", container.Name, coreutils.ShortID(container.ID)) - }() - log.Infof("[attach] attach %s container %s success", container.Name, coreutils.ShortID(container.ID)) - // attach metrics - go e.stat(cancelCtx, container) - - extra := container.LogFieldExtra() - pump := func(typ string, source io.Reader) { - buf := bufio.NewReader(source) - for { - data, err := buf.ReadString('\n') - if err != nil { - if err != io.EOF { - log.Errorf("[attach] attach pump %s %s %s %s", container.Name, coreutils.ShortID(container.ID), typ, err) - } - return - } - data = strings.TrimSuffix(data, "\n") - data = strings.TrimSuffix(data, "\r") - l := &types.Log{ - ID: container.ID, - Name: container.Name, - Type: typ, - EntryPoint: container.EntryPoint, - Ident: container.Ident, - Data: replaceNonUtf8(data), - Datetime: 
time.Now().Format(common.DateTimeFormat), - Extra: extra, - } - watcher.LogMonitor.LogC <- l - if err := writer.Write(l); err != nil && !(container.EntryPoint == "agent" && e.dockerized) { - log.Errorf("[attach] %s container %s_%s write failed %v", container.Name, container.EntryPoint, coreutils.ShortID(container.ID), err) - log.Errorf("[attach] %s", data) - } - } - } - go pump("stdout", outr) - go pump("stderr", errr) -} diff --git a/engine/engine.go b/engine/engine.go deleted file mode 100644 index 4af299c..0000000 --- a/engine/engine.go +++ /dev/null @@ -1,148 +0,0 @@ -package engine - -import ( - "context" - "os" - - "github.com/projecteru2/agent/common" - "github.com/projecteru2/agent/store" - corestore "github.com/projecteru2/agent/store/core" - "github.com/projecteru2/agent/types" - "github.com/projecteru2/agent/utils" - dockerengine "github.com/projecteru2/core/engine/docker" - coretypes "github.com/projecteru2/core/types" - coreutils "github.com/projecteru2/core/utils" - - engineapi "github.com/docker/docker/client" - "github.com/shirou/gopsutil/cpu" - "github.com/shirou/gopsutil/mem" - log "github.com/sirupsen/logrus" -) - -// Engine is agent -type Engine struct { - store store.Store - config *types.Config - docker *engineapi.Client - node *coretypes.Node - nodeIP string - cpuCore float64 // 因为到时候要乘以 float64 所以就直接转换成 float64 吧 - memory int64 - cas utils.GroupCAS - - transfers *utils.HashBackends - forwards *utils.HashBackends - - dockerized bool - - // coreIdentifier indicates which eru this agent belongs to - // it can be used to identify the corresponding core - // and all containers that belong to this core - coreIdentifier string -} - -// NewEngine make a engine instance -func NewEngine(ctx context.Context, config *types.Config) (*Engine, error) { - engine := &Engine{} - docker, err := utils.MakeDockerClient(config) - if err != nil { - return nil, err - } - - store, err := corestore.New(ctx, config) - if err != nil { - return nil, err - } - - // set core identifier - engine.coreIdentifier = store.GetCoreIdentifier() - - // get self - node, err := store.GetNode(config.HostName) - if err != nil { - return nil, err - } - - engine.config = config - engine.store = store - engine.docker = docker - engine.node = node - engine.nodeIP = dockerengine.GetIP(ctx, node.Endpoint) - if engine.nodeIP == "" { - engine.nodeIP = common.LocalIP - } - log.Infof("[NewEngine] Host IP %s", engine.nodeIP) - engine.dockerized = os.Getenv(common.DOCKERIZED) != "" - if engine.dockerized { - os.Setenv("HOST_PROC", "/hostProc") - } - cpus, err := cpu.Info() - if err != nil { - return nil, err - } - log.Infof("[NewEngine] Host has %d cpus", len(cpus)) - memory, err := mem.VirtualMemory() - if err != nil { - return nil, err - } - log.Infof("[NewEngine] Host has %d memory", memory.Total) - engine.cpuCore = float64(len(cpus)) - engine.memory = int64(memory.Total) - engine.transfers = utils.NewHashBackends(config.Metrics.Transfers) - engine.forwards = utils.NewHashBackends(config.Log.Forwards) - return engine, nil -} - -// Run will start agent -// blocks by ctx.Done() -// either call this in a separated goroutine, or used in main to block main goroutine -func (e *Engine) Run(ctx context.Context) error { - // load container - if err := e.load(ctx); err != nil { - return err - } - // start status watcher - eventChan, errChan := e.initMonitor() - go e.monitor(eventChan) - - // start health check - go e.healthCheck(ctx) - - // start node heartbeat - go e.heartbeat(ctx) - - log.Info("[Engine] Node activated") - - 
// wait for signal - select { - case <-ctx.Done(): - log.Info("[Engine] Agent caught system signal, exiting") - return nil - case err := <-errChan: - if err := e.crash(ctx); err != nil { - log.Infof("[Engine] Mark node crash failed %v", err) - } - return err - } -} - -func (e *Engine) crash(ctx context.Context) error { - log.Info("[crash] mark all containers unhealthy") - containers, err := e.listContainers(false, nil) - if err != nil { - return err - } - for _, c := range containers { - container, err := e.detectContainer(ctx, c.ID) - if err != nil { - return err - } - container.Healthy = false - - if err := e.setContainerStatus(container); err != nil { - return err - } - log.Infof("[crash] mark %s unhealthy", coreutils.ShortID(container.ID)) - } - return e.activated(false) -} diff --git a/engine/engine_test.go b/engine/engine_test.go deleted file mode 100644 index e4c8ff6..0000000 --- a/engine/engine_test.go +++ /dev/null @@ -1,310 +0,0 @@ -package engine - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "io" - "io/ioutil" - "net/http" - - "runtime" - "strings" - "testing" - "time" - - "github.com/docker/docker/api/types" - "github.com/docker/docker/api/types/container" - "github.com/docker/docker/api/types/events" - "github.com/docker/docker/api/types/filters" - "github.com/docker/docker/client" - "github.com/docker/docker/pkg/ioutils" - "github.com/docker/docker/pkg/stringid" - "github.com/projecteru2/agent/store/mocks" - agenttypes "github.com/projecteru2/agent/types" - agentutils "github.com/projecteru2/agent/utils" - "github.com/stretchr/testify/assert" -) - -const ( - apiVersion = "v1.25" - mockID = "f1f9da344e8f8f90f73899ddad02da6cdf2218bbe52413af2bcfef4fba2d22de" -) - -var ( - i int - err error - mockStore *mocks.Store -) - -func testlogF(format interface{}, a ...interface{}) { - var ( - caller string - main string - ) - _, fn, line, _ := runtime.Caller(1) - caller = fmt.Sprintf("%s:%d", fn, line) - s := strings.Split(caller, "/") - caller = s[len(s)-1] - - switch format.(type) { - case string: - main = fmt.Sprintf(format.(string), a...) 
- default: - main = fmt.Sprintf("%v", format) - } - fmt.Printf("%s: %s \n", caller, main) -} - -func mockEvents(ctx context.Context) (*http.Response, error) { - pr, pw := io.Pipe() - w := ioutils.NewWriteFlusher(pw) - msgChan := make(chan []byte) - - filters := filters.NewArgs() - filters.Add("type", events.ContainerEventType) - - eventsCases := struct { - options types.EventsOptions - events []events.Message - }{ - options: types.EventsOptions{ - Filters: filters, - }, - events: []events.Message{ - { - Type: "container", - ID: stringid.GenerateRandomID(), - Action: "create", - Status: "state create", - }, - { - Type: "container", - ID: stringid.GenerateRandomID(), - Action: "die", - Status: "state die", - }, - { - Type: "container", - ID: stringid.GenerateRandomID(), - Action: "destroy", - Status: "state destroy", - }, - }, - } - go func() { - for _, e := range eventsCases.events { - b, _ := json.Marshal(e) - msgChan <- b - time.Sleep(1000 * time.Millisecond) - } - - }() - go func() { - for { - select { - case <-ctx.Done(): - testlogF("Context canceld") - w.Close() - pw.Close() - pr.Close() - return - case msg := <-msgChan: - w.Write(msg) - } - } - }() - - return &http.Response{ - StatusCode: http.StatusOK, - Body: pr, - }, nil -} - -func mockPing() (*http.Response, error) { - header := http.Header{} - header.Add("OSType", "Linux") - header.Add("API-Version", apiVersion) - header.Add("Docker-Experimental", "true") - return &http.Response{ - StatusCode: http.StatusOK, - Header: header, - }, nil -} - -func mockDockerDoer(r *http.Request) (*http.Response, error) { - var b []byte - prefix := fmt.Sprintf("/%s", apiVersion) - path := strings.TrimPrefix(r.URL.Path, prefix) - - // get container id - containerID := "" - if strings.HasPrefix(path, "/containers/") { - cid := strings.TrimPrefix(path, "/containers/") - containerID = strings.Split(cid, "/")[0] - if containerID == "" { - containerID = "_" - } - } - - // mock docker responses - switch path { - case "/info": // docker info - testlogF("mock docker info response") - info := &types.Info{ - ID: "daemonID", - Containers: 3, - } - b, _ = json.Marshal(info) - case "/_ping": // just ping - testlogF("mock docker ping response") - return mockPing() - case fmt.Sprintf("/containers/%s/json", containerID): - testlogF("inspect container %s", containerID) - b, _ = json.Marshal(types.ContainerJSON{ - ContainerJSONBase: &types.ContainerJSONBase{ - ID: containerID, - Image: "image:latest", - Name: "name_entry_ident", - State: &types.ContainerState{ - Running: true, - }, - HostConfig: &container.HostConfig{ - Resources: container.Resources{ - CPUQuota: 9999, - CPUPeriod: 9999, - Memory: 99999, - }, - }, - }, - Config: &container.Config{ - Labels: map[string]string{ - "ERU": "1", - "healthcheck": "1", - "healthcheck_http": "80", - "healthcheck_code": "404", - "healthcheck_url": "/", - }, - Image: "image:latest", - }, - }) - case "/networks/bridge/disconnect": - var disconnect types.NetworkDisconnect - json.NewDecoder(r.Body).Decode(&disconnect) - testlogF("disconnect container %s from bridge network", disconnect.Container) - b = []byte("body") - case "/networks": - b, _ = json.Marshal([]types.NetworkResource{ - { - Name: "mock_network", - Driver: "bridge", - }, - }) - case "/events": - testlogF("mock docker events") - return mockEvents(r.Context()) - case "/containers/json": - testlogF("mock docker ps") - b, _ = json.Marshal([]types.Container{ - { - ID: stringid.GenerateRandomID(), - Names: []string{"hello_docker_ident"}, - Image: "test:image", - ImageID: 
stringid.GenerateRandomID(), - Command: "top", - Labels: map[string]string{"ERU": "1"}, - }, - }) - default: - errMsg := fmt.Sprintf("Server Error, unknown path: %s", path) - return errorMock(500, errMsg) - } - - return &http.Response{ - StatusCode: http.StatusOK, - Body: ioutil.NopCloser(bytes.NewReader(b)), - }, nil -} - -func newMockClient(doer func(*http.Request) (*http.Response, error)) *http.Client { - r := &http.Client{ - Transport: transportFunc(doer), - } - return r -} - -func mockDockerHTTPClient() *http.Client { - return newMockClient(mockDockerDoer) -} - -func errorMock(statusCode int, message string) (*http.Response, error) { - header := http.Header{} - header.Set("Content-Type", "application/json") - - body, err := json.Marshal(&types.ErrorResponse{ - Message: message, - }) - if err != nil { - return nil, err - } - - return &http.Response{ - StatusCode: statusCode, - Body: ioutil.NopCloser(bytes.NewReader(body)), - Header: header, - }, fmt.Errorf(message) -} - -// transportFunc allows us to inject a mock transport for testing. We define it -// here so we can detect the tlsconfig and return nil for only this type. -type transportFunc func(*http.Request) (*http.Response, error) - -func (tf transportFunc) RoundTrip(req *http.Request) (*http.Response, error) { - return tf(req) -} - -func mockNewEngine() *Engine { - engine := new(Engine) - mockStore = new(mocks.Store) - - docker, err := client.NewClient("http://127.0.0.1", "1.25", mockDockerHTTPClient(), nil) - if err != nil { - panic(err) - } - - engine.config = &agenttypes.Config{} - engine.store = mockStore - engine.docker = docker - engine.cpuCore = float64(runtime.NumCPU()) - engine.transfers = agentutils.NewHashBackends([]string{"127.0.0.1:8125"}) - engine.forwards = agentutils.NewHashBackends([]string{"udp://127.0.0.1:5144"}) - - return engine -} - -func TestPing(t *testing.T) { - e := mockNewEngine() - _, err := e.docker.Ping(context.Background()) - assert.NoError(t, err) -} - -func TestEvents(t *testing.T) { - docker, err := client.NewClient("http://10.0.0.1", "1.25", mockDockerHTTPClient(), nil) - if err != nil { - panic(err) - } - ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) - defer cancel() - eventChan, errChan := docker.Events(ctx, types.EventsOptions{}) - - for { - select { - case err := <-errChan: - assert.Error(t, err) - return - case event := <-eventChan: - testlogF("ID: %s, Action: %s, Status: %s", event.ID, event.Action, event.Status) - } - } -} diff --git a/engine/health_check.go b/engine/health_check.go deleted file mode 100644 index 04a1239..0000000 --- a/engine/health_check.go +++ /dev/null @@ -1,191 +0,0 @@ -package engine - -import ( - "context" - "fmt" - "net" - "net/http" - "time" - - "github.com/projecteru2/agent/types" - "github.com/projecteru2/agent/utils" - coreutils "github.com/projecteru2/core/utils" - - log "github.com/sirupsen/logrus" -) - -func (e *Engine) healthCheck(ctx context.Context) { - tick := time.NewTicker(time.Duration(e.config.HealthCheck.Interval) * time.Second) - defer tick.Stop() - - for { - select { - case <-tick.C: - go e.checkAllContainers(ctx) - case <-ctx.Done(): - return - } - } -} - -// 检查全部 label 为ERU=1的容器 -// 这里需要 list all,原因是 monitor 检测到 die 的时候已经标记为 false 了 -// 但是这时候 health check 刚返回 true 回来并写入 core -// 为了保证最终数据一致性这里也要检测 -func (e *Engine) checkAllContainers(ctx context.Context) { - log.Debug("[checkAllContainers] health check begin") - containers, err := e.listContainers(true, nil) - if err != nil { - log.Errorf("[checkAllContainers] Error when 
list all containers with label \"ERU=1\": %v", err)
-		return
-	}
-
-	for _, c := range containers {
-		// Docker quirk: ContainerList returns enginetypes.Container while
-		// ContainerInspect returns enginetypes.ContainerJSON, so each entry
-		// has to be inspected again to get a uniform data structure.
-		container, err := e.detectContainer(ctx, c.ID)
-		if err != nil {
-			log.Errorf("[checkAllContainers] detect container failed %v", err)
-			continue
-		}
-
-		go e.checkOneContainer(container)
-	}
-}
-
-// check a single container
-func (e *Engine) checkOneContainer(container *types.Container) {
-	free, acquired := e.cas.Acquire(container.ID)
-	if !acquired {
-		return
-	}
-	defer free()
-
-	// In theory every container here is running (listContainers was called with all=false)
-	// and carries a healthcheck mark; verify that it is still healthy now, just to be safe.
-	container.Healthy = container.Running
-	if container.HealthCheck != nil {
-		timeout := time.Duration(e.config.HealthCheck.Timeout) * time.Second
-		container.Healthy = checkSingleContainerHealthy(container, timeout)
-		log.Debugf("[checkSingleContainerHealthy] check container %s health status: %v", container.ID, container.Healthy)
-	}
-
-	if err := e.setContainerStatus(container); err != nil {
-		log.Errorf("[checkOneContainer] update deploy status failed %v", err)
-	}
-}
-
-// set container status, with retries and a timeout
-func (e *Engine) setContainerStatus(container *types.Container) error {
-	return utils.BackoffRetry(context.Background(), 3, func() error {
-		ctx, cancel := context.WithTimeout(context.Background(), e.config.GlobalConnectionTimeout)
-		defer cancel()
-		return e.store.SetContainerStatus(ctx, container, e.node, e.config.GetHealthCheckStatusTTL())
-	})
-}
-
-// check a single container, with retries
-func (e *Engine) checkOneContainerWithBackoffRetry(container *types.Container) {
-	log.Debugf("[checkOneContainerWithBackoffRetry] check container %s", container.ID)
-	err := utils.BackoffRetry(context.Background(), getMaxAttemptsByTTL(e.config.GetHealthCheckStatusTTL()), func() error {
-		e.checkOneContainer(container)
-		if !container.Healthy {
-			// this error only tells the backoff loop whether to retry; no need to log it
-			return fmt.Errorf("not healthy")
-		}
-		return nil
-	})
-	if err != nil {
-		log.Debugf("[checkOneContainerWithBackoffRetry] %s still not healthy", container.ID)
-	}
-}
-
-func checkSingleContainerHealthy(container *types.Container, timeout time.Duration) bool {
-	tcpChecker := []string{}
-	httpChecker := []string{}
-
-	for _, port := range container.HealthCheck.TCPPorts {
-		tcpChecker = append(tcpChecker, fmt.Sprintf("%s:%s", container.LocalIP, port))
-	}
-	if container.HealthCheck.HTTPPort != "" {
-		httpChecker = append(httpChecker, fmt.Sprintf("http://%s:%s%s", container.LocalIP, container.HealthCheck.HTTPPort, container.HealthCheck.HTTPURL))
-	}
-
-	ID := coreutils.ShortID(container.ID)
-	f1 := checkHTTP(ID, httpChecker, container.HealthCheck.HTTPCode, timeout)
-	f2 := checkTCP(ID, tcpChecker, timeout)
-	return f1 && f2
-}
-
-// check all URLs of one container
-// in practice there is usually just one
-func checkHTTP(ID string, backends []string, code int, timeout time.Duration) bool {
-	for _, backend := range backends {
-		log.Debugf("[checkHTTP] Check health via http: container %s, url %s, expect code %d", ID, backend, code)
-		if !checkOneURL(backend, code, timeout) {
-			log.Infof("[checkHTTP] Check health failed via http: container %s, url %s, expect code %d", ID, backend, code)
-			return false
-		}
-	}
-	return true
-}
-
-// check a TCP port
-func checkTCP(ID string, backends []string, timeout time.Duration) bool {
-	for _, backend := range backends {
-		log.Debugf("[checkTCP] Check health via tcp: container %s, backend %s", ID, backend)
-		conn, err := net.DialTimeout("tcp",
backend, timeout) - if err != nil { - return false - } - conn.Close() - } - return true -} - -// 偷来的函数 -// 谁要官方的context没有收录他 ¬ ¬ -func get(ctx context.Context, client *http.Client, url string) (*http.Response, error) { - if client == nil { - client = http.DefaultClient - } - - req, err := http.NewRequest("GET", url, nil) - if err != nil { - return nil, err - } - - resp, err := client.Do(req.WithContext(ctx)) - if err != nil { - select { - case <-ctx.Done(): - err = ctx.Err() - default: - } - } - return resp, err -} - -// 就先定义 [200, 500) 这个区间的 code 都算是成功吧 -func checkOneURL(url string, expectedCode int, timeout time.Duration) bool { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - resp, err := get(ctx, nil, url) - if err != nil { - log.Warnf("[checkOneURL] Error when checking %s, %s", url, err.Error()) - return false - } - defer resp.Body.Close() - if expectedCode == 0 { - return resp.StatusCode < 500 && resp.StatusCode >= 200 - } - if resp.StatusCode != expectedCode { - log.Infof("[checkOneURL] Error when checking %s, expect %d, got %d", url, expectedCode, resp.StatusCode) - } - return resp.StatusCode == expectedCode -} diff --git a/engine/health_check_test.go b/engine/health_check_test.go deleted file mode 100644 index f3b536a..0000000 --- a/engine/health_check_test.go +++ /dev/null @@ -1,73 +0,0 @@ -package engine - -import ( - "context" - "net/http" - "os" - "testing" - "time" - - "github.com/docker/docker/pkg/stringid" - "github.com/projecteru2/agent/store/mocks" - "github.com/projecteru2/agent/types" - coretypes "github.com/projecteru2/core/types" - log "github.com/sirupsen/logrus" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" -) - -func TestCheckSingleContainerHealthy(t *testing.T) { - go http.ListenAndServe(":10236", http.NotFoundHandler()) - time.Sleep(100 * time.Millisecond) - go http.ListenAndServe(":10237", http.NotFoundHandler()) - time.Sleep(100 * time.Millisecond) - container := &types.Container{ - StatusMeta: coretypes.StatusMeta{ - ID: stringid.GenerateRandomID(), - Running: true, - }, - Pid: 12349, - Name: "test", - EntryPoint: "t1", - HealthCheck: &coretypes.HealthCheck{ - TCPPorts: []string{"10236"}, - HTTPPort: "10237", - HTTPURL: "/", - HTTPCode: 404, - }, - } - state := checkSingleContainerHealthy(container, 3*time.Second) - assert.True(t, state) -} - -func TestCheckAllContainers(t *testing.T) { - log.SetOutput(os.Stdout) - log.SetLevel(log.DebugLevel) - - e := mockNewEngine() - mockStore := e.store.(*mocks.Store) - mockStore.On("SetContainerStatus", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - e.checkAllContainers(context.TODO()) - - time.Sleep(1 * time.Second) -} - -func TestCheckMethodTCP(t *testing.T) { - log.SetOutput(os.Stdout) - log.SetLevel(log.DebugLevel) - - assert.False(t, checkTCP(stringid.GenerateRandomID(), []string{"192.168.233.233:10234"}, 2*time.Second)) - go http.ListenAndServe(":10235", http.NotFoundHandler()) - time.Sleep(100 * time.Millisecond) - assert.True(t, checkTCP(stringid.GenerateRandomID(), []string{"127.0.0.1:10235"}, 2*time.Second)) -} - -func TestCheckMethodHTTP(t *testing.T) { - log.SetOutput(os.Stdout) - log.SetLevel(log.DebugLevel) - - // server - go http.ListenAndServe(":10234", http.NotFoundHandler()) - time.Sleep(100 * time.Millisecond) - assert.True(t, checkHTTP(stringid.GenerateRandomID(), []string{"http://127.0.0.1:10234/"}, 404, 5*time.Second)) -} diff --git a/engine/helper.go b/engine/helper.go deleted file mode 100644 index 
b2c0407..0000000
--- a/engine/helper.go
+++ /dev/null
@@ -1,166 +0,0 @@
-package engine
-
-import (
-	"context"
-	"fmt"
-	"math"
-	"os"
-	"strings"
-	"unicode"
-	"unicode/utf8"
-
-	"github.com/projecteru2/agent/common"
-	"github.com/projecteru2/agent/engine/status"
-	"github.com/projecteru2/agent/types"
-	"github.com/projecteru2/core/cluster"
-	coreutils "github.com/projecteru2/core/utils"
-
-	enginetypes "github.com/docker/docker/api/types"
-	enginecontainer "github.com/docker/docker/api/types/container"
-	enginefilters "github.com/docker/docker/api/types/filters"
-)
-
-func useLabelAsFilter() bool {
-	return os.Getenv("ERU_AGENT_EXPERIMENTAL_FILTER") == "label"
-}
-
-func (e *Engine) getFilter(extend map[string]string) enginefilters.Args {
-	f := enginefilters.NewArgs()
-	f.Add("label", fmt.Sprintf("%s=1", cluster.ERUMark))
-
-	if e.config.CheckOnlyMine && useLabelAsFilter() {
-		f.Add("label", fmt.Sprintf("eru.nodename=%s", e.config.HostName))
-		if e.coreIdentifier != "" {
-			f.Add("label", fmt.Sprintf("eru.coreid=%s", e.coreIdentifier))
-		}
-	}
-
-	for k, v := range extend {
-		f.Add(k, v)
-	}
-	return f
-}
-
-func (e *Engine) listContainers(all bool, extend map[string]string) ([]enginetypes.Container, error) {
-	f := e.getFilter(extend)
-	opts := enginetypes.ContainerListOptions{Filters: f, All: all}
-
-	ctx, cancel := context.WithTimeout(context.Background(), e.config.GlobalConnectionTimeout)
-	defer cancel()
-	return e.docker.ContainerList(ctx, opts)
-}
-
-func (e *Engine) activated(f bool) error {
-	e.node.Available = f
-	return e.store.UpdateNode(e.node)
-}
-
-// check if the ERU_NODE_NAME env in a container matches the hostname of this agent
-// TODO should be removed in the future, should always use label to filter
-func checkHostname(env []string, hostname string) bool {
-	for _, e := range env {
-		ps := strings.SplitN(e, "=", 2)
-		if len(ps) != 2 {
-			continue
-		}
-		if ps[0] == "ERU_NODE_NAME" && ps[1] == hostname {
-			return true
-		}
-	}
-	return false
-}
-
-func (e *Engine) detectContainer(ctx context.Context, ID string) (*types.Container, error) {
-	// normalize to the inspect representation
-	ctx, cancel := context.WithTimeout(ctx, e.config.GlobalConnectionTimeout)
-	defer cancel()
-	c, err := e.docker.ContainerInspect(ctx, ID)
-	if err != nil {
-		return nil, err
-	}
-	label := c.Config.Labels
-
-	if _, ok := label[cluster.ERUMark]; !ok {
-		return nil, fmt.Errorf("not a eru container %s", coreutils.ShortID(ID))
-	}
-
-	// TODO should be removed in the future
-	if e.config.CheckOnlyMine && !useLabelAsFilter() && !checkHostname(c.Config.Env, e.config.HostName) {
-		return nil, fmt.Errorf("should ignore this container")
-	}
-
-	// build the base meta
-	meta := coreutils.DecodeMetaInLabel(context.TODO(), label)
-
-	// check the eru pattern: a container that carries the ERUMark label and a
-	// three-part name is considered an ERU container
-	container, err := status.GenerateContainerMeta(c, meta, label)
-	if err != nil {
-		return container, err
-	}
-	// calculate how much CPU the container uses
-	container = status.CalcuateCPUNum(container, c, e.cpuCore)
-	if container.Memory == 0 || container.Memory == math.MaxInt64 {
-		container.Memory = e.memory
-	}
-	// only a running container is worth publishing
-	if c.NetworkSettings != nil && container.Running {
-		networks := map[string]string{}
-		for name, endpoint := range c.NetworkSettings.Networks {
-			networkmode := enginecontainer.NetworkMode(name)
-			if networkmode.IsHost() {
-				container.LocalIP = common.LocalIP
-				networks[name] = e.nodeIP
-			} else {
-				container.LocalIP = endpoint.IPAddress
-				networks[name] = endpoint.IPAddress
-			}
-			break
-		}
-		container.Networks = networks
-	}
-
-	return container, nil
-}
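For reference, the label filtering that getFilter and detectContainer implement above can be reproduced with the plain Docker client. This is a minimal standalone sketch, assuming a reachable Docker daemon; the node name is a placeholder, so treat it as an illustration of the ERU=1 / eru.nodename filter shape rather than agent code:

package main

import (
	"context"
	"fmt"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/client"
)

func main() {
	cli, err := client.NewClientWithOpts(client.FromEnv)
	if err != nil {
		panic(err)
	}
	// Same filter shape as Engine.getFilter: the ERU mark plus, when
	// filtering by label, this node's name.
	f := filters.NewArgs()
	f.Add("label", "ERU=1")
	f.Add("label", "eru.nodename=example-node") // placeholder node name
	// All: true mirrors listContainers(true, nil) in the deleted code.
	containers, err := cli.ContainerList(context.Background(), types.ContainerListOptions{Filters: f, All: true})
	if err != nil {
		panic(err)
	}
	for _, c := range containers {
		fmt.Println(c.ID[:12], c.Names)
	}
}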
- -func getMaxAttemptsByTTL(ttl int64) int { - if ttl <= 1 { - return 1 - } - maxAttempts := int(math.Floor(math.Log2((float64(ttl) - 1) / 2))) - if maxAttempts < 1 { - maxAttempts = 1 - } - return maxAttempts -} - -// replaceNonUtf8 replaces non-utf8 characters in \x format. -func replaceNonUtf8(str string) string { - if str == "" { - return str - } - - // deal with "legal" error rune in utf8 - if strings.ContainsRune(str, utf8.RuneError) { - str = strings.ReplaceAll(str, string(utf8.RuneError), "\\xff\\xfd") - } - - if utf8.ValidString(str) { - return str - } - - v := make([]rune, 0, len(str)) - for i, r := range str { - switch { - case r == utf8.RuneError: - _, size := utf8.DecodeRuneInString(str[i:]) - if size > 0 { - v = append(v, []rune(fmt.Sprintf("\\x%02x", str[i:i+size]))...) - } - case unicode.IsControl(r) && r != '\r' && r != '\n': - v = append(v, []rune(fmt.Sprintf("\\x%02x", r))...) - default: - v = append(v, r) - } - } - return string(v) -} diff --git a/engine/helper_test.go b/engine/helper_test.go deleted file mode 100644 index 6f2b78d..0000000 --- a/engine/helper_test.go +++ /dev/null @@ -1,41 +0,0 @@ -package engine - -import ( - "fmt" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestReplaceNonUtf8(t *testing.T) { - str := "test, 1\x00\xff\x01\xbb\xfd\xff\xfd\n" - assert.Equal(t, "test, 1\\x00\\xff\\x01\\xbb\\xfd\\xff\\xfd\n", replaceNonUtf8(str)) - - data := []byte{ - 0x7b, 0x0a, 0x20, 0x20, 0x22, 0x41, 0x44, 0x44, 0x52, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x30, 0x2e, - 0x31, 0x36, 0x38, 0x2e, 0x33, 0x39, 0x2e, 0x31, 0x39, 0x3a, 0x31, 0x30, 0x30, 0x30, 0x31, 0x22, - 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x43, 0x4d, 0x44, 0x22, 0x3a, 0x20, 0x22, 0x53, 0x45, 0x54, 0x20, - 0x74, 0x63, 0x69, 0x72, 0x5f, 0x53, 0x47, 0x5f, 0x5f, 0x34, 0x37, 0x31, 0x35, 0x5f, 0x70, 0x69, - 0x64, 0x20, 0x5c, 0x74, 0x2c, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x4e, 0xef, - 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x3f, 0x5c, 0x75, 0x30, 0x30, 0x31, 0x31, 0x36, 0x5c, 0x75, 0x30, - 0x30, 0x30, 0x37, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x5c, 0x75, 0x30, 0x30, - 0x31, 0x32, 0xef, 0xbf, 0xbd, 0x3f, 0x5c, 0x75, 0x30, 0x30, 0x31, 0x39, 0x41, 0x5c, 0x75, 0x30, - 0x30, 0x31, 0x37, 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x55, 0x5b, 0xef, 0xbf, 0xbd, 0x3f, 0x21, - 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd, 0x48, 0xef, 0xbf, 0xbd, 0x27, 0x24, 0xef, 0xbf, 0xbd, 0x3f, - 0x28, 0xef, 0xbf, 0xbd, 0x3f, 0x7e, 0x48, 0xef, 0xbf, 0xbd, 0x5c, 0x75, 0x30, 0x30, 0x30, 0x36, - 0x30, 0xef, 0xbf, 0xbd, 0x29, 0x50, 0x3f, 0x7e, 0x40, 0x5c, 0x75, 0x30, 0x30, 0x30, 0x33, 0x20, - 0x45, 0x58, 0x20, 0x36, 0x30, 0x34, 0x38, 0x30, 0x30, 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x44, - 0x55, 0x52, 0x41, 0x54, 0x49, 0x4f, 0x4e, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x32, 0x31, 0x39, 0x30, - 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x49, 0x44, 0x22, 0x3a, 0x20, 0x22, 0x63, 0x6c, 0x75, 0x73, - 0x74, 0x65, 0x72, 0x3a, 0x68, 0x61, 0x6e, 0x71, 0x69, 0x61, 0x6e, 0x67, 0x2e, 0x74, 0x65, 0x73, - 0x74, 0x2e, 0x73, 0x67, 0x37, 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x4a, 0x52, 0x5f, 0x54, 0x49, - 0x4d, 0x45, 0x53, 0x54, 0x41, 0x4d, 0x50, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x36, 0x32, 0x37, 0x35, - 0x33, 0x30, 0x33, 0x36, 0x37, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x22, 0x2c, 0x0a, 0x20, 0x20, - 0x22, 0x50, 0x41, 0x43, 0x4b, 0x41, 0x47, 0x45, 0x22, 0x3a, 0x20, 0x22, 0x73, 0x6c, 0x6f, 0x77, - 0x6c, 0x6f, 0x67, 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x54, 0x49, 0x4d, 0x45, 0x53, 0x54, 0x41, - 0x4d, 0x50, 0x22, 0x3a, 0x20, 0x22, 0x31, 0x36, 0x32, 0x37, 0x35, 
0x33, 0x30, 0x33, 0x36, 0x37, - 0x22, 0x2c, 0x0a, 0x20, 0x20, 0x22, 0x54, 0x59, 0x50, 0x45, 0x22, 0x3a, 0x20, 0x22, 0x70, 0x72, - 0x6f, 0x78, 0x79, 0x22, 0x0a, 0x7d, 0x0a, - } - fmt.Println(replaceNonUtf8(string(data))) -} diff --git a/engine/load.go b/engine/load.go deleted file mode 100644 index 9cdd94b..0000000 --- a/engine/load.go +++ /dev/null @@ -1,34 +0,0 @@ -package engine - -import ( - "context" - - coreutils "github.com/projecteru2/core/utils" - log "github.com/sirupsen/logrus" -) - -func (e *Engine) load(ctx context.Context) error { - log.Info("[load] Load containers") - containers, err := e.listContainers(true, nil) - if err != nil { - return err - } - - for _, container := range containers { - log.Debugf("[load] detect container %s", coreutils.ShortID(container.ID)) - c, err := e.detectContainer(ctx, container.ID) - if err != nil { - log.Errorf("[load] detect container failed %v", err) - continue - } - - if c.Running { - e.attach(c) - } - - if err := e.setContainerStatus(c); err != nil { - log.Errorf("[load] update deploy status failed %v", err) - } - } - return nil -} diff --git a/engine/load_test.go b/engine/load_test.go deleted file mode 100644 index 0c7d7e6..0000000 --- a/engine/load_test.go +++ /dev/null @@ -1,29 +0,0 @@ -package engine - -import ( - "context" - "os" - "testing" - "time" - - coretypes "github.com/projecteru2/core/types" - log "github.com/sirupsen/logrus" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" -) - -func TestLoad(t *testing.T) { - log.SetOutput(os.Stdout) - log.SetLevel(log.DebugLevel) - - e := mockNewEngine() - - n := new(coretypes.Node) - mockStore.On("GetNode", mock.AnythingOfType("string")).Return(n, nil) - mockStore.On("UpdateNode", mock.Anything).Return(nil) - mockStore.On("SetContainerStatus", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - - err := e.load(context.TODO()) - assert.NoError(t, err) - time.Sleep(1 * time.Second) -} diff --git a/engine/metrics.go b/engine/metrics.go deleted file mode 100644 index 0428000..0000000 --- a/engine/metrics.go +++ /dev/null @@ -1,374 +0,0 @@ -package engine - -import ( - "fmt" - "strings" - - statsdlib "github.com/CMGS/statsd" - "github.com/projecteru2/agent/types" - "github.com/projecteru2/core/cluster" - coreutils "github.com/projecteru2/core/utils" - "github.com/prometheus/client_golang/prometheus" - log "github.com/sirupsen/logrus" -) - -// MetricsClient combine statsd and prometheus -type MetricsClient struct { - statsd string - statsdClient *statsdlib.Client - prefix string - data map[string]float64 - - cpuHostUsage prometheus.Gauge - cpuHostSysUsage prometheus.Gauge - cpuHostUserUsage prometheus.Gauge - - cpuContainerUsage prometheus.Gauge - cpuContainerSysUsage prometheus.Gauge - cpuContainerUserUsage prometheus.Gauge - - memUsage prometheus.Gauge - memMaxUsage prometheus.Gauge - memRss prometheus.Gauge - memPercent prometheus.Gauge - memRSSPercent prometheus.Gauge - - bytesSent *prometheus.GaugeVec - bytesRecv *prometheus.GaugeVec - packetsSent *prometheus.GaugeVec - packetsRecv *prometheus.GaugeVec - errIn *prometheus.GaugeVec - errOut *prometheus.GaugeVec - dropIn *prometheus.GaugeVec - dropOut *prometheus.GaugeVec -} - -// NewMetricsClient new a metrics client -func NewMetricsClient(statsd, hostname string, container *types.Container) *MetricsClient { - clables := []string{} - for k, v := range container.Labels { - if strings.HasPrefix(k, cluster.ERUMark) || strings.HasPrefix(k, cluster.LabelMeta) { - continue - } - clables = 
append(clables, fmt.Sprintf("%s=%s", k, v)) - } - labels := map[string]string{ - "containerID": container.ID, - "hostname": hostname, - "appname": container.Name, - "entrypoint": container.EntryPoint, - "orchestrator": cluster.ERUMark, - "labels": strings.Join(clables, ","), - } - - cpuHostUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "cpu_host_usage", - Help: "cpu usage in host view.", - ConstLabels: labels, - }) - cpuHostSysUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "cpu_host_sys_usage", - Help: "cpu sys usage in host view.", - ConstLabels: labels, - }) - cpuHostUserUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "cpu_host_user_usage", - Help: "cpu user usage in host view.", - ConstLabels: labels, - }) - cpuContainerUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "cpu_container_usage", - Help: "cpu usage in container view.", - ConstLabels: labels, - }) - cpuContainerSysUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "cpu_container_sys_usage", - Help: "cpu sys usage in container view.", - ConstLabels: labels, - }) - cpuContainerUserUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "cpu_container_user_usage", - Help: "cpu user usage in container view.", - ConstLabels: labels, - }) - memUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "mem_usage", - Help: "memory usage.", - ConstLabels: labels, - }) - memMaxUsage := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "mem_max_usage", - Help: "memory max usage.", - ConstLabels: labels, - }) - memRss := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "mem_rss", - Help: "memory rss.", - ConstLabels: labels, - }) - memPercent := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "mem_percent", - Help: "memory percent.", - ConstLabels: labels, - }) - memRSSPercent := prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "mem_rss_percent", - Help: "memory rss percent.", - ConstLabels: labels, - }) - bytesSent := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "bytes_send", - Help: "bytes send.", - ConstLabels: labels, - }, []string{"nic"}) - bytesRecv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "bytes_recv", - Help: "bytes recv.", - ConstLabels: labels, - }, []string{"nic"}) - packetsSent := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "packets_send", - Help: "packets send.", - ConstLabels: labels, - }, []string{"nic"}) - packetsRecv := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "packets_recv", - Help: "packets recv.", - ConstLabels: labels, - }, []string{"nic"}) - errIn := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "err_in", - Help: "err in.", - ConstLabels: labels, - }, []string{"nic"}) - errOut := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "err_out", - Help: "err out.", - ConstLabels: labels, - }, []string{"nic"}) - dropIn := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "drop_in", - Help: "drop in.", - ConstLabels: labels, - }, []string{"nic"}) - dropOut := prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "drop_out", - Help: "drop out.", - ConstLabels: labels, - }, []string{"nic"}) - - // TODO 这里已经没有了版本了 - tag := fmt.Sprintf("%s.%s", hostname, coreutils.ShortID(container.ID)) - endpoint := fmt.Sprintf("%s.%s", container.Name, container.EntryPoint) - prefix := fmt.Sprintf("%s.%s.%s", cluster.ERUMark, endpoint, tag) - - prometheus.MustRegister( - cpuHostSysUsage, cpuHostUsage, cpuHostUserUsage, - cpuContainerSysUsage, cpuContainerUsage, cpuContainerUserUsage, - memMaxUsage, memRss, memUsage, memPercent, 
memRSSPercent, - bytesRecv, bytesSent, packetsRecv, packetsSent, - errIn, errOut, dropIn, dropOut, - ) - - return &MetricsClient{ - statsd: statsd, - prefix: prefix, - data: map[string]float64{}, - - cpuHostUsage: cpuHostUsage, - cpuHostSysUsage: cpuHostSysUsage, - cpuHostUserUsage: cpuHostUserUsage, - - cpuContainerUsage: cpuContainerUsage, - cpuContainerSysUsage: cpuContainerSysUsage, - cpuContainerUserUsage: cpuContainerUserUsage, - - memUsage: memUsage, - memMaxUsage: memMaxUsage, - memRss: memRss, - memPercent: memPercent, - memRSSPercent: memRSSPercent, - - bytesSent: bytesSent, - bytesRecv: bytesRecv, - packetsSent: packetsSent, - packetsRecv: packetsRecv, - errIn: errIn, - errOut: errOut, - dropIn: dropIn, - dropOut: dropOut, - } -} - -// Unregister unlink all prometheus things -func (m *MetricsClient) Unregister() { - prometheus.Unregister(m.cpuHostSysUsage) - prometheus.Unregister(m.cpuHostUsage) - prometheus.Unregister(m.cpuHostUserUsage) - - prometheus.Unregister(m.cpuContainerUsage) - prometheus.Unregister(m.cpuContainerSysUsage) - prometheus.Unregister(m.cpuContainerUserUsage) - - prometheus.Unregister(m.memUsage) - prometheus.Unregister(m.memMaxUsage) - prometheus.Unregister(m.memRss) - prometheus.Unregister(m.memPercent) - prometheus.Unregister(m.memRSSPercent) - - prometheus.Unregister(m.bytesRecv) - prometheus.Unregister(m.bytesSent) - prometheus.Unregister(m.packetsRecv) - prometheus.Unregister(m.packetsSent) - prometheus.Unregister(m.errIn) - prometheus.Unregister(m.errOut) - prometheus.Unregister(m.dropIn) - prometheus.Unregister(m.dropOut) -} - -// CPUHostUsage set cpu usage in host view -func (m *MetricsClient) CPUHostUsage(i float64) { - m.data["cpu_host_usage"] = i - m.cpuHostUsage.Set(i) -} - -// CPUHostSysUsage set cpu sys usage in host view -func (m *MetricsClient) CPUHostSysUsage(i float64) { - m.data["cpu_host_sys_usage"] = i - m.cpuHostSysUsage.Set(i) -} - -// CPUHostUserUsage set cpu user usage in host view -func (m *MetricsClient) CPUHostUserUsage(i float64) { - m.data["cpu_host_user_usage"] = i - m.cpuHostUserUsage.Set(i) -} - -// CPUContainerUsage set cpu usage in container view -func (m *MetricsClient) CPUContainerUsage(i float64) { - m.data["cpu_container_usage"] = i - m.cpuContainerUsage.Set(i) -} - -// CPUContainerSysUsage set cpu sys usage in container view -func (m *MetricsClient) CPUContainerSysUsage(i float64) { - m.data["cpu_container_sys_usage"] = i - m.cpuContainerSysUsage.Set(i) -} - -// CPUContainerUserUsage set cpu user usage in container view -func (m *MetricsClient) CPUContainerUserUsage(i float64) { - m.data["cpu_container_user_usage"] = i - m.cpuContainerUserUsage.Set(i) -} - -// MemUsage set memory usage -func (m *MetricsClient) MemUsage(i float64) { - m.data["mem_usage"] = i - m.memUsage.Set(i) -} - -// MemMaxUsage set memory max usage -func (m *MetricsClient) MemMaxUsage(i float64) { - m.data["mem_max_usage"] = i - m.memMaxUsage.Set(i) -} - -// MemRss set memory rss -func (m *MetricsClient) MemRss(i float64) { - m.data["mem_rss"] = i - m.memRss.Set(i) -} - -// MemPercent set memory percent -func (m *MetricsClient) MemPercent(i float64) { - m.data["mem_percent"] = i - m.memPercent.Set(i) -} - -// MemRSSPercent set memory percent -func (m *MetricsClient) MemRSSPercent(i float64) { - m.data["mem_rss_percent"] = i - m.memRSSPercent.Set(i) -} - -// BytesSent set bytes send -func (m *MetricsClient) BytesSent(nic string, i float64) { - m.data[nic+".bytes.sent"] = i - m.bytesSent.WithLabelValues(nic).Set(i) -} - -// BytesRecv set bytes recv 
-func (m *MetricsClient) BytesRecv(nic string, i float64) { - m.data[nic+".bytes.recv"] = i - m.bytesRecv.WithLabelValues(nic).Set(i) -} - -// PacketsSent set packets send -func (m *MetricsClient) PacketsSent(nic string, i float64) { - m.data[nic+".packets.sent"] = i - m.packetsSent.WithLabelValues(nic).Set(i) -} - -// PacketsRecv set packets recv -func (m *MetricsClient) PacketsRecv(nic string, i float64) { - m.data[nic+".packets.recv"] = i - m.packetsRecv.WithLabelValues(nic).Set(i) -} - -// ErrIn set inbound err count -func (m *MetricsClient) ErrIn(nic string, i float64) { - m.data[nic+".err.in"] = i - m.errIn.WithLabelValues(nic).Set(i) -} - -// ErrOut set outbound err count -func (m *MetricsClient) ErrOut(nic string, i float64) { - m.data[nic+".err.out"] = i - m.errOut.WithLabelValues(nic).Set(i) -} - -// DropIn set inbound drop count -func (m *MetricsClient) DropIn(nic string, i float64) { - m.data[nic+".drop.in"] = i - m.dropIn.WithLabelValues(nic).Set(i) -} - -// DropOut set outbound drop count -func (m *MetricsClient) DropOut(nic string, i float64) { - m.data[nic+".drop.out"] = i - m.dropOut.WithLabelValues(nic).Set(i) -} - -// Lazy connecting -func (m *MetricsClient) checkConn() error { - if m.statsdClient != nil { - return nil - } - // We needn't try to renew/reconnect because of only supporting UDP protocol now - // We should add an `errorCount` to reconnect when implementing TCP protocol - var err error - if m.statsdClient, err = statsdlib.New(m.statsd, statsdlib.WithErrorHandler(func(err error) { - log.Errorf("[statsd] Sending statsd failed: %v", err) - })); err != nil { - log.Errorf("[statsd] Connect statsd failed: %v", err) - return err - } - return nil -} - -// Send to statsd -func (m *MetricsClient) Send() error { - if m.statsd == "" { - return nil - } - if err := m.checkConn(); err != nil { - return err - } - for k, v := range m.data { - key := fmt.Sprintf("%s.%s", m.prefix, k) - m.statsdClient.Gauge(key, v) - delete(m.data, k) - } - return nil -} diff --git a/engine/monitor.go b/engine/monitor.go deleted file mode 100644 index 380554f..0000000 --- a/engine/monitor.go +++ /dev/null @@ -1,64 +0,0 @@ -package engine - -import ( - "context" - - types "github.com/docker/docker/api/types" - eventtypes "github.com/docker/docker/api/types/events" - log "github.com/sirupsen/logrus" - - "github.com/projecteru2/agent/common" - "github.com/projecteru2/agent/engine/status" - coreutils "github.com/projecteru2/core/utils" -) - -var eventHandler = status.NewEventHandler() - -func (e *Engine) initMonitor() (<-chan eventtypes.Message, <-chan error) { - eventHandler.Handle(common.StatusStart, e.handleContainerStart) - eventHandler.Handle(common.StatusDie, e.handleContainerDie) - - ctx := context.Background() - f := e.getFilter(map[string]string{"type": eventtypes.ContainerEventType}) - options := types.EventsOptions{Filters: f} - eventChan, errChan := e.docker.Events(ctx, options) - return eventChan, errChan -} - -func (e *Engine) monitor(eventChan <-chan eventtypes.Message) { - log.Info("[monitor] Status watch start") - eventHandler.Watch(eventChan) -} - -func (e *Engine) handleContainerStart(ctx context.Context, event eventtypes.Message) { - log.Debugf("[handleContainerStart] container %s start", coreutils.ShortID(event.ID)) - container, err := e.detectContainer(ctx, event.ID) - if err != nil { - log.Errorf("[handleContainerStart] detect container failed %v", err) - return - } - - if container.Running { - // 这货会自动退出 - e.attach(container) - } - - // 发现需要 health check 立刻执行 - if 
container.Healthy { - if err := e.setContainerStatus(container); err != nil { - log.Errorf("[handleContainerStart] update deploy status failed %v", err) - } - } else { - go e.checkOneContainerWithBackoffRetry(container) - } -} - -func (e *Engine) handleContainerDie(ctx context.Context, event eventtypes.Message) { - log.Debugf("[handleContainerDie] container %s die", coreutils.ShortID(event.ID)) - container, err := e.detectContainer(ctx, event.ID) - if err != nil { - log.Errorf("[handleContainerDie] detect container failed %v", err) - } else if err := e.setContainerStatus(container); err != nil { - log.Errorf("[handleContainerDie] update deploy status failed %v", err) - } -} diff --git a/engine/monitor_test.go b/engine/monitor_test.go deleted file mode 100644 index c7ca988..0000000 --- a/engine/monitor_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package engine - -import ( - "io" - "os" - "testing" - "time" - - log "github.com/sirupsen/logrus" - // "github.com/docker/docker/api/types/network" - - coretypes "github.com/projecteru2/core/types" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" -) - -func TestInitMonitor(t *testing.T) { - log.SetOutput(os.Stdout) - log.SetLevel(log.DebugLevel) - - e := mockNewEngine() - eventChan, errChan := e.initMonitor() - - go func() { - for { - select { - case err := <-errChan: - assert.Equal(t, err, io.ErrClosedPipe) - return - case event := <-eventChan: - testlogF("ID: %s, Action: %s, Status: %s", event.ID, event.Action, event.Status) - } - } - }() - - time.Sleep(3 * time.Second) -} - -func TestMonitor(t *testing.T) { - log.SetOutput(os.Stdout) - log.SetLevel(log.DebugLevel) - - e := mockNewEngine() - eventChan, _ := e.initMonitor() - - n := new(coretypes.Node) - mockStore.On("GetNode", mock.AnythingOfType("string")).Return(n, nil) - mockStore.On("UpdateNode", mock.Anything).Return(nil) - mockStore.On("SetContainerStatus", mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(nil) - - go e.monitor(eventChan) - time.Sleep(3 * time.Second) -} diff --git a/engine/stat.go b/engine/stat.go deleted file mode 100644 index 5695e54..0000000 --- a/engine/stat.go +++ /dev/null @@ -1,138 +0,0 @@ -package engine - -import ( - "context" - "strings" - "time" - - "github.com/projecteru2/agent/types" - coreutils "github.com/projecteru2/core/utils" - "github.com/shirou/gopsutil/net" - log "github.com/sirupsen/logrus" -) - -func (e *Engine) stat(ctx context.Context, container *types.Container) { - // TODO - // FIXME fuck internal pkg - proc := "/proc" - if e.dockerized { - proc = "/hostProc" - } - // init stats - containerCPUStats, systemCPUStats, containerNetStats, err := getStats(ctx, container, proc) - if err != nil { - log.Errorf("[stat] get %s stats failed %v", coreutils.ShortID(container.ID), err) - return - } - - delta := float64(e.config.Metrics.Step) - timeout := time.Duration(e.config.Metrics.Step) * time.Second - tick := time.NewTicker(timeout) - defer tick.Stop() - hostname := strings.ReplaceAll(e.config.HostName, ".", "-") - addr := "" - if e.transfers.Len() > 0 { - addr = e.transfers.Get(container.ID, 0) - } - - period := float64(e.config.Metrics.Step) - hostCPUCount := e.cpuCore * period - - mClient := NewMetricsClient(addr, hostname, container) - defer log.Infof("[stat] container %s %s metric report stop", container.Name, coreutils.ShortID(container.ID)) - log.Infof("[stat] container %s %s metric report start", container.Name, coreutils.ShortID(container.ID)) - - updateMetrics := func() { - container, err = 
e.detectContainer(ctx, container.ID) - if err != nil { - log.Errorf("[stat] can not refresh container meta %s", container.ID) - return - } - containerCPUCount := container.CPUNum * period - timeoutCtx, cancel := context.WithTimeout(ctx, timeout) - defer cancel() - newContainrCPUStats, newSystemCPUStats, newContainerNetStats, err := getStats(timeoutCtx, container, proc) - if err != nil { - log.Errorf("[stat] get %s stats failed %v", coreutils.ShortID(container.ID), err) - return - } - containerMemStats, err := getMemStats(timeoutCtx, container) - if err != nil { - log.Errorf("[stat] get %s mem stats failed %v", coreutils.ShortID(container.ID), err) - return - } - - deltaContainerCPUUsage := newContainrCPUStats.Usage - containerCPUStats.Usage // CPU Usage in seconds - deltaContainerCPUSysUsage := newContainrCPUStats.System - containerCPUStats.System // Sys Usage in jiffies / tick - deltaContainerCPUUserUsage := newContainrCPUStats.User - containerCPUStats.User // User Usage in jiffies / tick - - deltaSystemCPUSysUsage := newSystemCPUStats.System - systemCPUStats.System - deltaSystemCPUUserUsage := newSystemCPUStats.User - systemCPUStats.User - - cpuHostUsage := deltaContainerCPUUsage / hostCPUCount - cpuHostSysUsage := 0.0 - if deltaSystemCPUSysUsage > 0 { - cpuHostSysUsage = deltaContainerCPUSysUsage / deltaSystemCPUSysUsage - } - cpuHostUserUsage := 0.0 - if deltaSystemCPUUserUsage > 0 { - cpuHostUserUsage = deltaContainerCPUUserUsage / deltaSystemCPUUserUsage - - } - mClient.CPUHostUsage(cpuHostUsage) - mClient.CPUHostSysUsage(cpuHostSysUsage) - mClient.CPUHostUserUsage(cpuHostUserUsage) - - cpuContainerUsage := deltaContainerCPUUsage / containerCPUCount // 实际消耗的 CPU 秒 / 允许消耗的 CPU 秒 - cpuContainerSysUsage := 0.0 - if deltaContainerCPUUsage > 0 { - cpuContainerSysUsage = deltaContainerCPUSysUsage / deltaContainerCPUUsage - } - cpuContainerUserUsage := 0.0 - if deltaContainerCPUUsage > 0 { - cpuContainerUserUsage = deltaContainerCPUUserUsage / deltaContainerCPUUsage - } - mClient.CPUContainerUsage(cpuContainerUsage) - mClient.CPUContainerSysUsage(cpuContainerSysUsage) - mClient.CPUContainerUserUsage(cpuContainerUserUsage) - - mClient.MemUsage(float64(containerMemStats.MemUsageInBytes)) - mClient.MemMaxUsage(float64(containerMemStats.MemMaxUsageInBytes)) - mClient.MemRss(float64(containerMemStats.RSS)) - if container.Memory > 0 { - mClient.MemPercent(float64(containerMemStats.MemUsageInBytes) / float64(container.Memory)) - mClient.MemRSSPercent(float64(containerMemStats.RSS) / float64(container.Memory)) - } - nics := map[string]net.IOCountersStat{} - for _, nic := range containerNetStats { - nics[nic.Name] = nic - } - for _, nic := range newContainerNetStats { - if _, ok := nics[nic.Name]; !ok { - continue - } - oldNICStats := nics[nic.Name] - mClient.BytesSent(nic.Name, float64(nic.BytesSent-oldNICStats.BytesSent)/delta) - mClient.BytesRecv(nic.Name, float64(nic.BytesRecv-oldNICStats.BytesRecv)/delta) - mClient.PacketsSent(nic.Name, float64(nic.PacketsSent-oldNICStats.PacketsSent)/delta) - mClient.PacketsRecv(nic.Name, float64(nic.PacketsRecv-oldNICStats.PacketsRecv)/delta) - mClient.ErrIn(nic.Name, float64(nic.Errin-oldNICStats.Errin)/delta) - mClient.ErrOut(nic.Name, float64(nic.Errout-oldNICStats.Errout)/delta) - mClient.DropIn(nic.Name, float64(nic.Dropin-oldNICStats.Dropin)/delta) - mClient.DropOut(nic.Name, float64(nic.Dropout-oldNICStats.Dropout)/delta) - } - containerCPUStats, systemCPUStats, containerNetStats = newContainrCPUStats, newSystemCPUStats, newContainerNetStats - if err 
:= mClient.Send(); err != nil { - log.Errorf("[stat] Send metrics failed %v", err) - } - } - for { - select { - case <-tick.C: - updateMetrics() - case <-ctx.Done(): - mClient.Unregister() - return - } - } -} diff --git a/engine/stat_linux.go b/engine/stat_linux.go deleted file mode 100644 index 75a3fa8..0000000 --- a/engine/stat_linux.go +++ /dev/null @@ -1,48 +0,0 @@ -//go:build linux -// +build linux - -package engine - -import ( - "context" - "fmt" - - "github.com/projecteru2/agent/types" - "github.com/shirou/gopsutil/cpu" - "github.com/shirou/gopsutil/docker" - "github.com/shirou/gopsutil/net" -) - -func getStats(ctx context.Context, container *types.Container, proc string) (*docker.CgroupCPUStat, cpu.TimesStat, []net.IOCountersStat, error) { - // get container cpu stats - containerCPUStatsWithoutUsage, err := docker.CgroupCPUDockerWithContext(ctx, container.ID) - if err != nil { - return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err - } - containerCPUStatsUsage, err := docker.CgroupCPUDockerUsageWithContext(ctx, container.ID) - if err != nil { - return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err - } - containerCPUStats := &docker.CgroupCPUStat{ - TimesStat: *containerCPUStatsWithoutUsage, - Usage: containerCPUStatsUsage, - } - // get system cpu stats - systemCPUsStats, err := cpu.TimesWithContext(ctx, false) - if err != nil { - return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err - } - // 0 means all cpu - systemCPUStats := systemCPUsStats[0] - // get container nic stats - netFilePath := fmt.Sprintf("%s/%d/net/dev", proc, container.Pid) - containerNetStats, err := net.IOCountersByFileWithContext(ctx, true, netFilePath) - if err != nil { - return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err - } - return containerCPUStats, systemCPUStats, containerNetStats, nil -} - -func getMemStats(ctx context.Context, container *types.Container) (*docker.CgroupMemStat, error) { - return docker.CgroupMemDockerWithContext(ctx, container.ID) -} diff --git a/engine/stat_notlinux.go b/engine/stat_notlinux.go deleted file mode 100644 index 882dad0..0000000 --- a/engine/stat_notlinux.go +++ /dev/null @@ -1,31 +0,0 @@ -// +build !linux - -package engine - -import ( - "context" - - "github.com/projecteru2/agent/types" - "github.com/shirou/gopsutil/cpu" - "github.com/shirou/gopsutil/docker" - "github.com/shirou/gopsutil/net" -) - -func getStats(ctx context.Context, container *types.Container, proc string) (*docker.CgroupCPUStat, cpu.TimesStat, []net.IOCountersStat, error) { - containerCPUStats := &docker.CgroupCPUStat{ - TimesStat: cpu.TimesStat{}, - Usage: 0.0, - } - // get system cpu stats - systemCPUsStats, err := cpu.TimesWithContext(ctx, false) - if err != nil { - return nil, cpu.TimesStat{}, []net.IOCountersStat{}, err - } - // 0 means all cpu - systemCPUStats := systemCPUsStats[0] - return containerCPUStats, systemCPUStats, []net.IOCountersStat{}, nil -} - -func getMemStats(ctx context.Context, container *types.Container) (*docker.CgroupMemStat, error) { - return &docker.CgroupMemStat{}, nil -} diff --git a/engine/status/container.go b/engine/status/container.go deleted file mode 100644 index fbd6c5c..0000000 --- a/engine/status/container.go +++ /dev/null @@ -1,71 +0,0 @@ -package status - -import ( - "strings" - - "github.com/projecteru2/agent/types" - "github.com/projecteru2/agent/utils" - coretypes "github.com/projecteru2/core/types" - - enginetypes "github.com/docker/docker/api/types" - log "github.com/sirupsen/logrus" -) - -// CalcuateCPUNum calculate how many cpu container used 
-func CalcuateCPUNum(container *types.Container, containerJSON enginetypes.ContainerJSON, hostCPUNum float64) *types.Container {
-	cpuNum := hostCPUNum
-	if containerJSON.HostConfig.CPUPeriod != 0 && containerJSON.HostConfig.CPUQuota != 0 {
-		cpuNum = float64(containerJSON.HostConfig.CPUQuota) / float64(containerJSON.HostConfig.CPUPeriod)
-	}
-	container.CPUNum = cpuNum
-	return container
-}
-
-func normalizeEnv(env []string) map[string]string {
-	em := make(map[string]string)
-	for _, e := range env {
-		ps := strings.SplitN(e, "=", 2)
-		if len(ps) == 2 {
-			em[ps[0]] = ps[1]
-		} else {
-			em[ps[0]] = ""
-		}
-	}
-	return em
-}
-
-// GenerateContainerMeta builds the container meta object
-func GenerateContainerMeta(c enginetypes.ContainerJSON, meta *coretypes.LabelMeta, labels map[string]string) (*types.Container, error) {
-	name, entrypoint, ident, err := utils.GetAppInfo(c.Name)
-	if err != nil {
-		return nil, err
-	}
-
-	container := &types.Container{
-		StatusMeta:  coretypes.StatusMeta{ID: c.ID},
-		Name:        name,
-		EntryPoint:  entrypoint,
-		Ident:       ident,
-		Labels:      labels,
-		Env:         normalizeEnv(c.Config.Env),
-		HealthCheck: meta.HealthCheck,
-		CPUQuota:    c.HostConfig.Resources.CPUQuota,
-		CPUPeriod:   c.HostConfig.Resources.CPUPeriod,
-		Memory:      utils.Max(c.HostConfig.Memory, c.HostConfig.MemoryReservation),
-	}
-
-	if !c.State.Running || c.State.Pid == 0 {
-		container.Healthy = false
-		container.Running = false
-	} else {
-		// A container deployed for the first time may have no health check configured.
-		// In that case we treat it as always healthy and skip checking, but we still
-		// have to report whether it is healthy the first time it comes up.
-		container.Pid = c.State.Pid
-		container.Running = c.State.Running
-		container.Healthy = !(meta.HealthCheck != nil)
-	}
-
-	log.Debugf("[GenerateContainerMeta] Generate container meta %v %v", container.Name, container.EntryPoint)
-	return container, nil
-}
diff --git a/engine/status_report.go b/engine/status_report.go
deleted file mode 100644
index f2891aa..0000000
--- a/engine/status_report.go
+++ /dev/null
@@ -1,45 +0,0 @@
-package engine
-
-import (
-	"context"
-	"time"
-
-	log "github.com/sirupsen/logrus"
-)
-
-// heartbeat creates a new goroutine to report status every HeartbeatInterval seconds.
-// By default HeartbeatInterval is 0, in which case no heartbeat is done.
-func (e *Engine) heartbeat(ctx context.Context) {
-	if e.config.HeartbeatInterval <= 0 {
-		return
-	}
-
-	tick := time.NewTicker(time.Duration(e.config.HeartbeatInterval) * time.Second)
-	defer tick.Stop()
-
-	for {
-		select {
-		case <-tick.C:
-			go e.nodeStatusReport()
-		case <-ctx.Done():
-			return
-		}
-	}
-}
-
-// nodeStatusReport does the heartbeat and tells core this node is alive.
-// The TTL is set to twice the HeartbeatInterval, by default 360s,
-// which means if a node becomes unavailable, subscribers will notice within at most 360s.
-// HealthCheck.Timeout is used as timeout of requesting core API -func (e *Engine) nodeStatusReport() { - log.Debug("[nodeStatusReport] report begins") - defer log.Debug("[nodeStatusReport] report ends") - - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(e.config.HealthCheck.Timeout)*time.Second) - defer cancel() - - ttl := int64(e.config.HeartbeatInterval * 2) - if err := e.store.SetNodeStatus(ctx, ttl); err != nil { - log.Errorf("[nodeStatusReport] error when set node status: %v", err) - } -} diff --git a/go.mod b/go.mod index 26284be..7b05ef5 100644 --- a/go.mod +++ b/go.mod @@ -20,12 +20,9 @@ require ( github.com/stretchr/objx v0.2.0 // indirect github.com/stretchr/testify v1.7.0 github.com/urfave/cli/v2 v2.3.0 - go.etcd.io/etcd/api/v3 v3.5.0 - go.etcd.io/etcd/client/v3 v3.5.0 - go.opencensus.io v0.22.2 // indirect + go.etcd.io/etcd/api/v3 v3.5.0 // indirect + go.etcd.io/etcd/client/v3 v3.5.0 // indirect go.uber.org/automaxprocs v1.3.0 - google.golang.org/grpc v1.38.0 gopkg.in/yaml.v2 v2.4.0 - gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect gotest.tools/v3 v3.0.3 // indirect ) diff --git a/go.sum b/go.sum index e1848f1..641ad55 100644 --- a/go.sum +++ b/go.sum @@ -5,6 +5,7 @@ cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSR cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= +cloud.google.com/go v0.46.3 h1:AVXDdKsrtX33oR9fbCMu/+c1o8Ofjq6Ku/MInaLVg5Y= cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= @@ -25,7 +26,6 @@ github.com/CloudyKit/jet/v3 v3.0.0/go.mod h1:HKQPgSJmdK8hdoAbKUUWajkHyHo4RaU5rMd github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= github.com/Microsoft/go-winio v0.4.16-0.20201130162521-d1ffc52c7331 h1:3YnB7Hpmh1lPecPE8doMOtYCrMdrpedZOvxfuNES/Vk= github.com/Microsoft/go-winio v0.4.16-0.20201130162521-d1ffc52c7331/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0= -github.com/Microsoft/hcsshim v0.8.11 h1:qs8+XI1mFA1H/zhXT9qVG/lcJO18p1yCsICIrCjVXw8= github.com/Microsoft/hcsshim v0.8.11/go.mod h1:NtVKoYxQuTLx6gEq0L96c9Ju4JbRJ4nY2ow3VK6a9Lg= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= @@ -38,6 +38,7 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/alexcesaro/statsd v2.0.0+incompatible h1:HG17k1Qk8V1F4UOoq6tx+IUoAbOcI5PHzzEUGeDD72w= github.com/alexcesaro/statsd v2.0.0+incompatible/go.mod h1:vNepIbQAiyLe1j480173M6NYYaAsGwEcvuDTU3OCUGY= github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc= github.com/alicebob/miniredis/v2 v2.14.3/go.mod 
h1:gquAfGbzn92jvtrSC69+6zZnwSODVXVpYDRaGhWaL6I= @@ -49,6 +50,7 @@ github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmV github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= +github.com/benbjohnson/clock v1.0.3 h1:vkLuvpK4fmtSCuo60+yC63p7y0BmQ8gm5ZXGuBCJyXg= github.com/benbjohnson/clock v1.0.3/go.mod h1:bGMdMPoPVvcYyt1gHDf4J2KE153Yf9BuiUKYMaxlTDM= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= @@ -62,6 +64,7 @@ github.com/cenkalti/backoff/v4 v4.0.2 h1:JIufpQLbh4DkbQoii76ItQIUFzevQSqOLZca4ea github.com/cenkalti/backoff/v4 v4.0.2/go.mod h1:eEew/i+1Q6OrCDZh3WiXYv3+nJwBASZ8Bog/87DQnVg= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= +github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054 h1:uH66TXeswKn5PW5zdZ39xEwfS9an067BirqA+P4QaLI= github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= @@ -74,18 +77,19 @@ github.com/cilium/ebpf v0.0.0-20200110133405-4032b1d8aae3/go.mod h1:MA5e5Lr8slmE github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5 h1:xD/lrqdvwsc+O2bjSSi3YqY73Ke3LAiSCx49aCesA0E= github.com/cockroachdb/datadriven v0.0.0-20200714090401-bf6692d28da5/go.mod h1:h6jFvWxBdQXxjopDMZyH2UVceIRfR84bdzbkoKrsWNo= +github.com/cockroachdb/errors v1.2.4 h1:Lap807SXTH5tri2TivECb/4abUkMZC9zRoLarvcKDqs= github.com/cockroachdb/errors v1.2.4/go.mod h1:rQD95gz6FARkaKkQXUksEje/d9a6wBJoCr5oaCLELYA= +github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY= github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM= -github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59 h1:qWj4qVYZ95vLWwqyNJCQg7rDsG5wPdze0UaPolH7DUk= github.com/containerd/cgroups v0.0.0-20200531161412-0dbf7f05ba59/go.mod h1:pA0z1pT8KYB3TCXK/ocprsh7MAkoW8bZVzPdih9snmM= github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/containerd v1.3.2/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= github.com/containerd/containerd v1.4.3 h1:ijQT13JedHSHrQGWFcGEwzcNKrAGIiZ+jSD5QQG07SY= github.com/containerd/containerd v1.4.3/go.mod h1:bC6axHOhabU15QhwfG7w5PipXdVtMXFTttgp+kVtyUA= 
github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y= -github.com/containerd/continuity v0.0.0-20200710164510-efbc4488d8fe h1:PEmIrUvwG9Yyv+0WKZqjXfSFDeZjs/q15g0m08BYS9k= github.com/containerd/continuity v0.0.0-20200710164510-efbc4488d8fe/go.mod h1:cECdGN1O8G9bgKTlLhuPJimka6Xb/Gg7vYzCTNVxhvo= github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI= github.com/containerd/go-runc v0.0.0-20180907222934-5a6d9f37cfa3/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0= @@ -128,7 +132,6 @@ github.com/docker/docker v20.10.0+incompatible h1:4g8Xjho+7quMwzsTrhtrWpdQU9UTc2 github.com/docker/docker v20.10.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= -github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8= github.com/docker/go-metrics v0.0.1/go.mod h1:cG1hvH2utMXtqgqqYE9plW6lDxS3/5ayHzueweSI3Vw= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= @@ -155,6 +158,7 @@ github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoD github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc= +github.com/getsentry/raven-go v0.2.0 h1:no+xWJRb5ZI7eE8TWgIq1jLulQiIoLG0IfYxv5JYMGs= github.com/getsentry/raven-go v0.2.0/go.mod h1:KungGk8q33+aIAZUIVWZDr2OfAEBsO49PX4NzFV5kcQ= github.com/getsentry/sentry-go v0.9.0 h1:KIfpY/D9hX3gWAEd3d8z6ImuHNWtqEsjlpdF8zXFsHM= github.com/getsentry/sentry-go v0.9.0/go.mod h1:kELm/9iCblqUYh+ZRML7PNdCvEuw24wBvJPYyi86cws= @@ -163,6 +167,7 @@ github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NB github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM= github.com/gliderlabs/ssh v0.2.2/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= +github.com/go-errors/errors v1.0.1 h1:LUHzmkK3GUKUrL/1gfBUxAHzcev3apQlezX/+O7ma6w= github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= github.com/go-git/gcfg v1.5.0/go.mod h1:5m20vg6GwYabIxaOonVkTdrILxQMpEShl1xiMF4ua+E= github.com/go-git/go-billy/v5 v5.0.0/go.mod h1:pmpqyWchKfYfrkb/UVH4otLvyi/5gJlGI4Hb3ZqZ3W0= @@ -231,6 +236,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -328,6 +334,7 @@ 
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.8/go.mod h1:O1sed60cT9XZ5uDucP5qwvh+TE3NnUj51EiZO/lmSfw= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/labstack/echo/v4 v4.1.11/go.mod h1:i541M3Fj6f76NZtHSj7TXnyM8n2gaodfvfxNnFqi74g= github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= @@ -354,9 +361,7 @@ github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS4 github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= -github.com/moby/sys/mount v0.2.0 h1:WhCW5B355jtxndN5ovugJlMFJawbUODuW8fSnEH6SSM= github.com/moby/sys/mount v0.2.0/go.mod h1:aAivFE2LB3W4bACsUXChRHQ0qKWsetY4Y9V7sxOougM= -github.com/moby/sys/mountinfo v0.4.0 h1:1KInV3Huv18akCu58V7lzNlt+jFmqlu1EaErnEHE/VM= github.com/moby/sys/mountinfo v0.4.0/go.mod h1:rEr8tzG/lsIZHBtN/JjGG+LMYx9eXgW2JI+6q0qou+A= github.com/moby/term v0.0.0-20201110203204-bea5bbe245bf h1:Un6PNx5oMK6CCwO3QTUyPiK2mtZnPrpDl5UnZ64eCkw= github.com/moby/term v0.0.0-20201110203204-bea5bbe245bf/go.mod h1:FBS0z0QWA44HXygs7VXDUOGoN/1TV3RuWkLO04am3wc= @@ -376,6 +381,7 @@ github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5Vgl github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= @@ -398,7 +404,6 @@ github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3I github.com/opencontainers/image-spec v1.0.1 h1:JMemWkRwHx4Zj+fVxWoMCFm/8sYGGrUVojFA6h/TRcI= github.com/opencontainers/image-spec v1.0.1/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/runc v0.0.0-20190115041553-12f6a991201f/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= -github.com/opencontainers/runc v0.1.1 h1:GlxAyO6x8rfZYN9Tt0Kti5a/cP41iuiO2yYT0IJGY8Y= github.com/opencontainers/runc v0.1.1/go.mod h1:qT5XzbpPznkRYVz/mWwUaVBUv2rmF59PVA73FjuZG0U= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= @@ -406,6 +411,7 @@ github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FI github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pelletier/go-toml v1.2.0/go.mod 
h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -560,8 +566,6 @@ go.etcd.io/etcd/tests/v3 v3.5.0/go.mod h1:f+mtZ1bE1YPvgKdOJV2BKy4JQW0nAFnQehgOE7 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.1/go.mod h1:Ap50jQcDJrx6rB6VgeeFPtuPIf3wMRvRfrfYDO6+BmA= -go.opencensus.io v0.22.2 h1:75k/FF0Q2YM8QYo07VPddOLBslDt1MZOdEslOHvmzAs= -go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opentelemetry.io/contrib v0.20.0 h1:ubFQUn0VCZ0gPwIoJfBJVpeBlyRMxu8Mm/huKWYd9p0= go.opentelemetry.io/contrib v0.20.0/go.mod h1:G/EtFaa6qaN7+LxqfIAT3GiZa7Wv5DTBUzl5H4LY0Kc= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0 h1:sO4WKdPAudZGKPcpZT4MJn6JaDmpyLrMPDGGyA1SttE= @@ -576,6 +580,7 @@ go.opentelemetry.io/otel/metric v0.19.0/go.mod h1:8f9fglJPRnXuskQmKpnad31lcLJ2Vm go.opentelemetry.io/otel/metric v0.20.0 h1:4kzhXFP+btKm4jwxpjIqjs41A7MakRFUS86bqLHTIw8= go.opentelemetry.io/otel/metric v0.20.0/go.mod h1:598I5tYlH1vzBjn+BTuhzTCSb/9debfNp6R3s7Pr1eU= go.opentelemetry.io/otel/oteltest v0.19.0/go.mod h1:tI4yxwh8U21v7JD6R3BcA/2+RBoTKFexE/PJ/nSO7IA= +go.opentelemetry.io/otel/oteltest v0.20.0 h1:HiITxCawalo5vQzdHfKeZurV8x7ljcqAgiWzF6Vaeaw= go.opentelemetry.io/otel/oteltest v0.20.0/go.mod h1:L7bgKf9ZB7qCwT9Up7i9/pn0PWIa9FqQ2IQ8LoxiGnw= go.opentelemetry.io/otel/sdk v0.20.0 h1:JsxtGXd06J8jrnya7fdI/U/MR6yXA5DtbZy+qoHQlr8= go.opentelemetry.io/otel/sdk v0.20.0/go.mod h1:g/IcepuwNsoiX5Byy2nNV0ySUF1em498m7hBWC279Yc= @@ -593,6 +598,7 @@ go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/automaxprocs v1.3.0 h1:II28aZoGdaglS5vVNnspf28lnZpXScxtIozx1lAjdb0= go.uber.org/automaxprocs v1.3.0/go.mod h1:9CWT6lKIep8U41DDaPiH6eFscnTyjfTANNQNx6LrIcA= +go.uber.org/goleak v1.1.10 h1:z+mqJhf6ss6BSfSM671tgKyZBFPTTJM+HLxnhPC3wu0= go.uber.org/goleak v1.1.10/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= @@ -682,6 +688,7 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d h1:TzXSXBo42m9gQenoE3b9BGiEpg5IG2JkU5FkPIawgtw= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -806,6 +813,7 @@ google.golang.org/appengine v1.1.0/go.mod 
h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.3 h1:hvZejVcIxAKHR8Pq2gXaDggf6CWT1QEqO+JEBeOKCG8= google.golang.org/appengine v1.6.3/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -852,6 +860,7 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -880,8 +889,10 @@ gopkg.in/yaml.v3 v3.0.0-20191120175047-4206685974f2/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= +gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= diff --git a/engine/logs/enc.go b/logs/enc.go similarity index 99% rename from engine/logs/enc.go rename to logs/enc.go index 737626a..d52512e 100644 --- a/engine/logs/enc.go +++ b/logs/enc.go @@ -6,8 +6,9 @@ import ( "io" "sync" - "github.com/coreos/go-systemd/journal" "github.com/projecteru2/agent/types" + + "github.com/coreos/go-systemd/journal" ) // Encoder . 
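Note that the rename hunks here and below also regroup imports into the repo's convention: standard library first, then projecteru2 packages, then third-party modules. A minimal illustrative file (not part of the patch) showing that layout:

package logs

import (
	"fmt"
	"io"

	"github.com/projecteru2/agent/types"

	log "github.com/sirupsen/logrus"
)

// writeLine exists only to give the imports a use; the grouping above is
// the point: stdlib, then project-internal, then third-party.
func writeLine(w io.Writer, l *types.Log) {
	if _, err := fmt.Fprintln(w, l.Data); err != nil {
		log.Errorf("write log line failed: %v", err)
	}
}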
diff --git a/engine/logs/writer.go b/logs/writer.go
similarity index 99%
rename from engine/logs/writer.go
rename to logs/writer.go
index b42e197..5a2271e 100644
--- a/engine/logs/writer.go
+++ b/logs/writer.go
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/projecteru2/agent/types"
+
 	log "github.com/sirupsen/logrus"
 )
 
diff --git a/engine/logs/writer_test.go b/logs/writer_test.go
similarity index 100%
rename from engine/logs/writer_test.go
rename to logs/writer_test.go
index 4f328ff..1b06850 100644
--- a/engine/logs/writer_test.go
+++ b/logs/writer_test.go
@@ -4,9 +4,9 @@ import (
 	"net"
 	"testing"
 
-	"github.com/stretchr/testify/assert"
-
 	"github.com/projecteru2/agent/types"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestNewWriterWithUDP(t *testing.T) {
diff --git a/manager/node/heartbeat.go b/manager/node/heartbeat.go
new file mode 100644
index 0000000..57bf928
--- /dev/null
+++ b/manager/node/heartbeat.go
@@ -0,0 +1,53 @@
+package node
+
+import (
+	"context"
+	"time"
+
+	"github.com/projecteru2/agent/utils"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// heartbeat creates a new goroutine to report status every HeartbeatInterval seconds.
+// By default HeartbeatInterval is 0, which disables the heartbeat.
+func (m *Manager) heartbeat(ctx context.Context) {
+	if m.config.HeartbeatInterval <= 0 {
+		return
+	}
+
+	go m.nodeStatusReport(ctx)
+
+	tick := time.NewTicker(time.Duration(m.config.HeartbeatInterval) * time.Second)
+	defer tick.Stop()
+
+	for {
+		select {
+		case <-tick.C:
+			go m.nodeStatusReport(ctx)
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// nodeStatusReport does heartbeat, tells core this node is alive.
+// The TTL is set to double the HeartbeatInterval, 360s by default,
+// which means if a node is not available, subscribers will notice after at least 360s.
+// HealthCheck.Timeout is used as the timeout for requests to the core API.
+func (m *Manager) nodeStatusReport(ctx context.Context) {
+	log.Debug("[nodeStatusReport] report begins")
+	defer log.Debug("[nodeStatusReport] report ends")
+
+	if !m.runtimeClient.IsDaemonRunning(ctx) {
+		log.Debugf("[nodeStatusReport] cannot connect to runtime daemon")
+		return
+	}
+
+	utils.WithTimeout(ctx, time.Duration(m.config.HealthCheck.Timeout)*time.Second, func(ctx context.Context) {
+		ttl := int64(m.config.HeartbeatInterval * 2)
+		if err := m.store.SetNodeStatus(ctx, ttl); err != nil {
+			log.Errorf("[nodeStatusReport] error when setting node status: %v", err)
+		}
+	})
+}
diff --git a/manager/node/heartbeat_test.go b/manager/node/heartbeat_test.go
new file mode 100644
index 0000000..f1b2e8f
--- /dev/null
+++ b/manager/node/heartbeat_test.go
@@ -0,0 +1,30 @@
+package node
+
+import (
+	"context"
+	"testing"
+
+	runtimemocks "github.com/projecteru2/agent/runtime/mocks"
+	storemocks "github.com/projecteru2/agent/store/mocks"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNodeStatusReport(t *testing.T) {
+	ctx := context.Background()
+	manager := newMockNodeManager(t)
+	runtime := manager.runtimeClient.(*runtimemocks.Nerv)
+	store := manager.store.(*storemocks.MockStore)
+
+	runtime.SetDaemonRunning(false)
+	manager.nodeStatusReport(ctx)
+	status, err := store.GetNodeStatus(ctx, "fake")
+	assert.Nil(t, err)
+	assert.Equal(t, status.Alive, false)
+
+	runtime.SetDaemonRunning(true)
+	manager.nodeStatusReport(ctx)
+	status, err = store.GetNodeStatus(ctx, "fake")
+	assert.Nil(t, err)
+	assert.Equal(t, status.Alive, true)
+}
diff --git a/manager/node/manager.go b/manager/node/manager.go
new file mode 100644
index 0000000..562a20a
--- /dev/null
+++ b/manager/node/manager.go
@@ -0,0 +1,89 @@
+package node
+
+import (
+	"context"
+	"errors"
+
+	"github.com/projecteru2/agent/common"
+	"github.com/projecteru2/agent/runtime"
+	"github.com/projecteru2/agent/runtime/docker"
+	runtimemocks "github.com/projecteru2/agent/runtime/mocks"
+	"github.com/projecteru2/agent/store"
+	corestore "github.com/projecteru2/agent/store/core"
+	storemocks "github.com/projecteru2/agent/store/mocks"
+	"github.com/projecteru2/agent/types"
+	"github.com/projecteru2/agent/utils"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// Manager manages node status
+type Manager struct {
+	config        *types.Config
+	store         store.Store
+	runtimeClient runtime.Runtime
+}
+
+// NewManager .
+func NewManager(ctx context.Context, config *types.Config) (*Manager, error) { + m := &Manager{config: config} + switch config.Store { + case common.GRPCStore: + corestore.Init(ctx, config) + m.store = corestore.Get() + if m.store == nil { + return nil, errors.New("failed to get store client") + } + case common.MocksStore: + m.store = storemocks.FromTemplate() + default: + return nil, errors.New("unknown store type") + } + + switch config.Runtime { + case common.DockerRuntime: + node, err := m.store.GetNode(ctx, config.HostName) + if err != nil { + log.Errorf("[NewManager] failed to get node %s, err: %s", config.HostName, err) + return nil, err + } + + nodeIP := utils.GetIP(node.Endpoint) + if nodeIP == "" { + nodeIP = common.LocalIP + } + docker.InitClient(config, nodeIP) + m.runtimeClient = docker.GetClient() + if m.runtimeClient == nil { + return nil, errors.New("failed to get runtime client") + } + case common.MocksRuntime: + m.runtimeClient = runtimemocks.FromTemplate() + default: + return nil, errors.New("unknown runtime type") + } + + return m, nil +} + +// Run runs a node manager +func (m *Manager) Run(ctx context.Context) error { + log.Infof("[NodeManager] start node status heartbeat") + go m.heartbeat(ctx) + + // wait for signal + <-ctx.Done() + log.Info("[NodeManager] exiting") + log.Infof("[NodeManager] mark node %s as down", m.config.HostName) + + // ctx is now canceled. use a new context. + var err error + utils.WithTimeout(context.TODO(), m.config.GlobalConnectionTimeout, func(ctx context.Context) { + err = m.store.SetNode(ctx, m.config.HostName, false) + }) + if err != nil { + log.Errorf("[NodeManager] failed to mark the node %s as down, err: %s", m.config.HostName, err) + return err + } + return nil +} diff --git a/manager/node/manager_test.go b/manager/node/manager_test.go new file mode 100644 index 0000000..c126af8 --- /dev/null +++ b/manager/node/manager_test.go @@ -0,0 +1,36 @@ +package node + +import ( + "context" + "testing" + "time" + + "github.com/projecteru2/agent/common" + "github.com/projecteru2/agent/types" + + "github.com/stretchr/testify/assert" +) + +func newMockNodeManager(t *testing.T) *Manager { + config := &types.Config{ + HostName: "fake", + HeartbeatInterval: 2, + CheckOnlyMine: false, + Store: common.MocksStore, + Runtime: common.MocksRuntime, + Log: types.LogConfig{ + Stdout: true, + }, + HealthCheck: types.HealthCheckConfig{ + Interval: 10, + Timeout: 5, + CacheTTL: 300, + EnableSelfmon: true, + }, + GlobalConnectionTimeout: 5 * time.Second, + } + + m, err := NewManager(context.Background(), config) + assert.Nil(t, err) + return m +} diff --git a/manager/workload/attach.go b/manager/workload/attach.go new file mode 100644 index 0000000..d45973e --- /dev/null +++ b/manager/workload/attach.go @@ -0,0 +1,101 @@ +package workload + +import ( + "bufio" + "context" + "io" + "strings" + "sync" + "time" + + "github.com/projecteru2/agent/common" + "github.com/projecteru2/agent/logs" + "github.com/projecteru2/agent/types" + "github.com/projecteru2/agent/utils" + coreutils "github.com/projecteru2/core/utils" + + log "github.com/sirupsen/logrus" +) + +func (m *Manager) attach(ctx context.Context, ID string) { + log.Debugf("[attach] attaching workload %v", ID) + transfer := m.forwards.Get(ID, 0) + if transfer == "" { + transfer = logs.Discard + } + writer, err := logs.NewWriter(transfer, m.config.Log.Stdout) + if err != nil { + log.Errorf("[attach] Create log forward failed %s", err) + return + } + + // get app info + workloadName, err := 
m.runtimeClient.GetWorkloadName(ctx, ID) + if err != nil { + log.Errorf("[attach] failed to get workload name, id: %v, err: %v", ID, err) + return + } + + name, entryPoint, ident, err := utils.GetAppInfo(workloadName) + if err != nil { + log.Errorf("[attach] invalid workload name %s, err: %v", workloadName, err) + return + } + + // attach workload + outr, errr, err := m.runtimeClient.AttachWorkload(ctx, ID) + if err != nil { + log.Errorf("[attach] failed to attach workload %s, err: %v", workloadName, err) + return + } + log.Infof("[attach] attach %s workload %s success", workloadName, coreutils.ShortID(ID)) + + cancelCtx, cancel := context.WithCancel(ctx) + defer cancel() + + // attach metrics + go m.runtimeClient.CollectWorkloadMetrics(cancelCtx, ID) + + extra, err := m.runtimeClient.LogFieldsExtra(ctx, ID) + if err != nil { + log.Errorf("[attach] failed to get log fields extra, err: %v", err) + } + + wg := &sync.WaitGroup{} + pump := func(typ string, source io.Reader) { + defer wg.Done() + buf := bufio.NewReader(source) + for { + data, err := buf.ReadString('\n') + if err != nil { + if err != io.EOF { + log.Errorf("[attach] attach pump %s %s %s %s", workloadName, coreutils.ShortID(ID), typ, err) + } + return + } + data = strings.TrimSuffix(data, "\n") + data = strings.TrimSuffix(data, "\r") + l := &types.Log{ + ID: ID, + Name: name, + Type: typ, + EntryPoint: entryPoint, + Ident: ident, + Data: utils.ReplaceNonUtf8(data), + Datetime: time.Now().Format(common.DateTimeFormat), + Extra: extra, + } + if m.logBroadcaster != nil && m.logBroadcaster.logC != nil { + m.logBroadcaster.logC <- l + } + if err := writer.Write(l); err != nil && !(entryPoint == "agent" && utils.IsDockerized()) { + log.Errorf("[attach] %s workload %s_%s write failed %v", workloadName, entryPoint, coreutils.ShortID(ID), err) + log.Errorf("[attach] %s", data) + } + } + } + wg.Add(2) + go pump("stdout", outr) + go pump("stderr", errr) + wg.Wait() +} diff --git a/manager/workload/attach_test.go b/manager/workload/attach_test.go new file mode 100644 index 0000000..1a15e38 --- /dev/null +++ b/manager/workload/attach_test.go @@ -0,0 +1,30 @@ +package workload + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestAttach(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + manager := newMockWorkloadManager(t) + go func() { + for { + log := <-manager.logBroadcaster.logC + // see: runtime.FromTemplate + switch log.Type { + case "stdout": + assert.Equal(t, log.Data, "stdout") + case "stderr": + assert.Equal(t, log.Data, "stderr") + } + } + }() + + manager.attach(ctx, "Rei") + time.Sleep(2 * time.Second) +} diff --git a/engine/status/event.go b/manager/workload/event.go similarity index 59% rename from engine/status/event.go rename to manager/workload/event.go index d1a5a1e..b1911ce 100644 --- a/engine/status/event.go +++ b/manager/workload/event.go @@ -1,40 +1,41 @@ -package status +package workload import ( "context" "sync" - eventtypes "github.com/docker/docker/api/types/events" + "github.com/projecteru2/agent/types" coreutils "github.com/projecteru2/core/utils" + log "github.com/sirupsen/logrus" ) // EventHandler define event handler type EventHandler struct { sync.Mutex - handlers map[string]func(context.Context, eventtypes.Message) + handlers map[string]func(context.Context, *types.WorkloadEventMessage) } // NewEventHandler new a event handler func NewEventHandler() *EventHandler { - return &EventHandler{handlers: 
make(map[string]func(context.Context, eventtypes.Message))} + return &EventHandler{handlers: make(map[string]func(context.Context, *types.WorkloadEventMessage))} } // Handle hand a event -func (e *EventHandler) Handle(action string, h func(context.Context, eventtypes.Message)) { +func (e *EventHandler) Handle(action string, h func(context.Context, *types.WorkloadEventMessage)) { e.Lock() defer e.Unlock() e.handlers[action] = h } // Watch watch change -func (e *EventHandler) Watch(c <-chan eventtypes.Message) { +func (e *EventHandler) Watch(ctx context.Context, c <-chan *types.WorkloadEventMessage) { for ev := range c { - log.Infof("[Watch] Monitor: cid %s action %s", coreutils.ShortID(ev.ID), ev.Action) + log.Infof("[Watch] Monitor: workload id %s action %s", coreutils.ShortID(ev.ID), ev.Action) e.Lock() h := e.handlers[ev.Action] if h != nil { - go h(context.TODO(), ev) + go h(ctx, ev) } e.Unlock() } diff --git a/manager/workload/event_test.go b/manager/workload/event_test.go new file mode 100644 index 0000000..176e105 --- /dev/null +++ b/manager/workload/event_test.go @@ -0,0 +1,50 @@ +package workload + +import ( + "context" + "testing" + "time" + + runtimemocks "github.com/projecteru2/agent/runtime/mocks" + storemocks "github.com/projecteru2/agent/store/mocks" + "github.com/projecteru2/agent/types" + + "github.com/stretchr/testify/assert" +) + +func TestEvent(t *testing.T) { + ctx := context.Background() + + manager := newMockWorkloadManager(t) + runtime := manager.runtimeClient.(*runtimemocks.Nerv) + store := manager.store.(*storemocks.MockStore) + // init workload status + assert.Nil(t, manager.load(ctx)) + assertInitStatus(t, store) + + // errChan is useless here + msgChan, _ := manager.initMonitor(ctx) + go manager.monitor(ctx, msgChan) + + // starts the events: Shinji 400%, Asuka starts, Asuka dies, Rei dies + go runtime.StartEvents() + time.Sleep(5 * time.Second) + + assert.Equal(t, store.GetMockWorkloadStatus("Asuka"), &types.WorkloadStatus{ + ID: "Asuka", + Running: false, + Healthy: false, + }) + + assert.Equal(t, store.GetMockWorkloadStatus("Rei"), &types.WorkloadStatus{ + ID: "Rei", + Running: false, + Healthy: false, + }) + + assert.Equal(t, store.GetMockWorkloadStatus("Shinji"), &types.WorkloadStatus{ + ID: "Shinji", + Running: true, + Healthy: true, + }) +} diff --git a/manager/workload/filter.go b/manager/workload/filter.go new file mode 100644 index 0000000..fc01160 --- /dev/null +++ b/manager/workload/filter.go @@ -0,0 +1,26 @@ +package workload + +import ( + "fmt" + + "github.com/projecteru2/agent/types" + "github.com/projecteru2/agent/utils" + "github.com/projecteru2/core/cluster" +) + +func (m *Manager) getFilter(extend map[string]string) []types.KV { + var f []types.KV + f = append(f, types.KV{Key: "label", Value: fmt.Sprintf("%s=1", cluster.ERUMark)}) + + if m.config.CheckOnlyMine && utils.UseLabelAsFilter() { + f = append(f, types.KV{Key: "label", Value: fmt.Sprintf("eru.nodename=%s", m.config.HostName)}) + if m.storeIdentifier != "" { + f = append(f, types.KV{Key: "label", Value: fmt.Sprintf("eru.coreid=%s", m.storeIdentifier)}) + } + } + + for k, v := range extend { + f = append(f, types.KV{Key: k, Value: v}) + } + return f +} diff --git a/manager/workload/health_check.go b/manager/workload/health_check.go new file mode 100644 index 0000000..dbfe5f6 --- /dev/null +++ b/manager/workload/health_check.go @@ -0,0 +1,85 @@ +package workload + +import ( + "context" + "errors" + "time" + + "github.com/projecteru2/agent/types" + "github.com/projecteru2/agent/utils" + + 
log "github.com/sirupsen/logrus"
+)
+
+func (m *Manager) healthCheck(ctx context.Context) {
+	tick := time.NewTicker(time.Duration(m.config.HealthCheck.Interval) * time.Second)
+	defer tick.Stop()
+
+	for {
+		select {
+		case <-tick.C:
+			go m.checkAllWorkloads(ctx)
+		case <-ctx.Done():
+			return
+		}
+	}
+}
+
+// checkAllWorkloads checks every workload labelled ERU=1.
+// We need to list all of them here: by the time monitor sees a die event the status is already marked false,
+// but a health check may have just returned true and written that to core.
+// Checking everything here as well keeps the data eventually consistent.
+func (m *Manager) checkAllWorkloads(ctx context.Context) {
+	log.Debugf("[checkAllWorkloads] health check begin")
+	workloadIDs, err := m.runtimeClient.ListWorkloadIDs(ctx, true, nil)
+	if err != nil {
+		log.Errorf("[checkAllWorkloads] error when listing all workloads with label \"ERU=1\": %v", err)
+		return
+	}
+
+	for _, wid := range workloadIDs {
+		go m.checkOneWorkload(ctx, wid)
+	}
+}
+
+// checkOneWorkload checks and saves the status of one workload, returning whether it is healthy.
+// The return value exists only for retrying; it carries no other meaning.
+func (m *Manager) checkOneWorkload(ctx context.Context, ID string) bool {
+	workloadStatus, err := m.runtimeClient.GetStatus(ctx, ID, true)
+	if err != nil {
+		log.Errorf("[checkOneWorkload] failed to get status of workload %s, err: %v", ID, err)
+		return false
+	}
+
+	if err = m.setWorkloadStatus(ctx, workloadStatus); err != nil {
+		log.Errorf("[checkOneWorkload] update workload status failed, err: %v", err)
+	}
+	return workloadStatus.Healthy
+}
+
+// setWorkloadStatus saves a workload status, with retries and a timeout.
+func (m *Manager) setWorkloadStatus(ctx context.Context, status *types.WorkloadStatus) error {
+	return utils.BackoffRetry(ctx, 3, func() error {
+		var err error
+		utils.WithTimeout(ctx, m.config.GlobalConnectionTimeout, func(ctx context.Context) {
+			err = m.store.SetWorkloadStatus(ctx, status, m.config.GetHealthCheckStatusTTL())
+		})
+		return err
+	})
+}
+
+// checkOneWorkloadWithBackoffRetry checks one workload, with retries.
+func (m *Manager) checkOneWorkloadWithBackoffRetry(ctx context.Context, ID string) {
+	log.Debugf("[checkOneWorkloadWithBackoffRetry] check workload %s", ID)
+	err := utils.BackoffRetry(ctx, utils.GetMaxAttemptsByTTL(m.config.GetHealthCheckStatusTTL()), func() error {
+		if !m.checkOneWorkload(ctx, ID) {
+			// this err only decides whether to keep retrying; no need to log it
+			return errors.New("not healthy")
+		}
+		return nil
+	})
+
+	if err != nil {
+		log.Debugf("[checkOneWorkloadWithBackoffRetry] workload %s still not healthy", ID)
+	}
+}
diff --git a/manager/workload/health_check_test.go b/manager/workload/health_check_test.go
new file mode 100644
index 0000000..00875af
--- /dev/null
+++ b/manager/workload/health_check_test.go
@@ -0,0 +1,19 @@
+package workload
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/projecteru2/agent/store/mocks"
+)
+
+func TestHealthCheck(t *testing.T) {
+	manager := newMockWorkloadManager(t)
+	ctx := context.Background()
+	manager.checkAllWorkloads(ctx)
+	store := manager.store.(*mocks.MockStore)
+	time.Sleep(2 * time.Second)
+
+	assertInitStatus(t, store)
+}
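checkOneWorkloadWithBackoffRetry above leans on utils.BackoffRetry and utils.GetMaxAttemptsByTTL, whose implementations are not part of this patch. A rough, standalone sketch of the contract the caller assumes (capped attempts with exponential backoff), not the actual utils code:

package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// backoffRetry runs f up to maxAttempts times, doubling the wait between
// attempts, and stops early on success or context cancellation.
func backoffRetry(ctx context.Context, maxAttempts int, f func() error) error {
	wait := time.Second
	var err error
	for i := 0; i < maxAttempts; i++ {
		if err = f(); err == nil {
			return nil
		}
		select {
		case <-time.After(wait):
			wait *= 2
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return err
}

func main() {
	attempts := 0
	err := backoffRetry(context.Background(), 3, func() error {
		attempts++
		if attempts < 3 {
			return errors.New("not healthy")
		}
		return nil
	})
	fmt.Println(attempts, err) // 3 <nil>
}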
diff --git a/manager/workload/load.go b/manager/workload/load.go
new file mode 100644
index 0000000..798d246
--- /dev/null
+++ b/manager/workload/load.go
@@ -0,0 +1,44 @@
+package workload
+
+import (
+	"context"
+	"sync"
+
+	coreutils "github.com/projecteru2/core/utils"
+
+	log "github.com/sirupsen/logrus"
+)
+
+func (m *Manager) load(ctx context.Context) error {
+	log.Info("[load] Load workloads")
+	workloadIDs, err := m.runtimeClient.ListWorkloadIDs(ctx, true, nil)
+	if err != nil {
+		return err
+	}
+
+	wg := &sync.WaitGroup{}
+	for _, wid := range workloadIDs {
+		log.Debugf("[load] detect workload %s", coreutils.ShortID(wid))
+		wg.Add(1)
+		go func(ID string) {
+			defer wg.Done()
+			workloadStatus, err := m.runtimeClient.GetStatus(ctx, ID, true)
+			if err != nil {
+				log.Errorf("[load] get workload status failed %v", err)
+				return
+			}
+
+			if workloadStatus.Running {
+				log.Debugf("[load] workload %s is running", workloadStatus.ID)
+				go m.attach(ctx, ID)
+			}
+
+			// no health check here
+			if err := m.setWorkloadStatus(ctx, workloadStatus); err != nil {
+				log.Errorf("[load] update workload status failed %v", err)
+			}
+		}(wid)
+	}
+	wg.Wait()
+	return nil
+}
diff --git a/manager/workload/load_test.go b/manager/workload/load_test.go
new file mode 100644
index 0000000..18b6930
--- /dev/null
+++ b/manager/workload/load_test.go
@@ -0,0 +1,43 @@
+package workload
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/projecteru2/agent/store/mocks"
+	"github.com/projecteru2/agent/types"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func assertInitStatus(t *testing.T, store *mocks.MockStore) {
+	assert.Equal(t, store.GetMockWorkloadStatus("Asuka"), &types.WorkloadStatus{
+		ID:      "Asuka",
+		Running: false,
+		Healthy: false,
+	})
+
+	assert.Equal(t, store.GetMockWorkloadStatus("Rei"), &types.WorkloadStatus{
+		ID:      "Rei",
+		Running: true,
+		Healthy: false,
+	})
+
+	assert.Equal(t, store.GetMockWorkloadStatus("Shinji"), &types.WorkloadStatus{
+		ID:      "Shinji",
+		Running: true,
+		Healthy: true,
+	})
+}
+
+func TestLoad(t *testing.T) {
+	manager := newMockWorkloadManager(t)
+	store := manager.store.(*mocks.MockStore)
+	ctx := context.Background()
+	err := manager.load(ctx)
+	// wait for attaching
+	time.Sleep(2 * time.Second)
+	assert.Nil(t, err)
+	assertInitStatus(t, store)
+}
diff --git a/manager/workload/log.go b/manager/workload/log.go
new file mode 100644
index 0000000..cb1ee80
--- /dev/null
+++ b/manager/workload/log.go
@@ -0,0 +1,85 @@
+package workload
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+
+	"github.com/projecteru2/agent/types"
+	coreutils "github.com/projecteru2/core/utils"
+
+	"github.com/sirupsen/logrus"
+)
+
+type subscriber struct {
+	buf         *bufio.ReadWriter
+	unsubscribe func()
+}
+
+// logBroadcaster receives logs and broadcasts them to subscribers
+type logBroadcaster struct {
+	logC        chan *types.Log
+	subscribers map[string]map[string]*subscriber
+}
+
+func newLogBroadcaster() *logBroadcaster {
+	return &logBroadcaster{
+		logC:        make(chan *types.Log),
+		subscribers: map[string]map[string]*subscriber{},
+	}
+}
+
+// subscribe subscribes to the logs of a specific app.
+func (l *logBroadcaster) subscribe(app string, buf *bufio.ReadWriter) {
+	if _, ok := l.subscribers[app]; !ok {
+		l.subscribers[app] = map[string]*subscriber{}
+	}
+
+	ID := coreutils.RandomString(8)
+	ctx, cancel := context.WithCancel(context.TODO())
+	defer cancel()
+
+	l.subscribers[app][ID] = &subscriber{buf, cancel}
+	logrus.Infof("%s %s log subscribed", app, ID)
+	<-ctx.Done()
+
+	delete(l.subscribers[app], ID)
+	if len(l.subscribers[app]) == 0 {
+		delete(l.subscribers, app)
+	}
+}
+
+func (l *logBroadcaster) broadcast(log *types.Log) {
+	if _, ok := l.subscribers[log.Name]; !ok {
+		return
+	}
+	data, err := json.Marshal(log)
+	if err != nil {
+		logrus.Error(err)
+		return
+	}
+	line := fmt.Sprintf("%X\r\n%s\r\n\r\n", len(data)+2, string(data))
+	for ID, subscriber := range l.subscribers[log.Name] {
+		if _, err := subscriber.buf.WriteString(line); err != nil {
+			logrus.Error(err)
+			logrus.Infof("%s %s detached", log.Name, ID)
+			subscriber.unsubscribe()
+			continue // do not flush a subscriber we just dropped
+		}
+		subscriber.buf.Flush()
+		logrus.Debugf("sub %s get %s", ID, line)
+	}
+}
+
+func (l *logBroadcaster) run(ctx context.Context) {
+	for {
+		select {
+		case <-ctx.Done():
+			logrus.Infof("[logBroadcaster] stops")
+			return
+		case log := <-l.logC:
+			l.broadcast(log)
+		}
+	}
+}
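broadcast above hand-rolls HTTP/1.1 chunked framing: chunk size in hex, CRLF, payload, CRLF. The +2 accounts for the CRLF that rides inside the payload itself. A standalone sketch of the framing (the JSON line is invented for illustration):

package main

import "fmt"

// frame reproduces the framing broadcast builds by hand: the payload is
// the JSON line plus its own trailing CRLF, hence the size of len+2.
func frame(jsonLine string) string {
	payload := jsonLine + "\r\n"
	return fmt.Sprintf("%X\r\n%s\r\n", len(payload), payload)
}

func main() {
	chunk := frame(`{"id":"Rei","data":"data0"}`)
	fmt.Printf("%q\n", chunk) // "1D\r\n{\"id\":\"Rei\",\"data\":\"data0\"}\r\n\r\n"
}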
diff --git a/manager/workload/log_test.go b/manager/workload/log_test.go
new file mode 100644
index 0000000..5a77f6e
--- /dev/null
+++ b/manager/workload/log_test.go
@@ -0,0 +1,78 @@
+package workload
+
+import (
+	"bufio"
+	"context"
+	"net/http"
+	"testing"
+	"time"
+
+	"github.com/projecteru2/agent/types"
+
+	"github.com/bmizerany/pat"
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestLogBroadcaster(t *testing.T) {
+	l := newLogBroadcaster()
+
+	handler := func(w http.ResponseWriter, req *http.Request) {
+		app := req.URL.Query().Get("app")
+		if app == "" {
+			w.WriteHeader(http.StatusBadRequest)
+			return
+		}
+		// write the header first so non-streaming clients (e.g. httpie) get a response before the hijack
+		w.WriteHeader(http.StatusOK)
+		if hijack, ok := w.(http.Hijacker); ok {
+			conn, buf, err := hijack.Hijack()
+			if err != nil {
+				logrus.Errorf("[apiLog] connect failed %v", err)
+				return
+			}
+			defer conn.Close()
+			l.subscribe(app, buf)
+		}
+	}
+
+	go func() {
+		restfulAPIServer := pat.New()
+		restfulAPIServer.Add("GET", "/log/", http.HandlerFunc(handler))
+		http.Handle("/", restfulAPIServer)
+		http.ListenAndServe(":12310", nil)
+	}()
+
+	go func() {
+		time.Sleep(3 * time.Second)
+		l.logC <- &types.Log{
+			ID:         "Rei",
+			Name:       "nerv",
+			Type:       "stdout",
+			EntryPoint: "eva0",
+			Data:       "data0",
+		}
+		l.logC <- &types.Log{
+			ID:         "Rei",
+			Name:       "nerv",
+			Type:       "stdout",
+			EntryPoint: "eva0",
+			Data:       "data1",
+		}
+	}()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
+	defer cancel()
+	go l.run(ctx)
+
+	time.Sleep(2 * time.Second)
+	resp, err := http.Get("http://127.0.0.1:12310/log/?app=nerv")
+	assert.Nil(t, err)
+
+	reader := bufio.NewReader(resp.Body)
+	for i := 0; i < 2; i++ {
+		line, err := reader.ReadBytes('\n')
+		assert.Nil(t, err)
+		t.Log(string(line))
+	}
+}
diff --git a/manager/workload/manager.go b/manager/workload/manager.go
new file mode 100644
index 0000000..2813ee4
--- /dev/null
+++ b/manager/workload/manager.go
@@ -0,0 +1,124 @@
+package workload
+
+import (
+	"bufio"
+	"context"
+	"errors"
+
+	"github.com/projecteru2/agent/common"
+	"github.com/projecteru2/agent/runtime"
+	"github.com/projecteru2/agent/runtime/docker"
+	runtimemocks "github.com/projecteru2/agent/runtime/mocks"
+	"github.com/projecteru2/agent/store"
+	corestore "github.com/projecteru2/agent/store/core"
+	storemocks "github.com/projecteru2/agent/store/mocks"
+	"github.com/projecteru2/agent/types"
+	"github.com/projecteru2/agent/utils"
+
+	log "github.com/sirupsen/logrus"
+)
+
+// Manager .
+type Manager struct {
+	config        *types.Config
+	store         store.Store
+	runtimeClient runtime.Runtime
+
+	nodeIP   string
+	forwards *utils.HashBackends
+
+	logBroadcaster *logBroadcaster
+
+	// storeIdentifier indicates which eru this agent belongs to
+	// it can be used to identify the corresponding core
+	// and all containers that belong to this core
+	storeIdentifier string
+}
+
+// NewManager returns a workload manager
+func NewManager(ctx context.Context, config *types.Config) (*Manager, error) {
+	manager := &Manager{}
+	manager.config = config
+
+	switch config.Store {
+	case common.GRPCStore:
+		corestore.Init(ctx, config)
+		manager.store = corestore.Get()
+		if manager.store == nil {
+			log.Errorf("[NewManager] failed to create core store client")
+			return nil, errors.New("failed to get store client")
+		}
+	case common.MocksStore:
+		manager.store = storemocks.FromTemplate()
+	default:
+		log.Errorf("[NewManager] unknown store type %s", config.Store)
+		return nil, errors.New("unknown store type")
+	}
+
+	node, err := manager.store.GetNode(ctx, config.HostName)
+	if err != nil {
+		log.Errorf("[NewManager] failed to get node %s, err: %s", config.HostName, err)
+		return nil, err
+	}
+
+	manager.nodeIP = utils.GetIP(node.Endpoint)
+	if manager.nodeIP == "" {
+		manager.nodeIP = common.LocalIP
+	}
+
+	manager.forwards = utils.NewHashBackends(config.Log.Forwards)
+	manager.storeIdentifier = manager.store.GetIdentifier(ctx)
+
+	switch config.Runtime {
+	case common.DockerRuntime:
+		docker.InitClient(config, manager.nodeIP)
+		manager.runtimeClient = docker.GetClient()
+		if manager.runtimeClient == nil {
+			log.Errorf("[NewManager] failed to create runtime client")
+			return nil, errors.New("failed to get runtime client")
+		}
+	case common.MocksRuntime:
+		manager.runtimeClient = runtimemocks.FromTemplate()
+	default:
+		log.Errorf("[NewManager] unknown runtime type %s", config.Runtime)
+		return nil, errors.New("unknown runtime type")
+	}
+
+	manager.logBroadcaster = newLogBroadcaster()
+
+	return manager, nil
+}
+
+// Run starts the agent.
+// It blocks until ctx is done;
+// either call it in a separate goroutine, or from main to block the main goroutine.
+func (m *Manager) Run(ctx context.Context) error {
+	// start log broadcaster
+	go m.logBroadcaster.run(ctx)
+
+	// load existing workloads
+	if err := m.load(ctx); err != nil {
+		return err
+	}
+	// start status watcher
+	eventChan, errChan := m.initMonitor(ctx)
+	go m.monitor(ctx, eventChan)
+
+	// start health check
+	go m.healthCheck(ctx)
+
+	// wait for signal
+	select {
+	case <-ctx.Done():
+		log.Info("[WorkloadManager] exiting")
+		return nil
+	case err := <-errChan:
+		log.Infof("[WorkloadManager] failed to watch workload events, err: %v", err)
+		return err
+	}
+}
+
+// Subscribe subscribes logs
+func (m *Manager) Subscribe(app string, buf *bufio.ReadWriter) {
+	m.logBroadcaster.subscribe(app, buf)
+}
diff --git a/manager/workload/manager_test.go b/manager/workload/manager_test.go
new file mode 100644
index 0000000..5362ebf
--- /dev/null
+++ b/manager/workload/manager_test.go
@@ -0,0 +1,46 @@
+package workload
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/projecteru2/agent/common"
+	"github.com/projecteru2/agent/runtime/mocks"
+	"github.com/projecteru2/agent/types"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func newMockWorkloadManager(t *testing.T) *Manager {
+	config := &types.Config{
+		HostName:          "fake",
+		HeartbeatInterval: 10,
+		CheckOnlyMine:     false,
+		Store:             common.MocksStore,
+		Runtime:           common.MocksRuntime,
+		Log: types.LogConfig{
+			Stdout: true,
+		},
+		HealthCheck: types.HealthCheckConfig{
+			Interval:      10,
+			Timeout:       5,
+			CacheTTL:      300,
+			EnableSelfmon: true,
+		},
+		GlobalConnectionTimeout: 5 * time.Second,
+	}
+
+	m, err := NewManager(context.Background(), config)
+	assert.Nil(t, err)
+	return m
+}
+
+func TestRun(t *testing.T) {
+	manager := newMockWorkloadManager(t)
+	runtime := manager.runtimeClient.(*mocks.Nerv)
+	ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
+	defer cancel()
+	go runtime.StartEvents()
+	assert.Nil(t, manager.Run(ctx))
+}
diff --git a/manager/workload/monitor.go b/manager/workload/monitor.go
new file mode 100644
index 0000000..a52f0a7
--- /dev/null
+++ b/manager/workload/monitor.go
@@ -0,0 +1,61 @@
+package workload
+
+import (
+	"context"
+
+	"github.com/projecteru2/agent/common"
+	"github.com/projecteru2/agent/types"
+	coreutils "github.com/projecteru2/core/utils"
+
+	log "github.com/sirupsen/logrus"
+)
+
+var eventHandler = NewEventHandler()
+
+func (m *Manager) initMonitor(ctx context.Context) (<-chan *types.WorkloadEventMessage, <-chan error) {
+	eventHandler.Handle(common.StatusStart, m.handleWorkloadStart)
+	eventHandler.Handle(common.StatusDie, m.handleWorkloadDie)
+
+	f := m.getFilter(map[string]string{})
+	eventChan, errChan := m.runtimeClient.Events(ctx, f)
+	return eventChan, errChan
+}
+
+func (m *Manager) monitor(ctx context.Context, eventChan <-chan *types.WorkloadEventMessage) {
+	log.Info("[monitor] Status watch start")
+	eventHandler.Watch(ctx, eventChan)
+}
+
+func (m *Manager) handleWorkloadStart(ctx context.Context, event *types.WorkloadEventMessage) {
+	log.Debugf("[handleWorkloadStart] workload %s start", coreutils.ShortID(event.ID))
+	workloadStatus, err := m.runtimeClient.GetStatus(ctx, event.ID, true)
+	if err != nil {
+		log.Errorf("[handleWorkloadStart] failed to get workload %v status, err: %v", event.ID, err)
+		return
+	}
+
+	if workloadStatus.Running {
+		go m.attach(ctx, event.ID)
+	}
+
+	if workloadStatus.Healthy {
+		if err := m.store.SetWorkloadStatus(ctx, workloadStatus, m.config.GetHealthCheckStatusTTL()); err != nil {
+			log.Errorf("[handleWorkloadStart] update workload status failed %v", err)
+		}
+	} else {
+		go m.checkOneWorkloadWithBackoffRetry(ctx, event.ID)
+	}
+}
+
+func (m *Manager) handleWorkloadDie(ctx context.Context, event *types.WorkloadEventMessage) {
+	log.Debugf("[handleWorkloadDie] workload %s died", coreutils.ShortID(event.ID))
+	workloadStatus, err := m.runtimeClient.GetStatus(ctx, event.ID, true)
+	if err != nil {
+		log.Errorf("[handleWorkloadDie] failed to get workload %v status, err: %v", event.ID, err)
+		return
+	}
+
+	if err := m.store.SetWorkloadStatus(ctx, workloadStatus, m.config.GetHealthCheckStatusTTL()); err != nil {
+		log.Errorf("[handleWorkloadDie] update workload status failed %v", err)
+	}
+}
From a195e70e766597450e24b17de1efcf467800bc29 Mon Sep 17 00:00:00 2001
From: DuodenumL
Date: Wed, 8 Sep 2021 15:05:17 +0800
Subject: [PATCH 5/6] Refactor selfmon
---
 selfmon/mocks/CoreRPCClient.go | 1248 --------------------------------
 selfmon/node.go                |  116 +++
 selfmon/register.go            |   80 ++
 selfmon/selfmon.go             |  346 ++-------
 selfmon/selfmon_test.go        |  177 +++--
 watcher/log.go                 |   69 --
 6 files changed, 366 insertions(+), 1670 deletions(-)
 delete mode 100644 selfmon/mocks/CoreRPCClient.go
 create mode 100644 selfmon/node.go
 create mode 100644 selfmon/register.go
 delete mode 100644 watcher/log.go
diff --git a/selfmon/mocks/CoreRPCClient.go b/selfmon/mocks/CoreRPCClient.go
deleted
file mode 100644 index 478bc1c..0000000 --- a/selfmon/mocks/CoreRPCClient.go +++ /dev/null @@ -1,1248 +0,0 @@ -// Code generated by mockery v2.5.1. DO NOT EDIT. - -package mocks - -import ( - context "context" - - grpc "google.golang.org/grpc" - - mock "github.com/stretchr/testify/mock" - - pb "github.com/projecteru2/core/rpc/gen" -) - -// CoreRPCClient is an autogenerated mock type for the CoreRPCClient type -type CoreRPCClient struct { - mock.Mock -} - -// AddNode provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) AddNode(ctx context.Context, in *pb.AddNodeOptions, opts ...grpc.CallOption) (*pb.Node, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Node - if rf, ok := ret.Get(0).(func(context.Context, *pb.AddNodeOptions, ...grpc.CallOption) *pb.Node); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Node) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.AddNodeOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// AddPod provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) AddPod(ctx context.Context, in *pb.AddPodOptions, opts ...grpc.CallOption) (*pb.Pod, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Pod - if rf, ok := ret.Get(0).(func(context.Context, *pb.AddPodOptions, ...grpc.CallOption) *pb.Pod); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Pod) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.AddPodOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// BuildImage provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) BuildImage(ctx context.Context, in *pb.BuildImageOptions, opts ...grpc.CallOption) (pb.CoreRPC_BuildImageClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_BuildImageClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.BuildImageOptions, ...grpc.CallOption) pb.CoreRPC_BuildImageClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_BuildImageClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.BuildImageOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// CacheImage provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) CacheImage(ctx context.Context, in *pb.CacheImageOptions, opts ...grpc.CallOption) (pb.CoreRPC_CacheImageClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) 
- - var r0 pb.CoreRPC_CacheImageClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.CacheImageOptions, ...grpc.CallOption) pb.CoreRPC_CacheImageClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_CacheImageClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.CacheImageOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// CalculateCapacity provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) CalculateCapacity(ctx context.Context, in *pb.DeployOptions, opts ...grpc.CallOption) (*pb.CapacityMessage, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.CapacityMessage - if rf, ok := ret.Get(0).(func(context.Context, *pb.DeployOptions, ...grpc.CallOption) *pb.CapacityMessage); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.CapacityMessage) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.DeployOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ConnectNetwork provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ConnectNetwork(ctx context.Context, in *pb.ConnectNetworkOptions, opts ...grpc.CallOption) (*pb.Network, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Network - if rf, ok := ret.Get(0).(func(context.Context, *pb.ConnectNetworkOptions, ...grpc.CallOption) *pb.Network); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Network) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.ConnectNetworkOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ControlWorkload provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ControlWorkload(ctx context.Context, in *pb.ControlWorkloadOptions, opts ...grpc.CallOption) (pb.CoreRPC_ControlWorkloadClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_ControlWorkloadClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.ControlWorkloadOptions, ...grpc.CallOption) pb.CoreRPC_ControlWorkloadClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_ControlWorkloadClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.ControlWorkloadOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) 
- } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// Copy provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) Copy(ctx context.Context, in *pb.CopyOptions, opts ...grpc.CallOption) (pb.CoreRPC_CopyClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_CopyClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.CopyOptions, ...grpc.CallOption) pb.CoreRPC_CopyClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_CopyClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.CopyOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// CreateWorkload provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) CreateWorkload(ctx context.Context, in *pb.DeployOptions, opts ...grpc.CallOption) (pb.CoreRPC_CreateWorkloadClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_CreateWorkloadClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.DeployOptions, ...grpc.CallOption) pb.CoreRPC_CreateWorkloadClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_CreateWorkloadClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.DeployOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// DisconnectNetwork provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) DisconnectNetwork(ctx context.Context, in *pb.DisconnectNetworkOptions, opts ...grpc.CallOption) (*pb.Empty, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Empty - if rf, ok := ret.Get(0).(func(context.Context, *pb.DisconnectNetworkOptions, ...grpc.CallOption) *pb.Empty); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Empty) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.DisconnectNetworkOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// DissociateWorkload provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) DissociateWorkload(ctx context.Context, in *pb.DissociateWorkloadOptions, opts ...grpc.CallOption) (pb.CoreRPC_DissociateWorkloadClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_DissociateWorkloadClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.DissociateWorkloadOptions, ...grpc.CallOption) pb.CoreRPC_DissociateWorkloadClient); ok { - r0 = rf(ctx, in, opts...) 
- } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_DissociateWorkloadClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.DissociateWorkloadOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ExecuteWorkload provides a mock function with given fields: ctx, opts -func (_m *CoreRPCClient) ExecuteWorkload(ctx context.Context, opts ...grpc.CallOption) (pb.CoreRPC_ExecuteWorkloadClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_ExecuteWorkloadClient - if rf, ok := ret.Get(0).(func(context.Context, ...grpc.CallOption) pb.CoreRPC_ExecuteWorkloadClient); ok { - r0 = rf(ctx, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_ExecuteWorkloadClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, ...grpc.CallOption) error); ok { - r1 = rf(ctx, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetNode provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetNode(ctx context.Context, in *pb.GetNodeOptions, opts ...grpc.CallOption) (*pb.Node, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Node - if rf, ok := ret.Get(0).(func(context.Context, *pb.GetNodeOptions, ...grpc.CallOption) *pb.Node); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Node) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.GetNodeOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetNodeResource provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetNodeResource(ctx context.Context, in *pb.GetNodeResourceOptions, opts ...grpc.CallOption) (*pb.NodeResource, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.NodeResource - if rf, ok := ret.Get(0).(func(context.Context, *pb.GetNodeResourceOptions, ...grpc.CallOption) *pb.NodeResource); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.NodeResource) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.GetNodeResourceOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetNodeStatus provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetNodeStatus(ctx context.Context, in *pb.GetNodeStatusOptions, opts ...grpc.CallOption) (*pb.NodeStatusStreamMessage, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.NodeStatusStreamMessage - if rf, ok := ret.Get(0).(func(context.Context, *pb.GetNodeStatusOptions, ...grpc.CallOption) *pb.NodeStatusStreamMessage); ok { - r0 = rf(ctx, in, opts...) 
- } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.NodeStatusStreamMessage) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.GetNodeStatusOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetPod provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetPod(ctx context.Context, in *pb.GetPodOptions, opts ...grpc.CallOption) (*pb.Pod, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Pod - if rf, ok := ret.Get(0).(func(context.Context, *pb.GetPodOptions, ...grpc.CallOption) *pb.Pod); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Pod) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.GetPodOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetPodResource provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetPodResource(ctx context.Context, in *pb.GetPodOptions, opts ...grpc.CallOption) (*pb.PodResource, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.PodResource - if rf, ok := ret.Get(0).(func(context.Context, *pb.GetPodOptions, ...grpc.CallOption) *pb.PodResource); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.PodResource) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.GetPodOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetWorkload provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetWorkload(ctx context.Context, in *pb.WorkloadID, opts ...grpc.CallOption) (*pb.Workload, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Workload - if rf, ok := ret.Get(0).(func(context.Context, *pb.WorkloadID, ...grpc.CallOption) *pb.Workload); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Workload) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.WorkloadID, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetWorkloads provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetWorkloads(ctx context.Context, in *pb.WorkloadIDs, opts ...grpc.CallOption) (*pb.Workloads, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Workloads - if rf, ok := ret.Get(0).(func(context.Context, *pb.WorkloadIDs, ...grpc.CallOption) *pb.Workloads); ok { - r0 = rf(ctx, in, opts...) 
- } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Workloads) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.WorkloadIDs, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// GetWorkloadsStatus provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) GetWorkloadsStatus(ctx context.Context, in *pb.WorkloadIDs, opts ...grpc.CallOption) (*pb.WorkloadsStatus, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.WorkloadsStatus - if rf, ok := ret.Get(0).(func(context.Context, *pb.WorkloadIDs, ...grpc.CallOption) *pb.WorkloadsStatus); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.WorkloadsStatus) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.WorkloadIDs, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// Info provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) Info(ctx context.Context, in *pb.Empty, opts ...grpc.CallOption) (*pb.CoreInfo, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.CoreInfo - if rf, ok := ret.Get(0).(func(context.Context, *pb.Empty, ...grpc.CallOption) *pb.CoreInfo); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.CoreInfo) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.Empty, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ListNetworks provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ListNetworks(ctx context.Context, in *pb.ListNetworkOptions, opts ...grpc.CallOption) (*pb.Networks, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Networks - if rf, ok := ret.Get(0).(func(context.Context, *pb.ListNetworkOptions, ...grpc.CallOption) *pb.Networks); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Networks) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.ListNetworkOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ListNodeWorkloads provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ListNodeWorkloads(ctx context.Context, in *pb.GetNodeOptions, opts ...grpc.CallOption) (*pb.Workloads, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Workloads - if rf, ok := ret.Get(0).(func(context.Context, *pb.GetNodeOptions, ...grpc.CallOption) *pb.Workloads); ok { - r0 = rf(ctx, in, opts...) 
- } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Workloads) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.GetNodeOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ListPodNodes provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ListPodNodes(ctx context.Context, in *pb.ListNodesOptions, opts ...grpc.CallOption) (*pb.Nodes, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Nodes - if rf, ok := ret.Get(0).(func(context.Context, *pb.ListNodesOptions, ...grpc.CallOption) *pb.Nodes); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Nodes) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.ListNodesOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ListPods provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ListPods(ctx context.Context, in *pb.Empty, opts ...grpc.CallOption) (*pb.Pods, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Pods - if rf, ok := ret.Get(0).(func(context.Context, *pb.Empty, ...grpc.CallOption) *pb.Pods); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Pods) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.Empty, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ListWorkloads provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ListWorkloads(ctx context.Context, in *pb.ListWorkloadsOptions, opts ...grpc.CallOption) (pb.CoreRPC_ListWorkloadsClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_ListWorkloadsClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.ListWorkloadsOptions, ...grpc.CallOption) pb.CoreRPC_ListWorkloadsClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_ListWorkloadsClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.ListWorkloadsOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// LogStream provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) LogStream(ctx context.Context, in *pb.LogStreamOptions, opts ...grpc.CallOption) (pb.CoreRPC_LogStreamClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_LogStreamClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.LogStreamOptions, ...grpc.CallOption) pb.CoreRPC_LogStreamClient); ok { - r0 = rf(ctx, in, opts...) 
- } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_LogStreamClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.LogStreamOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// NodeStatusStream provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) NodeStatusStream(ctx context.Context, in *pb.Empty, opts ...grpc.CallOption) (pb.CoreRPC_NodeStatusStreamClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_NodeStatusStreamClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.Empty, ...grpc.CallOption) pb.CoreRPC_NodeStatusStreamClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_NodeStatusStreamClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.Empty, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ReallocResource provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ReallocResource(ctx context.Context, in *pb.ReallocOptions, opts ...grpc.CallOption) (*pb.ReallocResourceMessage, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.ReallocResourceMessage - if rf, ok := ret.Get(0).(func(context.Context, *pb.ReallocOptions, ...grpc.CallOption) *pb.ReallocResourceMessage); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.ReallocResourceMessage) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.ReallocOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// RemoveImage provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) RemoveImage(ctx context.Context, in *pb.RemoveImageOptions, opts ...grpc.CallOption) (pb.CoreRPC_RemoveImageClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_RemoveImageClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.RemoveImageOptions, ...grpc.CallOption) pb.CoreRPC_RemoveImageClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_RemoveImageClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.RemoveImageOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// RemoveNode provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) RemoveNode(ctx context.Context, in *pb.RemoveNodeOptions, opts ...grpc.CallOption) (*pb.Empty, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) 
- - var r0 *pb.Empty - if rf, ok := ret.Get(0).(func(context.Context, *pb.RemoveNodeOptions, ...grpc.CallOption) *pb.Empty); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Empty) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.RemoveNodeOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// RemovePod provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) RemovePod(ctx context.Context, in *pb.RemovePodOptions, opts ...grpc.CallOption) (*pb.Empty, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Empty - if rf, ok := ret.Get(0).(func(context.Context, *pb.RemovePodOptions, ...grpc.CallOption) *pb.Empty); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Empty) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.RemovePodOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// RemoveWorkload provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) RemoveWorkload(ctx context.Context, in *pb.RemoveWorkloadOptions, opts ...grpc.CallOption) (pb.CoreRPC_RemoveWorkloadClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_RemoveWorkloadClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.RemoveWorkloadOptions, ...grpc.CallOption) pb.CoreRPC_RemoveWorkloadClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_RemoveWorkloadClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.RemoveWorkloadOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// ReplaceWorkload provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) ReplaceWorkload(ctx context.Context, in *pb.ReplaceOptions, opts ...grpc.CallOption) (pb.CoreRPC_ReplaceWorkloadClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_ReplaceWorkloadClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.ReplaceOptions, ...grpc.CallOption) pb.CoreRPC_ReplaceWorkloadClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_ReplaceWorkloadClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.ReplaceOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// RunAndWait provides a mock function with given fields: ctx, opts -func (_m *CoreRPCClient) RunAndWait(ctx context.Context, opts ...grpc.CallOption) (pb.CoreRPC_RunAndWaitClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) 
- - var r0 pb.CoreRPC_RunAndWaitClient - if rf, ok := ret.Get(0).(func(context.Context, ...grpc.CallOption) pb.CoreRPC_RunAndWaitClient); ok { - r0 = rf(ctx, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_RunAndWaitClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, ...grpc.CallOption) error); ok { - r1 = rf(ctx, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// Send provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) Send(ctx context.Context, in *pb.SendOptions, opts ...grpc.CallOption) (pb.CoreRPC_SendClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_SendClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.SendOptions, ...grpc.CallOption) pb.CoreRPC_SendClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_SendClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.SendOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// SetNode provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) SetNode(ctx context.Context, in *pb.SetNodeOptions, opts ...grpc.CallOption) (*pb.Node, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Node - if rf, ok := ret.Get(0).(func(context.Context, *pb.SetNodeOptions, ...grpc.CallOption) *pb.Node); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Node) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.SetNodeOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// SetNodeStatus provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) SetNodeStatus(ctx context.Context, in *pb.SetNodeStatusOptions, opts ...grpc.CallOption) (*pb.Empty, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 *pb.Empty - if rf, ok := ret.Get(0).(func(context.Context, *pb.SetNodeStatusOptions, ...grpc.CallOption) *pb.Empty); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.Empty) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.SetNodeStatusOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// SetWorkloadsStatus provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) SetWorkloadsStatus(ctx context.Context, in *pb.SetWorkloadsStatusOptions, opts ...grpc.CallOption) (*pb.WorkloadsStatus, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) 
- - var r0 *pb.WorkloadsStatus - if rf, ok := ret.Get(0).(func(context.Context, *pb.SetWorkloadsStatusOptions, ...grpc.CallOption) *pb.WorkloadsStatus); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*pb.WorkloadsStatus) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.SetWorkloadsStatusOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// WatchServiceStatus provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) WatchServiceStatus(ctx context.Context, in *pb.Empty, opts ...grpc.CallOption) (pb.CoreRPC_WatchServiceStatusClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_WatchServiceStatusClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.Empty, ...grpc.CallOption) pb.CoreRPC_WatchServiceStatusClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_WatchServiceStatusClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.Empty, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// WorkloadStatusStream provides a mock function with given fields: ctx, in, opts -func (_m *CoreRPCClient) WorkloadStatusStream(ctx context.Context, in *pb.WorkloadStatusStreamOptions, opts ...grpc.CallOption) (pb.CoreRPC_WorkloadStatusStreamClient, error) { - _va := make([]interface{}, len(opts)) - for _i := range opts { - _va[_i] = opts[_i] - } - var _ca []interface{} - _ca = append(_ca, ctx, in) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - var r0 pb.CoreRPC_WorkloadStatusStreamClient - if rf, ok := ret.Get(0).(func(context.Context, *pb.WorkloadStatusStreamOptions, ...grpc.CallOption) pb.CoreRPC_WorkloadStatusStreamClient); ok { - r0 = rf(ctx, in, opts...) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(pb.CoreRPC_WorkloadStatusStreamClient) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, *pb.WorkloadStatusStreamOptions, ...grpc.CallOption) error); ok { - r1 = rf(ctx, in, opts...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} diff --git a/selfmon/node.go b/selfmon/node.go new file mode 100644 index 0000000..95076fd --- /dev/null +++ b/selfmon/node.go @@ -0,0 +1,116 @@ +package selfmon + +import ( + "context" + "io" + "math/rand" + "time" + + "github.com/projecteru2/agent/types" + "github.com/projecteru2/agent/utils" + + log "github.com/sirupsen/logrus" +) + +func (m *Selfmon) initNodeStatus(ctx context.Context) { + log.Debugf("[selfmon] init node status started") + nodes := make(chan *types.Node) + + go func() { + defer close(nodes) + // Get all nodes which are active status, and regardless of pod. 
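+		// ListPodNodes below is wrapped in utils.WithTimeout with the
+		// GlobalConnectionTimeout budget, so a hung core connection cannot
+		// stall this producer goroutine; the defer above closes the nodes
+		// channel once every node has been fanned out to the consumer loop.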
+ var podNodes []*types.Node + var err error + utils.WithTimeout(ctx, m.config.GlobalConnectionTimeout, func(ctx context.Context) { + podNodes, err = m.store.ListPodNodes(ctx, true, "", nil) + }) + if err != nil { + log.Errorf("[selfmon] get pod nodes failed %v", err) + return + } + + for _, n := range podNodes { + log.Debugf("[selfmon] watched %s/%s", n.Name, n.Endpoint) + nodes <- n + } + }() + + for n := range nodes { + status, err := m.store.GetNodeStatus(ctx, n.Name) + if err != nil { + status = &types.NodeStatus{ + Nodename: n.Name, + Podname: n.Podname, + Alive: false, + } + } + m.dealNodeStatusMessage(ctx, status) + } +} + +func (m *Selfmon) watchNodeStatus(ctx context.Context) { + for { + select { + case <-ctx.Done(): + log.Infof("[selfmon] %v stop watching node status", m.id) + return + default: + time.Sleep(time.Second) + go m.initNodeStatus(ctx) + if m.watch(ctx) != nil { + log.Debugf("[selfmon] retry to watch node status") + time.Sleep(m.config.GlobalConnectionTimeout) + } + } + } +} + +func (m *Selfmon) watch(ctx context.Context) error { + messageChan, errChan := m.store.NodeStatusStream(ctx) + log.Debugf("[selfmon] watch node status started") + defer log.Debugf("[selfmon] stop watching node status") + + for { + select { + case message := <-messageChan: + go m.dealNodeStatusMessage(ctx, message) + case err := <-errChan: + if err == io.EOF { + log.Debugf("[selfmon] server closed the stream") + return err + } + log.Debugf("[selfmon] read node status failed, err: %s", err) + return err + } + } +} + +func (m *Selfmon) dealNodeStatusMessage(ctx context.Context, message *types.NodeStatus) { + if message.Error != nil { + log.Errorf("[selfmon] deal with node status stream message failed %+v", message) + return + } + + lastValue, ok := m.status.Get(message.Nodename) + if ok { + last, o := lastValue.(bool) + if o && last == message.Alive { + return + } + } + + // TODO maybe we need a distributed lock to control concurrency + var err error + utils.WithTimeout(ctx, m.config.GlobalConnectionTimeout, func(ctx context.Context) { + err = m.store.SetNode(ctx, message.Nodename, message.Alive) + }) + + if err != nil { + log.Errorf("[selfmon] set node %s failed %v", message.Nodename, err) + m.status.Delete(message.Nodename) + return + } + log.Debugf("[selfmon] set node %s as alive: %v", message.Nodename, message.Alive) + + m.status.Set(message.Nodename, message.Alive, time.Duration(300+rand.Intn(100))*time.Second) // nolint +} diff --git a/selfmon/register.go b/selfmon/register.go new file mode 100644 index 0000000..44c8011 --- /dev/null +++ b/selfmon/register.go @@ -0,0 +1,80 @@ +package selfmon + +import ( + "context" + "time" + + coretypes "github.com/projecteru2/core/types" + + "github.com/pkg/errors" + log "github.com/sirupsen/logrus" +) + +// WithActiveLock acquires the active lock synchronously +func (m *Selfmon) WithActiveLock(parentCtx context.Context, f func(ctx context.Context)) { + ctx, cancel := context.WithCancel(parentCtx) + defer cancel() + + var expiry <-chan struct{} + var unregister func() + defer func() { + if unregister != nil { + log.Infof("[Register] %v unregisters", m.id) + unregister() + } + }() + + for { + select { + case <-ctx.Done(): + log.Infof("[Register] context canceled") + return + case <-m.Exit(): + log.Infof("[Register] selfmon %v closed", m.id) + return + default: + } + + // try to get the lock + if ne, un, err := m.register(ctx); err != nil { + if errors.Is(err, context.Canceled) { + log.Infof("[Register] context canceled") + return + } else if 
!errors.Is(err, coretypes.ErrKeyExists) { + log.Errorf("[Register] failed to re-register: %v", err) + time.Sleep(time.Second) + continue + } + log.Infof("[Register] %v there has been another active selfmon", m.id) + time.Sleep(time.Second) + } else { + log.Infof("[Register] the agent %v has been active", m.id) + expiry = ne + unregister = un + break + } + } + + // cancel the ctx when: 1. selfmon closed 2. lost the active lock + go func() { + defer cancel() + + select { + case <-ctx.Done(): + log.Infof("[Register] context canceled") + return + case <-m.Exit(): + log.Infof("[Register] selfmon %v closed", m.id) + return + case <-expiry: + log.Infof("[Register] lock expired") + return + } + }() + + f(ctx) +} + +func (m *Selfmon) register(ctx context.Context) (<-chan struct{}, func(), error) { + return m.kv.StartEphemeral(ctx, ActiveKey, time.Second*16) +} diff --git a/selfmon/selfmon.go b/selfmon/selfmon.go index baa58d2..3ce0d80 100644 --- a/selfmon/selfmon.go +++ b/selfmon/selfmon.go @@ -2,23 +2,21 @@ package selfmon import ( "context" - "io" "os/signal" "sync" "syscall" "time" + "github.com/projecteru2/agent/common" + "github.com/projecteru2/agent/store" corestore "github.com/projecteru2/agent/store/core" + storemocks "github.com/projecteru2/agent/store/mocks" "github.com/projecteru2/agent/types" - "github.com/projecteru2/agent/utils" - pb "github.com/projecteru2/core/rpc/gen" coremeta "github.com/projecteru2/core/store/etcdv3/meta" - coretypes "github.com/projecteru2/core/types" + "github.com/patrickmn/go-cache" "github.com/pkg/errors" log "github.com/sirupsen/logrus" - "go.etcd.io/etcd/api/v3/mvccpb" - etcdtypes "go.etcd.io/etcd/client/v3" ) // ActiveKey . @@ -27,10 +25,10 @@ const ActiveKey = "/selfmon/active" // Selfmon . type Selfmon struct { config *types.Config - status sync.Map - rpc corestore.RPCClientPool - etcd coremeta.KV - active utils.AtomicBool + status *cache.Cache + store store.Store + kv coremeta.KV + id int64 exit struct { sync.Once @@ -42,303 +40,91 @@ type Selfmon struct { func New(ctx context.Context, config *types.Config) (mon *Selfmon, err error) { mon = &Selfmon{} mon.config = config + mon.status = cache.New(time.Minute*5, time.Minute*15) mon.exit.C = make(chan struct{}, 1) - if mon.etcd, err = coremeta.NewETCD(config.Etcd, nil); err != nil { - return - } + mon.id = time.Now().UnixNano() / 1000 % 10000 - if mon.rpc, err = corestore.NewCoreRPCClientPool(ctx, mon.config); err != nil { - log.Errorf("[selfmon] no core rpc connection") - return + switch config.KV { + case common.ETCDKV: + if mon.kv, err = coremeta.NewETCD(config.Etcd, nil); err != nil { + log.Errorf("[selfmon] failed to get etcd client, err: %s", err) + return nil, err + } + case common.MocksKV: + log.Debugf("[selfmon] use embedded ETCD") + mon.kv = nil + default: + return nil, errors.New("unknown kv type") } - return -} - -// Exit . -func (m *Selfmon) Exit() <-chan struct{} { - return m.exit.C -} - -// Close . -func (m *Selfmon) Close() { - m.exit.Do(func() { - close(m.exit.C) - }) -} + switch config.Store { + case common.GRPCStore: + corestore.Init(ctx, config) + mon.store = corestore.Get() + if mon.store == nil { + log.Errorf("[selfmon] failed to get core store") + return nil, errors.New("failed to get core store") + } + case common.MocksStore: + mon.store = storemocks.FromTemplate() + default: + return nil, errors.New("unknown store type") + } -// Reload . -func (m *Selfmon) Reload() error { - return nil + return mon, nil } -// Run . 
-func (m *Selfmon) Run(ctx context.Context) { - ctx, cancel := context.WithCancel(ctx) - defer cancel() - +// Monitor . +func (m *Selfmon) Monitor(ctx context.Context) { go m.watchNodeStatus(ctx) - - <-m.Exit() - log.Warnf("[selfmon] exit from %p main loop", m) + log.Infof("[selfmon] selfmon %v is running", m.id) + <-ctx.Done() + log.Warnf("[selfmon] m %v monitor stops", m.id) } -func (m *Selfmon) initNodeStatus(ctx context.Context) { - nodes := make(chan *pb.Node) - - go func() { - defer close(nodes) - // Get all nodes which are active status, and regardless of pod. - cctx, cancel := context.WithTimeout(ctx, m.config.GlobalConnectionTimeout) - defer cancel() - podNodes, err := m.rpc.GetClient().ListPodNodes(cctx, &pb.ListNodesOptions{All: true}) - if err != nil { - log.Errorf("[selfmon] get pod nodes from %s failed %v", m.config.Core, err) - return - } - - for _, n := range podNodes.Nodes { - log.Debugf("[selfmon] watched %s/%s", n.Name, n.Endpoint) - nodes <- n - } - }() - - for n := range nodes { - status, err := m.rpc.GetClient().GetNodeStatus(ctx, &pb.GetNodeStatusOptions{Nodename: n.Name}) - fakeMessage := &pb.NodeStatusStreamMessage{ - Nodename: n.Name, - Podname: n.Podname, - } - fakeMessage.Alive = !(err != nil || status == nil) && status.Alive - m.dealNodeStatusMessage(ctx, fakeMessage) - } -} +// Run . +func (m *Selfmon) Run(ctx context.Context) error { + go m.handleSignals(ctx) -func (m *Selfmon) watchNodeStatus(ctx context.Context) { for { select { case <-ctx.Done(): - log.Infof("[selfmon] stop watching node status") - return + return nil + case <-m.Exit(): + return nil default: - go m.initNodeStatus(ctx) - if m.watch(ctx) != nil { - log.Debugf("[selfmon] retry to watch node status") - time.Sleep(m.config.GlobalConnectionTimeout) - } + m.WithActiveLock(ctx, func(ctx context.Context) { + m.Monitor(ctx) + }) } } } -func (m *Selfmon) watch(ctx context.Context) error { - client, err := m.rpc.GetClient().NodeStatusStream(ctx, &pb.Empty{}) - if err != nil { - log.Errorf("[selfmon] watch node status failed %v", err) - return err - } - log.Debugf("[selfmon] watch node status started") - defer log.Debugf("[selfmon] stop watching node status") - - for { - message, err := client.Recv() - if err == io.EOF { - log.Debugf("[selfmon] server closed the stream") - return err - } - if err != nil { - log.Errorf("[selfmon] read node events failed %v", err) - return err - } - go m.dealNodeStatusMessage(ctx, message) - } -} - -func (m *Selfmon) dealNodeStatusMessage(ctx context.Context, message *pb.NodeStatusStreamMessage) { - if message.Error != "" { - log.Errorf("[selfmon] deal with node status stream message failed %v", message.Error) - return - } - - lastValue, ok := m.status.Load(message.Nodename) - if ok { - last, o := lastValue.(bool) - if o && last == message.Alive { - return - } - } - - var opt pb.TriOpt - if message.Alive { - opt = pb.TriOpt_TRUE - } else { - opt = pb.TriOpt_FALSE - } - - // TODO maybe we need a distributed lock to control concurrency - ctx, cancel := context.WithTimeout(ctx, m.config.GlobalConnectionTimeout) - defer cancel() - if _, err := m.rpc.GetClient().SetNode(ctx, &pb.SetNodeOptions{ - Nodename: message.Nodename, - StatusOpt: opt, - WorkloadsDown: !message.Alive, - }); err != nil { - log.Errorf("[selfmon] set node %s failed %v", message.Nodename, err) - return - } - - m.status.Store(message.Nodename, message.Alive) -} - -// Register . 
-func (m *Selfmon) Register(ctx context.Context) (func(), error) { - ctx, cancel := context.WithCancel(ctx) - del := make(chan struct{}, 1) - - var wg sync.WaitGroup - wg.Add(1) - // Watching the active key permanently. - go func() { - defer wg.Done() - defer close(del) - - handleResp := func(resp etcdtypes.WatchResponse) { - if err := resp.Err(); err != nil { - if resp.Canceled { - log.Infof("[Register] watching is canceled") - return - } - log.Errorf("[Register] watch failed: %v", err) - time.Sleep(time.Second) - return - } - - for _, ev := range resp.Events { - if ev.Type == mvccpb.DELETE { - select { - case del <- struct{}{}: - case <-ctx.Done(): - return - } - } - } - } - - for { - select { - case <-ctx.Done(): - log.Infof("[Register] watching done") - return - case resp := <-m.etcd.Watch(ctx, ActiveKey): - handleResp(resp) - } - } - }() - - wg.Add(1) - // Always trying to register if the selfmon is alive. - go func() { - var expiry <-chan struct{} - unregister := func() {} - - defer func() { - m.active.Unset() - unregister() - wg.Done() - }() - - for { - m.active.Unset() - - // We have to put a single <-ctx.Done() here to avoid it may be starved - // while it combines with <-expiry and <-del. - select { - case <-ctx.Done(): - log.Infof("[Register] register done: %v", ctx.Err()) - return - default: - } - - if ne, un, err := m.register(ctx); err != nil { - if !errors.Is(err, coretypes.ErrKeyExists) { - log.Errorf("[Register] failed to re-register: %v", err) - time.Sleep(time.Second) - continue - } - log.Infof("[Register] there has been another active selfmon") - } else { - log.Infof("[Register] the agent has been active") - expiry = ne - unregister = un - m.active.Set() - } - - // Though there's a standalone <-ctx.Done() above, we still need <-ctx.Done() - // in this select block to make sure the select could be terminated - // once the ctx is done during hang together. - select { - case <-ctx.Done(): - log.Infof("[Register] register done: %v", ctx.Err()) - return - case <-expiry: - log.Infof("[Register] the original active selfmon has been expired") - case <-del: - log.Infof("[Register] The original active Selfmon is terminated") - } - } - }() - - return func() { - cancel() - wg.Wait() - }, nil +// Exit . +func (m *Selfmon) Exit() <-chan struct{} { + return m.exit.C } -func (m *Selfmon) register(ctx context.Context) (<-chan struct{}, func(), error) { - ctx, cancel := context.WithTimeout(ctx, m.config.GlobalConnectionTimeout*2) - defer cancel() - return m.etcd.StartEphemeral(ctx, ActiveKey, time.Second*16) +// Close . +func (m *Selfmon) Close() { + m.exit.Do(func() { + close(m.exit.C) + }) } -// Monitor . -func Monitor(ctx context.Context, config *types.Config) error { - mon, err := New(ctx, config) - if err != nil { - return err - } - - unregister, err := mon.Register(ctx) - if err != nil { - return err - } - defer unregister() - - var wg sync.WaitGroup - wg.Add(2) - go func() { - defer wg.Done() - handleSignals(ctx, mon) - }() - - go func() { - defer wg.Done() - mon.Run(ctx) - }() - - log.Infof("[selfmon] selfmon %p is running", mon) - wg.Wait() - - log.Infof("[selfmon] selfmon %p is terminated", mon) +// Reload . +func (m *Selfmon) Reload() error { return nil } // handleSignals . 
-func handleSignals(ctx context.Context, mon *Selfmon) { +func (m *Selfmon) handleSignals(ctx context.Context) { var reloadCtx context.Context var cancel1 context.CancelFunc defer func() { - log.Warnf("[selfmon] %p signals handler exit", mon) + log.Warnf("[selfmon] %v signals handler exit", m.id) cancel1() - mon.Close() + m.Close() }() reloadCtx, cancel1 = signal.NotifyContext(ctx, syscall.SIGHUP, syscall.SIGUSR2) @@ -347,16 +133,16 @@ func handleSignals(ctx context.Context, mon *Selfmon) { for { select { - case <-mon.Exit(): - log.Warnf("[selfmon] recv from mon %p exit ch", mon) + case <-m.Exit(): + log.Warnf("[selfmon] recv from m %v exit ch", m.id) return case <-exitCtx.Done(): log.Warn("[selfmon] recv signal to exit") return case <-reloadCtx.Done(): log.Warn("[selfmon] recv signal to reload") - if err := mon.Reload(); err != nil { - log.Errorf("[selfmon] reload %p failed %v", mon, err) + if err := m.Reload(); err != nil { + log.Errorf("[selfmon] reload %v failed %v", m.id, err) } reloadCtx, cancel1 = signal.NotifyContext(ctx, syscall.SIGHUP, syscall.SIGUSR2) } diff --git a/selfmon/selfmon_test.go b/selfmon/selfmon_test.go index aa72f7c..986f76f 100644 --- a/selfmon/selfmon_test.go +++ b/selfmon/selfmon_test.go @@ -2,105 +2,136 @@ package selfmon import ( "context" - "fmt" - "sync" "testing" "time" - "github.com/projecteru2/agent/selfmon/mocks" + "github.com/projecteru2/agent/common" storemocks "github.com/projecteru2/agent/store/mocks" "github.com/projecteru2/agent/types" - pb "github.com/projecteru2/core/rpc/gen" - coremeta "github.com/projecteru2/core/store/etcdv3/meta" + "github.com/projecteru2/core/store/etcdv3/meta" coretypes "github.com/projecteru2/core/types" - "github.com/stretchr/testify/mock" - "github.com/stretchr/testify/require" + "github.com/stretchr/testify/assert" ) +func newMockSelfmon(t *testing.T, withETCD bool) *Selfmon { + ctx := context.Background() + config := &types.Config{ + HostName: "fake", + Store: common.MocksStore, + Runtime: common.MocksRuntime, + KV: common.MocksKV, + Log: types.LogConfig{ + Stdout: true, + }, + Etcd: coretypes.EtcdConfig{ + Machines: []string{"127.0.0.1:2379"}, + Prefix: "/selfmon-agent", + LockPrefix: "__lock__/selfmon-agent", + }, + GlobalConnectionTimeout: 5 * time.Second, + } + + m, err := New(ctx, config) + assert.Nil(t, err) + + if withETCD { + etcd, err := meta.NewETCD(config.Etcd, t) + assert.Nil(t, err) + m.kv = etcd + } + + return m +} + func TestCloseTwice(t *testing.T) { - m, cancel := newTestSelfmon(t) - defer cancel() - m.rpc.GetClient().(*mocks.CoreRPCClient).On("ListPodNodes", mock.Anything, mock.Anything).Return(&pb.Nodes{}, nil) + m := newMockSelfmon(t, false) + defer m.Close() m.Close() m.Close() <-m.Exit() } -func TestRun(t *testing.T) { - m, cancel := newTestSelfmon(t) - - rpc, ok := m.rpc.GetClient().(*mocks.CoreRPCClient) - require.True(t, ok) - rpc.On("ListPodNodes", mock.Anything, mock.Anything).Return(&pb.Nodes{ - Nodes: []*pb.Node{ - { - Name: "foo", - Endpoint: "host:port", - }, - }, - }, nil).Once() - rpc.On("ListPodNodes", mock.Anything, mock.Anything).Return(&pb.Nodes{}, nil) - rpc.On("SetNode", mock.Anything, mock.Anything).Return(&pb.Node{}, nil) - defer rpc.AssertExpectations(t) +func TestEmbeddedETCD(t *testing.T) { + etcd, err := meta.NewETCD(coretypes.EtcdConfig{ + Machines: []string{"127.0.0.1:2379"}, + Prefix: "/selfmon-agent", + LockPrefix: "__lock__/selfmon-agent", + }, t) + assert.Nil(t, err) - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - m.Run(context.TODO()) - }() + ctx 
:= context.Background() - // Makes it as an active selfmon. - m.active.Set() - time.Sleep(time.Second) + _, un, err := etcd.StartEphemeral(ctx, "/test/key", 1*time.Second) + assert.Nil(t, err) + time.Sleep(5 * time.Second) + un() - cancel() - wg.Wait() + _, _, err = etcd.StartEphemeral(ctx, "/test/key", 1*time.Second) + assert.Nil(t, err) } -func TestRegister(t *testing.T) { - m, cancel := newTestSelfmon(t) - defer cancel() - ctx := context.TODO() - - unregister0, err := m.Register(ctx) - require.NoError(t, err) - require.NotNil(t, unregister0) +func TestRegisterTwice(t *testing.T) { + m1 := newMockSelfmon(t, false) + m2 := newMockSelfmon(t, false) + defer m1.Close() + defer m2.Close() + + // make sure m1 and m2 are using the same embedded ETCD + etcd, err := meta.NewETCD(coretypes.EtcdConfig{ + Machines: []string{"127.0.0.1:2379"}, + Prefix: "/selfmon-agent", + LockPrefix: "__lock__/selfmon-agent", + }, t) + assert.Nil(t, err) + + m1.kv = etcd + m2.kv = etcd + + ctx := context.Background() + i := 0 + + go m1.WithActiveLock(ctx, func(ctx context.Context) { + i = 1 + time.Sleep(3 * time.Second) + }) + time.Sleep(time.Second) + go m2.WithActiveLock(ctx, func(ctx context.Context) { + i = 2 + }) + assert.Equal(t, i, 1) + time.Sleep(5 * time.Second) + assert.Equal(t, i, 2) +} - unregister1, err := m.Register(ctx) - require.NoError(t, err) - require.NotNil(t, unregister1) +func TestRun(t *testing.T) { + m := newMockSelfmon(t, true) + defer m.Close() - unregister0() + store := m.store.(*storemocks.MockStore) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() - time.Sleep(time.Second) - unregister1() -} + // set node "fake" as alive + assert.Nil(t, store.SetNodeStatus(ctx, 0)) -func newTestSelfmon(t *testing.T) (*Selfmon, func()) { - config := &types.Config{ - Etcd: coretypes.EtcdConfig{ - Machines: []string{"127.0.0.1:2379"}, - Prefix: "/selfmon-agent", - LockPrefix: "__lock__/selfmon-agent", - }, - } + go func() { + assert.Nil(t, m.Run(ctx)) + }() + time.Sleep(2 * time.Second) - m := &Selfmon{} - m.config = config - m.exit.C = make(chan struct{}, 1) - m.rpc = &storemocks.RPCClientPool{} + node, _ := store.GetNode(ctx, "fake") + assert.Equal(t, node.Available, true) + node, _ = store.GetNode(ctx, "faker") + assert.Equal(t, node.Available, false) - rpcClient := &mocks.CoreRPCClient{} - rpcClient.On("NodeStatusStream", mock.Anything, mock.Anything).Return(nil, fmt.Errorf("mock")) - rpcClient.On("GetNodeStatus", mock.Anything, mock.Anything).Return(nil, fmt.Errorf("mock")) - m.rpc.(*storemocks.RPCClientPool).On("GetClient").Return(rpcClient) + go store.StartNodeStatusStream() + time.Sleep(2 * time.Second) - // Uses an embedded one instead of the real one. 
- etcd, err := coremeta.NewETCD(config.Etcd, t) - require.NoError(t, err) - m.etcd = etcd + node, _ = store.GetNode(ctx, "fake") + assert.Equal(t, node.Available, false) + node, _ = store.GetNode(ctx, "faker") + assert.Equal(t, node.Available, true) - return m, m.Close + m.Close() } diff --git a/watcher/log.go b/watcher/log.go deleted file mode 100644 index 57c50ec..0000000 --- a/watcher/log.go +++ /dev/null @@ -1,69 +0,0 @@ -package watcher - -import ( - "context" - "encoding/json" - "fmt" - - "github.com/sirupsen/logrus" - - "github.com/projecteru2/agent/types" -) - -// Watcher indicate watcher -type Watcher struct { - consumer map[string]map[string]*types.LogConsumer - LogC chan *types.Log - ConsumerC chan *types.LogConsumer -} - -// LogMonitor indicate log monitor -var LogMonitor *Watcher - -// InitMonitor init a monitor -func InitMonitor() { - LogMonitor = &Watcher{} - LogMonitor.consumer = map[string]map[string]*types.LogConsumer{} - LogMonitor.LogC = make(chan *types.Log) - LogMonitor.ConsumerC = make(chan *types.LogConsumer) -} - -// Serve start monitor -func (w *Watcher) Serve(ctx context.Context) { - logrus.Info("[logServe] Log monitor started") - defer logrus.Info("[logServe] Log monitor stopped") - for { - select { - case <-ctx.Done(): - return - case log := <-w.LogC: - if consumers, ok := w.consumer[log.Name]; ok { - data, err := json.Marshal(log) - if err != nil { - logrus.Error(err) - break - } - line := fmt.Sprintf("%X\r\n%s\r\n\r\n", len(data)+2, string(data)) - for ID, consumer := range consumers { - if _, err := consumer.Buf.WriteString(line); err != nil { - logrus.Error(err) - logrus.Infof("%s %s log detached", consumer.App, consumer.ID) - consumer.Conn.Close() - delete(consumers, ID) - if len(w.consumer[log.Name]) == 0 { - delete(w.consumer, log.Name) - } - } - consumer.Buf.Flush() - } - } - case consumer := <-w.ConsumerC: - if consumers, ok := w.consumer[consumer.App]; ok { - consumers[consumer.ID] = consumer - } else { - w.consumer[consumer.App] = map[string]*types.LogConsumer{} - w.consumer[consumer.App][consumer.ID] = consumer - } - } - } -} From 61028537499e2043a14b7e281f7187bf2d5901d2 Mon Sep 17 00:00:00 2001 From: DuodenumL Date: Wed, 8 Sep 2021 15:47:58 +0800 Subject: [PATCH 6/6] Add "HAKeepaliveInterval" in config --- agent.go | 6 +++--- agent.yaml.sample | 5 +++++ api/http.go | 2 +- manager/workload/log.go | 4 ++-- manager/workload/log_test.go | 2 +- manager/workload/manager.go | 4 ++-- selfmon/register.go | 2 +- selfmon/selfmon_test.go | 1 + types/config.go | 1 + types/config_test.go | 4 ++++ 10 files changed, 21 insertions(+), 10 deletions(-) diff --git a/agent.go b/agent.go index 40a4138..208461c 100644 --- a/agent.go +++ b/agent.go @@ -121,7 +121,7 @@ func main() { }, &cli.StringFlag{ Name: "store", - Value: "grpc", + Value: "", Usage: "store type", EnvVars: []string{"ERU_AGENT_STORE"}, }, @@ -145,7 +145,7 @@ func main() { }, &cli.StringFlag{ Name: "runtime", - Value: "docker", + Value: "", Usage: "runtime type", EnvVars: []string{"ERU_AGENT_RUNTIME"}, }, @@ -224,7 +224,7 @@ func main() { }, &cli.StringFlag{ Name: "kv", - Value: "etcd", + Value: "", Usage: "kv type", EnvVars: []string{"ERU_AGENT_KV"}, }, diff --git a/agent.yaml.sample b/agent.yaml.sample index 8eb92e5..140e548 100644 --- a/agent.yaml.sample +++ b/agent.yaml.sample @@ -123,6 +123,11 @@ healthcheck: # The default value is "5s", note that "s" in the end. 
global_connection_timeout: 15s +# ha_keepalive_interval defines the time interval for sending heartbeat +# when selfmon maintains its own active state. +# The default value is "16s", note that "s" in the end. +ha_keepalive_interval: 16s + # etcd defines the etcd configuration. # This option is required and has no default value. # If you don't plan to run this eru-agent in selfmon mode, diff --git a/api/http.go b/api/http.go index c0d04ee..f791433 100644 --- a/api/http.go +++ b/api/http.go @@ -59,7 +59,7 @@ func (h *Handler) log(w http.ResponseWriter, req *http.Request) { return } defer conn.Close() - h.workloadManager.Subscribe(app, buf) + h.workloadManager.Subscribe(req.Context(), app, buf) } } diff --git a/manager/workload/log.go b/manager/workload/log.go index cb1ee80..96b26ca 100644 --- a/manager/workload/log.go +++ b/manager/workload/log.go @@ -31,13 +31,13 @@ func newLogBroadcaster() *logBroadcaster { } // subscribe subscribes logs of the specific app. -func (l *logBroadcaster) subscribe(app string, buf *bufio.ReadWriter) { +func (l *logBroadcaster) subscribe(ctx context.Context, app string, buf *bufio.ReadWriter) { if _, ok := l.subscribers[app]; !ok { l.subscribers[app] = map[string]*subscriber{} } ID := coreutils.RandomString(8) - ctx, cancel := context.WithCancel(context.TODO()) + ctx, cancel := context.WithCancel(ctx) defer cancel() l.subscribers[app][ID] = &subscriber{buf, cancel} diff --git a/manager/workload/log_test.go b/manager/workload/log_test.go index 5a77f6e..7f68d4a 100644 --- a/manager/workload/log_test.go +++ b/manager/workload/log_test.go @@ -32,7 +32,7 @@ func TestLogBroadcaster(t *testing.T) { return } defer conn.Close() - l.subscribe(app, buf) + l.subscribe(context.TODO(), app, buf) } } diff --git a/manager/workload/manager.go b/manager/workload/manager.go index 2813ee4..96e5706 100644 --- a/manager/workload/manager.go +++ b/manager/workload/manager.go @@ -119,6 +119,6 @@ func (m *Manager) Run(ctx context.Context) error { } // Subscribe subscribes logs -func (m *Manager) Subscribe(app string, buf *bufio.ReadWriter) { - m.logBroadcaster.subscribe(app, buf) +func (m *Manager) Subscribe(ctx context.Context, app string, buf *bufio.ReadWriter) { + m.logBroadcaster.subscribe(ctx, app, buf) } diff --git a/selfmon/register.go b/selfmon/register.go index 44c8011..48c31a6 100644 --- a/selfmon/register.go +++ b/selfmon/register.go @@ -76,5 +76,5 @@ func (m *Selfmon) WithActiveLock(parentCtx context.Context, f func(ctx context.C } func (m *Selfmon) register(ctx context.Context) (<-chan struct{}, func(), error) { - return m.kv.StartEphemeral(ctx, ActiveKey, time.Second*16) + return m.kv.StartEphemeral(ctx, ActiveKey, m.config.HAKeepaliveInterval) } diff --git a/selfmon/selfmon_test.go b/selfmon/selfmon_test.go index 986f76f..b2ba16a 100644 --- a/selfmon/selfmon_test.go +++ b/selfmon/selfmon_test.go @@ -30,6 +30,7 @@ func newMockSelfmon(t *testing.T, withETCD bool) *Selfmon { LockPrefix: "__lock__/selfmon-agent", }, GlobalConnectionTimeout: 5 * time.Second, + HAKeepaliveInterval: 16 * time.Second, } m, err := New(ctx, config) diff --git a/types/config.go b/types/config.go index 42ed778..2fd8f36 100644 --- a/types/config.go +++ b/types/config.go @@ -65,6 +65,7 @@ type Config struct { Etcd coretypes.EtcdConfig `yaml:"etcd"` GlobalConnectionTimeout time.Duration `yaml:"global_connection_timeout" default:"5s"` + HAKeepaliveInterval time.Duration `yaml:"ha_keepalive_interval" default:"16s"` } // GetHealthCheckStatusTTL returns the TTL for health check status. 
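The two duration fields above are resolved by configor: an explicit value in the YAML file wins, otherwise the `default` struct tag applies, which is how ha_keepalive_interval falls back to 16s. A standalone sketch of that resolution, assuming configor's default-tag handling (the struct here merely mirrors the two fields and is not code from this repo):

    package main

    import (
    	"fmt"
    	"time"

    	"github.com/jinzhu/configor"
    )

    // config mirrors the two duration fields from types/config.go.
    type config struct {
    	GlobalConnectionTimeout time.Duration `yaml:"global_connection_timeout" default:"5s"`
    	HAKeepaliveInterval     time.Duration `yaml:"ha_keepalive_interval" default:"16s"`
    }

    func main() {
    	var c config
    	// No YAML file is passed, so configor falls back to the default tags.
    	if err := configor.Load(&c); err != nil {
    		panic(err)
    	}
    	fmt.Println(c.GlobalConnectionTimeout, c.HAKeepaliveInterval) // 5s 16s
    }

With this in place, selfmon/register.go passes m.config.HAKeepaliveInterval to StartEphemeral instead of the hardcoded 16-second TTL, so the failover window is tunable per deployment.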
diff --git a/types/config_test.go b/types/config_test.go
index dfe909d..51d062e 100644
--- a/types/config_test.go
+++ b/types/config_test.go
@@ -2,6 +2,7 @@ package types
 
 import (
 	"testing"
+	"time"
 
 	"github.com/jinzhu/configor"
 	"github.com/stretchr/testify/assert"
@@ -27,4 +28,7 @@ func TestLoadConfig(t *testing.T) {
 	assert.Equal(config.Store, "grpc")
 	assert.Equal(config.Runtime, "docker")
 	assert.Equal(config.KV, "etcd")
+
+	assert.Equal(config.GlobalConnectionTimeout, time.Second*15)
+	assert.Equal(config.HAKeepaliveInterval, time.Second*16)
 }
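For reference, the failover lifecycle that ha_keepalive_interval now controls, as exercised by TestEmbeddedETCD and TestRegisterTwice above: StartEphemeral heartbeats the active key until unregister is called, and the expiry channel fires if the heartbeat stops, letting a standby selfmon take over. A minimal sketch against the coremeta.KV API used in this patch; the etcd endpoint is illustrative:

    package main

    import (
    	"context"
    	"errors"
    	"log"
    	"time"

    	coremeta "github.com/projecteru2/core/store/etcdv3/meta"
    	coretypes "github.com/projecteru2/core/types"
    )

    func main() {
    	kv, err := coremeta.NewETCD(coretypes.EtcdConfig{
    		Machines:   []string{"127.0.0.1:2379"}, // illustrative endpoint
    		Prefix:     "/selfmon-agent",
    		LockPrefix: "__lock__/selfmon-agent",
    	}, nil)
    	if err != nil {
    		log.Fatal(err)
    	}

    	// Try to become the active selfmon. The key stays alive only while
    	// this process keeps heartbeating it at the keepalive interval.
    	expiry, unregister, err := kv.StartEphemeral(context.Background(), "/selfmon/active", 16*time.Second)
    	if err != nil {
    		if errors.Is(err, coretypes.ErrKeyExists) {
    			log.Fatal("another selfmon is already active; standing by")
    		}
    		log.Fatal(err)
    	}
    	defer unregister()

    	<-expiry // fires if the heartbeat stops, e.g. the process is paused
    	log.Println("lost the active lock; a standby selfmon can now take over")
    }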