
feat: add detailed timeout options #81

Merged · 9 commits · Jun 27, 2024
cage.go (21 additions, 18 deletions)

@@ -5,6 +5,7 @@ import (
 	"time"
 
 	"github.com/loilo-inc/canarycage/awsiface"
+	"github.com/loilo-inc/canarycage/timeout"
 )
 
 type Cage interface {
@@ -19,33 +20,35 @@ type Time interface {
 }
 
 type cage struct {
-	Env     *Envars
-	Ecs     awsiface.EcsClient
-	Alb     awsiface.AlbClient
-	Ec2     awsiface.Ec2Client
-	Time    Time
-	MaxWait time.Duration
+	*Input
+	Timeout timeout.Manager
 }
 
 type Input struct {
-	Env     *Envars
-	ECS     awsiface.EcsClient
-	ALB     awsiface.AlbClient
-	EC2     awsiface.Ec2Client
-	Time    Time
-	MaxWait time.Duration
+	Env  *Envars
+	Ecs  awsiface.EcsClient
+	Alb  awsiface.AlbClient
+	Ec2  awsiface.Ec2Client
+	Time Time
 }
 
 func NewCage(input *Input) Cage {
 	if input.Time == nil {
 		input.Time = &timeImpl{}
 	}
+	taskRunningWait := (time.Duration)(input.Env.CanaryTaskRunningWait) * time.Second
+	taskHealthCheckWait := (time.Duration)(input.Env.CanaryTaskHealthCheckWait) * time.Second
+	taskStoppedWait := (time.Duration)(input.Env.CanaryTaskStoppedWait) * time.Second
+	serviceStableWait := (time.Duration)(input.Env.ServiceStableWait) * time.Second
 	return &cage{
-		Env:     input.Env,
-		Ecs:     input.ECS,
-		Alb:     input.ALB,
-		Ec2:     input.EC2,
-		Time:    input.Time,
-		MaxWait: 15 * time.Minute,
+		Input: input,
+		Timeout: timeout.NewManager(
+			15*time.Minute,
+			&timeout.Input{
+				TaskRunningWait:     taskRunningWait,
+				TaskHealthCheckWait: taskHealthCheckWait,
+				TaskStoppedWait:     taskStoppedWait,
+				ServiceStableWait:   serviceStableWait,
+			}),
 	}
 }
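
Note: the timeout package itself is added elsewhere in this PR and does not appear in this diff. Below is a minimal sketch of a Manager consistent with the call sites in this PR (NewManager plus the TaskRunning, TaskHealthCheck, TaskStopped, and ServiceStable methods used in the following files); the zero-value fallback to the 15-minute default is an assumption, not something this diff confirms:

```go
// Sketch only. The names Manager, Input, and NewManager come from this PR;
// the fallback behavior for unset (zero) waits is assumed.
package timeout

import "time"

type Input struct {
	TaskRunningWait     time.Duration
	TaskHealthCheckWait time.Duration
	TaskStoppedWait     time.Duration
	ServiceStableWait   time.Duration
}

type Manager interface {
	TaskRunning() time.Duration
	TaskHealthCheck() time.Duration
	TaskStopped() time.Duration
	ServiceStable() time.Duration
}

type manager struct {
	defaultWait time.Duration
	input       *Input
}

func NewManager(defaultWait time.Duration, input *Input) Manager {
	return &manager{defaultWait: defaultWait, input: input}
}

// orDefault assumes an unset (zero) wait falls back to the default.
func (m *manager) orDefault(d time.Duration) time.Duration {
	if d <= 0 {
		return m.defaultWait
	}
	return d
}

func (m *manager) TaskRunning() time.Duration     { return m.orDefault(m.input.TaskRunningWait) }
func (m *manager) TaskHealthCheck() time.Duration { return m.orDefault(m.input.TaskHealthCheckWait) }
func (m *manager) TaskStopped() time.Duration     { return m.orDefault(m.input.TaskStoppedWait) }
func (m *manager) ServiceStable() time.Duration   { return m.orDefault(m.input.ServiceStableWait) }
```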
canary_task.go (55 additions, 19 deletions)

@@ -3,6 +3,7 @@
 import (
 	"context"
 	"fmt"
+	"strconv"
 	"time"
 
 	"github.com/apex/log"
@@ -71,7 +72,7 @@
 	if err := ecs.NewTasksRunningWaiter(c.Ecs).Wait(ctx, &ecs.DescribeTasksInput{
 		Cluster: &c.Env.Cluster,
 		Tasks:   []string{*c.taskArn},
-	}, c.MaxWait); err != nil {
+	}, c.Timeout.TaskRunning()); err != nil {
 		return err
 	}
 	log.Infof("🐣 canary task '%s' is running!", *c.taskArn)
@@ -134,8 +135,11 @@
 			containerHasHealthChecks[*definition.Name] = struct{}{}
 		}
 	}
-	for count := 0; count < 10; count++ {
-		<-c.Time.NewTimer(time.Duration(15) * time.Second).C
+	healthCheckWait := c.Timeout.TaskHealthCheck()
+	healthCheckPeriod := 15 * time.Second
+	countPerPeriod := int(healthCheckWait.Seconds() / 15)
+	for count := 0; count < countPerPeriod; count++ {
+		<-c.Time.NewTimer(healthCheckPeriod).C
 		log.Infof("canary task '%s' waits until %d container(s) become healthy", *c.taskArn, len(containerHasHealthChecks))
 		if o, err := c.Ecs.DescribeTasks(ctx, &ecs.DescribeTasksInput{
 			Cluster: &c.Env.Cluster,
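
The rewritten loop above derives its polling budget from the configured timeout rather than a hard-coded 10 iterations: with the 900-second default of taskHealthCheckTimeout (see flags.go below), it polls every 15 seconds, up to 60 times. A self-contained sketch of that arithmetic (the 900-second value is the flag default, assumed here):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Assumes the 900-second default of --taskHealthCheckTimeout from flags.go.
	healthCheckWait := 900 * time.Second
	healthCheckPeriod := 15 * time.Second
	countPerPeriod := int(healthCheckWait.Seconds() / 15)
	// Prints: poll every 15s, up to 60 times
	fmt.Printf("poll every %v, up to %d times\n", healthCheckPeriod, countPerPeriod)
}
```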
@@ -306,10 +310,36 @@
 	}
 }
 
+func (c *CanaryTask) targetDeregistrationDelay(ctx context.Context) (time.Duration, error) {
+	deregistrationDelay := 300 * time.Second
Review comment (Contributor), on `deregistrationDelay := 300 * time.Second`:

> deregistration_delay.timeout_seconds
> The amount of time for Elastic Load Balancing to wait before deregistering a target. The range is 0–3600 seconds. The default value is 300 seconds.
> https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-target-groups.html

Nice 👍
+	if o, err := c.Alb.DescribeTargetGroupAttributes(ctx, &elbv2.DescribeTargetGroupAttributesInput{
+		TargetGroupArn: c.target.targetGroupArn,
+	}); err != nil {
+		return deregistrationDelay, err
+	} else {
+		// find deregistration_delay.timeout_seconds
+		for _, attr := range o.Attributes {
+			if *attr.Key == "deregistration_delay.timeout_seconds" {
+				if value, err := strconv.ParseInt(*attr.Value, 10, 64); err != nil {
+					return deregistrationDelay, err
+				} else {
+					deregistrationDelay = time.Duration(value) * time.Second
+				}
+			}
+		}
+	}
+	return deregistrationDelay, nil
+}

 func (c *CanaryTask) Stop(ctx context.Context) error {
 	if c.target == nil {
 		log.Info("no load balancer is attached to service. Skip deregisteration.")
 	} else {
+		deregistrationDelay, err := c.targetDeregistrationDelay(ctx)
+		if err != nil {
+			log.Errorf("failed to get deregistration delay: %v", err)
+			log.Errorf("deregistration delay is set to %d seconds", deregistrationDelay)
+		}
 		log.Infof("deregistering the canary task from target group '%s'...", c.target.targetId)
 		if _, err := c.Alb.DeregisterTargets(ctx, &elbv2.DeregisterTargetsInput{
 			TargetGroupArn: c.target.targetGroupArn,
@@ -319,22 +349,28 @@
 			Port:             c.target.targetPort,
 			}},
 		}); err != nil {
-			return err
-		}
-		if err := elbv2.NewTargetDeregisteredWaiter(c.Alb).Wait(ctx, &elbv2.DescribeTargetHealthInput{
-			TargetGroupArn: c.target.targetGroupArn,
-			Targets: []elbv2types.TargetDescription{{
-				AvailabilityZone: c.target.availabilityZone,
-				Id:               c.target.targetId,
-				Port:             c.target.targetPort,
-			}},
-		}, c.MaxWait); err != nil {
-			return err
-		}
-		log.Infof(
-			"canary task '%s' has successfully been deregistered from target group '%s'",
-			*c.taskArn, c.target.targetId,
-		)
+			log.Errorf("failed to deregister the canary task from target group: %v", err)
+			log.Errorf("continuing to stop the canary task...")
+		} else {
+			log.Infof("deregister operation accepted. waiting for the canary task to be deregistered...")
+			deregisterWait := deregistrationDelay + time.Minute // add 1 minute for safety
+			if err := elbv2.NewTargetDeregisteredWaiter(c.Alb).Wait(ctx, &elbv2.DescribeTargetHealthInput{
+				TargetGroupArn: c.target.targetGroupArn,
+				Targets: []elbv2types.TargetDescription{{
+					AvailabilityZone: c.target.availabilityZone,
+					Id:               c.target.targetId,
+					Port:             c.target.targetPort,
+				}},
+			}, deregisterWait); err != nil {
+				log.Errorf("failed to wait for the canary task deregistered from target group: %v", err)
+				log.Errorf("continuing to stop the canary task...")
+			} else {
+				log.Infof(
+					"canary task '%s' has successfully been deregistered from target group '%s'",
+					*c.taskArn, *c.target.targetId,
+				)
+			}
+		}
 	}
 	log.Infof("stopping the canary task '%s'...", *c.taskArn)
 	if _, err := c.Ecs.StopTask(ctx, &ecs.StopTaskInput{
@@ -346,7 +382,7 @@
 	if err := ecs.NewTasksStoppedWaiter(c.Ecs).Wait(ctx, &ecs.DescribeTasksInput{
 		Cluster: &c.Env.Cluster,
 		Tasks:   []string{*c.taskArn},
-	}, c.MaxWait); err != nil {
+	}, c.Timeout.TaskStopped()); err != nil {
 		return err
 	}
 	log.Infof("canary task '%s' has successfully been stopped", *c.taskArn)
cli/cage/commands/command.go (3 additions, 3 deletions)

@@ -40,9 +40,9 @@
 	}
 	cagecli := cage.NewCage(&cage.Input{
 		Env: envars,
-		ECS: ecs.NewFromConfig(conf),
-		EC2: ec2.NewFromConfig(conf),
-		ALB: elasticloadbalancingv2.NewFromConfig(conf),
+		Ecs: ecs.NewFromConfig(conf),
+		Ec2: ec2.NewFromConfig(conf),
+		Alb: elasticloadbalancingv2.NewFromConfig(conf),
 	})
 	return cagecli, nil
 }
cli/cage/commands/flags.go (45 additions, 1 deletion)

@@ -43,8 +43,52 @@ func CanaryTaskIdleDurationFlag(dest *int) *cli.IntFlag {
 	return &cli.IntFlag{
 		Name:        "canaryTaskIdleDuration",
 		EnvVars:     []string{cage.CanaryTaskIdleDuration},
-		Usage:       "Idle duration seconds for ensuring canary task that has no attached load balancer",
+		Usage:       "duration seconds for waiting canary task that isn't attached to target group considered as ready for serving traffic",
 		Destination: dest,
 		Value:       10,
 	}
 }
+
+func TaskRunningWaitFlag(dest *int) *cli.IntFlag {
+	return &cli.IntFlag{
+		Name:        "taskRunningTimeout",
+		EnvVars:     []string{cage.TaskRunningTimeout},
+		Usage:       "max duration seconds for waiting canary task running",
+		Destination: dest,
+		Category:    "ADVANCED",
+		Value:       900, // 15 minutes
+	}
+}
+
+func TaskHealthCheckWaitFlag(dest *int) *cli.IntFlag {
+	return &cli.IntFlag{
+		Name:        "taskHealthCheckTimeout",
+		EnvVars:     []string{cage.TaskHealthCheckTimeout},
+		Usage:       "max duration seconds for waiting canary task health check",
+		Destination: dest,
+		Category:    "ADVANCED",
+		Value:       900,
+	}
+}
+
+func TaskStoppedWaitFlag(dest *int) *cli.IntFlag {
+	return &cli.IntFlag{
+		Name:        "taskStoppedTimeout",
+		EnvVars:     []string{cage.TaskStoppedTimeout},
+		Usage:       "max duration seconds for waiting canary task stopped",
+		Destination: dest,
+		Category:    "ADVANCED",
+		Value:       900,
+	}
+}
+
+func ServiceStableWaitFlag(dest *int) *cli.IntFlag {
+	return &cli.IntFlag{
+		Name:        "serviceStableTimeout",
+		EnvVars:     []string{cage.ServiceStableTimeout},
+		Usage:       "max duration seconds for waiting service stable",
+		Destination: dest,
+		Category:    "ADVANCED",
+		Value:       900,
+	}
+}
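
All four new flags share the same shape: an integer number of seconds in the ADVANCED category with a 900-second default, settable by command-line flag or environment variable, whose destination lands in Envars and is converted to time.Duration inside NewCage. A hedged, self-contained sketch of how one of them is consumed (the flag name, env var key, usage text, and default come from this diff; the surrounding cli/v2 app is illustrative scaffolding, not the real CLI assembled in cli/cage/commands):

```go
package main

import (
	"fmt"
	"os"

	"github.com/urfave/cli/v2"
)

func main() {
	var taskRunningWait int // seconds, as in Envars.CanaryTaskRunningWait
	app := &cli.App{
		Name: "cage",
		Flags: []cli.Flag{
			&cli.IntFlag{
				Name:        "taskRunningTimeout",
				EnvVars:     []string{"CAGE_TASK_RUNNING_TIMEOUT"},
				Usage:       "max duration seconds for waiting canary task running",
				Destination: &taskRunningWait,
				Category:    "ADVANCED",
				Value:       900, // 15 minutes
			},
		},
		Action: func(ctx *cli.Context) error {
			// e.g. `cage --taskRunningTimeout 600` or `CAGE_TASK_RUNNING_TIMEOUT=600 cage`
			fmt.Printf("waiting up to %d seconds for the canary task to run\n", taskRunningWait)
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
```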
cli/cage/commands/rollout.go (4 additions, 0 deletions)

@@ -36,6 +36,10 @@ func (c *CageCommands) RollOut(
 				Usage:       "Update service configurations except for task definiton. Default is false.",
 				Destination: &updateServiceConf,
 			},
+			TaskRunningWaitFlag(&envars.CanaryTaskRunningWait),
+			TaskHealthCheckWaitFlag(&envars.CanaryTaskHealthCheckWait),
+			TaskStoppedWaitFlag(&envars.CanaryTaskStoppedWait),
+			ServiceStableWaitFlag(&envars.ServiceStableWait),
 		},
 		Action: func(ctx *cli.Context) error {
 			dir, _, err := c.requireArgs(ctx, 1, 1)
cli/cage/commands/run.go (2 additions, 0 deletions)

@@ -21,6 +21,8 @@ func (c *CageCommands) Run(
 		Flags: []cli.Flag{
 			RegionFlag(&envars.Region),
 			ClusterFlag(&envars.Cluster),
+			TaskRunningWaitFlag(&envars.CanaryTaskRunningWait),
+			TaskStoppedWaitFlag(&envars.CanaryTaskStoppedWait),
 		},
 		Action: func(ctx *cli.Context) error {
 			dir, rest, err := c.requireArgs(ctx, 3, 100)
cli/cage/commands/up.go (1 addition, 0 deletions)

@@ -22,6 +22,7 @@ func (c *CageCommands) Up(
 			ServiceFlag(&envars.Service),
 			TaskDefinitionArnFlag(&envars.TaskDefinitionArn),
 			CanaryTaskIdleDurationFlag(&envars.CanaryTaskIdleDuration),
+			ServiceStableWaitFlag(&envars.ServiceStableWait),
 		},
 		Action: func(ctx *cli.Context) error {
 			dir, _, err := c.requireArgs(ctx, 1, 1)
env.go (18 additions, 10 deletions)

@@ -11,16 +11,20 @@ import (
 )
 
 type Envars struct {
-	_                      struct{} `type:"struct"`
-	CI                     bool     `json:"ci" type:"bool"`
-	Region                 string   `json:"region" type:"string"`
-	Cluster                string   `json:"cluster" type:"string" required:"true"`
-	Service                string   `json:"service" type:"string" required:"true"`
-	CanaryInstanceArn      string
-	TaskDefinitionArn      string `json:"nextTaskDefinitionArn" type:"string"`
-	TaskDefinitionInput    *ecs.RegisterTaskDefinitionInput
-	ServiceDefinitionInput *ecs.CreateServiceInput
-	CanaryTaskIdleDuration int
+	_                         struct{} `type:"struct"`
+	CI                        bool     `json:"ci" type:"bool"`
+	Region                    string   `json:"region" type:"string"`
+	Cluster                   string   `json:"cluster" type:"string" required:"true"`
+	Service                   string   `json:"service" type:"string" required:"true"`
+	CanaryInstanceArn         string
+	TaskDefinitionArn         string `json:"nextTaskDefinitionArn" type:"string"`
+	TaskDefinitionInput       *ecs.RegisterTaskDefinitionInput
+	ServiceDefinitionInput    *ecs.CreateServiceInput
+	CanaryTaskIdleDuration    int // sec
+	CanaryTaskRunningWait     int // sec
+	CanaryTaskHealthCheckWait int // sec
+	CanaryTaskStoppedWait     int // sec
+	ServiceStableWait         int // sec
 }
 
 // required
@@ -35,6 +39,10 @@ const CanaryInstanceArnKey = "CAGE_CANARY_INSTANCE_ARN"
 const RegionKey = "CAGE_REGION"
 const CanaryTaskIdleDuration = "CAGE_CANARY_TASK_IDLE_DURATION"
 const UpdateServiceKey = "CAGE_UPDATE_SERVIEC"
+const TaskRunningTimeout = "CAGE_TASK_RUNNING_TIMEOUT"
+const TaskHealthCheckTimeout = "CAGE_TASK_HEALTH_CHECK_TIMEOUT"
+const TaskStoppedTimeout = "CAGE_TASK_STOPPED_TIMEOUT"
+const ServiceStableTimeout = "CAGE_SERVICE_STABLE_TIMEOUT"
 
 func EnsureEnvars(
 	dest *Envars,
rollout.go (1 addition, 1 deletion)

@@ -110,7 +110,7 @@ func (c *cage) RollOut(ctx context.Context, input *RollOutInput) (*RollOutResult
 	if err := ecs.NewServicesStableWaiter(c.Ecs).Wait(ctx, &ecs.DescribeServicesInput{
 		Cluster:  &c.Env.Cluster,
 		Services: []string{c.Env.Service},
-	}, c.MaxWait); err != nil {
+	}, c.Timeout.ServiceStable()); err != nil {
 		return result, err
 	}
 	log.Infof("🥴 service '%s' has become to be stable!", c.Env.Service)