Skip to content

Commit

Permalink
Improving timeout behaviour (#441)
Browse files Browse the repository at this point in the history
* Using a safety timeout that will cause the wait operations to respect Terraform-operation-level timeouts (possibly customised by the user)
  • Loading branch information
JohnSharpe authored Oct 31, 2023
1 parent 145337a commit 739b08f
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 33 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/RedisLabs/terraform-provider-rediscloud
go 1.19

require (
github.com/RedisLabs/rediscloud-go-api v0.5.4
github.com/RedisLabs/rediscloud-go-api v0.6.0
github.com/bflad/tfproviderlint v0.29.0
github.com/hashicorp/go-cty v1.4.1-0.20200414143053-d3edf31b6320
github.com/hashicorp/terraform-plugin-sdk/v2 v2.26.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ github.com/Microsoft/go-winio v0.4.16 h1:FtSW/jqD+l4ba5iPBj9CODVtgfYAD8w2wS923g/
github.com/Microsoft/go-winio v0.4.16/go.mod h1:XB6nPKklQyQ7GC9LdcBEcBl8PF76WugXOPRXwdLnMv0=
github.com/ProtonMail/go-crypto v0.0.0-20210428141323-04723f9f07d7 h1:YoJbenK9C67SkzkDfmQuVln04ygHj3vjZfd9FL+GmQQ=
github.com/ProtonMail/go-crypto v0.0.0-20210428141323-04723f9f07d7/go.mod h1:z4/9nQmJSSwwds7ejkxaJwO37dru3geImFUdJlaLzQo=
github.com/RedisLabs/rediscloud-go-api v0.5.4 h1:whc+IDu74b1AcLC3/TmPmmfmrf8xIu2ufWmqVZ3qC7M=
github.com/RedisLabs/rediscloud-go-api v0.5.4/go.mod h1:cfuU+p/rgB+TObm0cq+AkyxwXWra8JOrPLKKj+nv7lM=
github.com/RedisLabs/rediscloud-go-api v0.6.0 h1:GPIY2mhnpk4LU0M7XB5Du3XmBaG4dyDfaDonFi21RZo=
github.com/RedisLabs/rediscloud-go-api v0.6.0/go.mod h1:cfuU+p/rgB+TObm0cq+AkyxwXWra8JOrPLKKj+nv7lM=
github.com/acomagu/bufpipe v1.0.3 h1:fxAGrHZTgQ9w5QqVItgzwj235/uYZYgbXitB+dLupOk=
github.com/acomagu/bufpipe v1.0.3/go.mod h1:mxdxdup/WdsKVreO5GpW4+M/1CE2sMG4jeGJ2sYmHc4=
github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo=
Expand Down
20 changes: 20 additions & 0 deletions provider/resource_rediscloud_acl_role.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ func resourceRedisCloudAclRoleCreate(ctx context.Context, d *schema.ResourceData
return diag.FromErr(err)
}

// Sometimes ACL Users and Roles flip between Active and Pending a few times after creation/update.
// This delay gives the API a chance to settle
// TODO Ultimately this is an API problem
time.Sleep(10 * time.Second) //lintignore:R018

err = waitForAclRoleToBeActive(ctx, id, api)
if err != nil {
return diag.FromErr(err)
}

return resourceRedisCloudAclRoleRead(ctx, d, meta)
}

Expand Down Expand Up @@ -164,6 +174,16 @@ func resourceRedisCloudAclRoleUpdate(ctx context.Context, d *schema.ResourceData
if err != nil {
return diag.FromErr(err)
}

// Sometimes ACL Users and Roles flip between Active and Pending a few times after creation/update.
// This delay gives the API a chance to settle
// TODO Ultimately this is an API problem
time.Sleep(10 * time.Second) //lintignore:R018

err = waitForAclRoleToBeActive(ctx, id, api)
if err != nil {
return diag.FromErr(err)
}
}

return resourceRedisCloudAclRoleRead(ctx, d, meta)
Expand Down
56 changes: 56 additions & 0 deletions provider/resource_rediscloud_acl_user.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
"log"
"strconv"
"time"
)
Expand Down Expand Up @@ -72,6 +73,21 @@ func resourceRedisCloudAclUserCreate(ctx context.Context, d *schema.ResourceData
return diag.FromErr(err)
}

err = waitForAclUserToBeActive(ctx, id, api)
if err != nil {
return diag.FromErr(err)
}

// Sometimes ACL Users and Roles flip between Active and Pending a few times after creation/update.
// This delay gives the API a chance to settle
// TODO Ultimately this is an API problem
time.Sleep(10 * time.Second) //lintignore:R018

err = waitForAclUserToBeActive(ctx, id, api)
if err != nil {
return diag.FromErr(err)
}

d.SetId(strconv.Itoa(id))

return resourceRedisCloudAclUserRead(ctx, d, meta)
Expand Down Expand Up @@ -125,6 +141,21 @@ func resourceRedisCloudAclUserUpdate(ctx context.Context, d *schema.ResourceData
if err != nil {
return diag.FromErr(err)
}

err = waitForAclUserToBeActive(ctx, id, api)
if err != nil {
return diag.FromErr(err)
}

// Sometimes ACL Users and Roles flip between Active and Pending a few times after creation/update.
// This delay gives the API a chance to settle
// TODO Ultimately this is an API problem
time.Sleep(10 * time.Second) //lintignore:R018

err = waitForAclUserToBeActive(ctx, id, api)
if err != nil {
return diag.FromErr(err)
}
}

return resourceRedisCloudAclUserRead(ctx, d, meta)
Expand Down Expand Up @@ -172,3 +203,28 @@ func resourceRedisCloudAclUserDelete(ctx context.Context, d *schema.ResourceData

return diags
}

func waitForAclUserToBeActive(ctx context.Context, id int, api *apiClient) error {
wait := &retry.StateChangeConf{
Delay: 5 * time.Second,
Pending: []string{users.StatusPending},
Target: []string{users.StatusActive},
Timeout: 5 * time.Minute,

Refresh: func() (result interface{}, state string, err error) {
log.Printf("[DEBUG] Waiting for user %d to be active", id)

user, err := api.client.Users.Get(ctx, id)
if err != nil {
return nil, "", err
}

return redis.StringValue(user.Status), redis.StringValue(user.Status), nil
},
}
if _, err := wait.WaitForStateContext(ctx); err != nil {
return err
}

return nil
}
33 changes: 3 additions & 30 deletions provider/resource_rediscloud_subscription.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"context"
"fmt"
"log"
"os"
"regexp"
"strconv"
"time"
Expand Down Expand Up @@ -712,17 +711,11 @@ func createDatabase(dbName string, idx *int, modules []*subscriptions.CreateModu
}

func waitForSubscriptionToBeActive(ctx context.Context, id int, api *apiClient) error {
// TODO: Set timeout based on the timeouts block, instead of an env var.
timeout, err := getSubscriptionTimeout()
if err != nil {
return err
}

wait := &retry.StateChangeConf{
Delay: 10 * time.Second,
Pending: []string{subscriptions.SubscriptionStatusPending},
Target: []string{subscriptions.SubscriptionStatusActive},
Timeout: timeout,
Timeout: safetyTimeout,

Refresh: func() (result interface{}, state string, err error) {
log.Printf("[DEBUG] Waiting for subscription %d to be active", id)
Expand All @@ -743,16 +736,11 @@ func waitForSubscriptionToBeActive(ctx context.Context, id int, api *apiClient)
}

func waitForSubscriptionToBeDeleted(ctx context.Context, id int, api *apiClient) error {
timeout, err := getSubscriptionTimeout()
if err != nil {
return err
}

wait := &retry.StateChangeConf{
Delay: 10 * time.Second,
Pending: []string{subscriptions.SubscriptionStatusDeleting},
Target: []string{"deleted"},
Timeout: timeout,
Timeout: safetyTimeout,

Refresh: func() (result interface{}, state string, err error) {
log.Printf("[DEBUG] Waiting for subscription %d to be deleted", id)
Expand Down Expand Up @@ -790,7 +778,7 @@ func waitForDatabaseToBeActive(ctx context.Context, subId, id int, api *apiClien
databases.StatusProxyPolicyChangeDraft,
},
Target: []string{databases.StatusActive},
Timeout: 30 * time.Minute,
Timeout: safetyTimeout,

Refresh: func() (result interface{}, state string, err error) {
log.Printf("[DEBUG] Waiting for database %d to be active", id)
Expand Down Expand Up @@ -971,18 +959,3 @@ func readPaymentMethodID(d *schema.ResourceData) (*int, error) {
}
return nil, nil
}

func getSubscriptionTimeout() (time.Duration, error) {
// Allow configuring the subscription timeout via an environment variable.
timeout := 30
val, _ := os.LookupEnv("REDISCLOUD_SUBSCRIPTION_TIMEOUT")
if len(val) != 0 {
envTimeout, err := strconv.Atoi(val)
if err != nil {
return time.Duration(0), err
}
timeout = envTimeout
}
timeoutDuration := time.Duration(timeout) * time.Minute
return timeoutDuration, nil
}
6 changes: 6 additions & 0 deletions provider/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ import (
"time"
)

// This timeout is an absolute maximum used in some of the waitForStatus operations concerning creation and updating
// Subscriptions and Databases. Reads and Deletions have their own, stricter timeouts because they consistently behave
// well. The Terraform operation-level timeout should kick in way before we hit this and kill the task.
// Unfortunately there's no "time-remaining-before-timeout" utility, or we could use that in the wait blocks.
const safetyTimeout = 6 * time.Hour

func setToStringSlice(set *schema.Set) []*string {
var ret []*string
for _, s := range set.List() {
Expand Down

0 comments on commit 739b08f

Please sign in to comment.