Skip to content

Commit

Permalink
IP_COOLDOWN_PERIOD environment variable (#2492)
Browse files Browse the repository at this point in the history
  • Loading branch information
jchen6585 authored Aug 16, 2023
1 parent 2527c0f commit e6173fe
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 31 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,19 @@ This environment variable must be set for both the `aws-vpc-cni-init` and `aws-n

Note that enabling/disabling this feature only affects whether newly created pods have an IPv6 interface created. Therefore, it is recommended that you reboot existing nodes after enabling/disabling this feature. Also note that if you are using this feature in conjunction with `ENABLE_POD_ENI` (Security Groups for Pods), the security group rules will NOT be applied to egressing IPv6 traffic.

----

#### `IP_COOLDOWN_PERIOD` (v1.14.1+)

Type: Integer as a String

Default: `30`

Specifies the number of seconds an IP address is in cooldown after pod deletion. The cooldown period gives network proxies, such as kube-proxy, time to update node iptables rules when the IP was registered as a valid endpoint, such as for a service. Modify this value with caution, as kube-proxy update time scales with the number of nodes and services.

**Note:** 0 is a supported value, however it is highly discouraged.
**Note:** Higher cooldown periods may lead to a higher number of EC2 API calls as IPs are in cooldown cache.

### VPC CNI Feature Matrix

IP Mode | Secondary IP Mode | Prefix Delegation | Security Groups Per Pod | WARM & MIN IP/Prefix Targets | External SNAT
Expand Down
9 changes: 9 additions & 0 deletions cmd/aws-vpc-cni/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ const (
vpcCniInitDonePath = "/vpc-cni-init/done"
defaultEnBandwidthPlugin = false
defaultEnPrefixDelegation = false
defaultIPCooldownPeriod = 30

envHostCniBinPath = "HOST_CNI_BIN_PATH"
envHostCniConfDirPath = "HOST_CNI_CONFDIR_PATH"
Expand All @@ -99,6 +100,7 @@ const (
envEnIPv6 = "ENABLE_IPv6"
envEnIPv6Egress = "ENABLE_V6_EGRESS"
envRandomizeSNAT = "AWS_VPC_K8S_CNI_RANDOMIZESNAT"
envIPCooldownPeriod = "IP_COOLDOWN_PERIOD"
)

// NetConfList describes an ordered list of networks.
Expand Down Expand Up @@ -347,6 +349,13 @@ func validateEnvVars() bool {
}
}

// Validate that IP_COOLDOWN_PERIOD is a valid integer
ipCooldownPeriod, err, input := utils.GetIntFromStringEnvVar(envIPCooldownPeriod, defaultIPCooldownPeriod)
if err != nil || ipCooldownPeriod < 0 {
log.Errorf("IP_COOLDOWN_PERIOD MUST be a valid positive integer. %s is invalid", input)
return false
}

prefixDelegationEn := utils.GetBoolAsStringEnvVar(envEnPrefixDelegation, defaultEnPrefixDelegation)
warmIPTarget := utils.GetEnv(envWarmIPTarget, "0")
warmPrefixTarget := utils.GetEnv(envWarmPrefixTarget, "0")
Expand Down
65 changes: 38 additions & 27 deletions pkg/ipamd/datastore/data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"golang.org/x/sys/unix"

"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/logger"
"github.com/aws/amazon-vpc-cni-k8s/utils"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
)
Expand All @@ -35,9 +36,8 @@ const (
// minENILifeTime is the shortest time before we consider deleting a newly created ENI
minENILifeTime = 1 * time.Minute

// addressCoolingPeriod is used to ensure an IP not get assigned to a Pod if this IP is used by a different Pod
// in addressCoolingPeriod
addressCoolingPeriod = 30 * time.Second
// envIPCooldownPeriod (default 30 seconds) specifies the time after pod deletion before an IP can be assigned to a new pod
envIPCooldownPeriod = "IP_COOLDOWN_PERIOD"

// DuplicatedENIError is an error when caller tries to add an duplicate ENI to data store
DuplicatedENIError = "data store: duplicate ENI"
Expand Down Expand Up @@ -249,12 +249,12 @@ type CidrStats struct {
}

// Gets number of assigned IPs and the IPs in cooldown from a given CIDR
func (cidr *CidrInfo) GetIPStatsFromCidr() CidrStats {
func (cidr *CidrInfo) GetIPStatsFromCidr(ipCooldownPeriod time.Duration) CidrStats {
stats := CidrStats{}
for _, addr := range cidr.IPAddresses {
if addr.Assigned() {
stats.AssignedIPs++
} else if addr.inCoolingPeriod() {
} else if addr.inCoolingPeriod(ipCooldownPeriod) {
stats.CooldownIPs++
}
}
Expand All @@ -266,9 +266,18 @@ func (addr AddressInfo) Assigned() bool {
return !addr.IPAMKey.IsZero()
}

// InCoolingPeriod checks whether an addr is in addressCoolingPeriod
func (addr AddressInfo) inCoolingPeriod() bool {
return time.Since(addr.UnassignedTime) <= addressCoolingPeriod
// getCooldownPeriod returns the time duration in seconds configured by the IP_COOLDOWN_PERIOD env variable
func getCooldownPeriod() time.Duration {
cooldownVal, err, _ := utils.GetIntFromStringEnvVar(envIPCooldownPeriod, 30)
if err != nil {
return 30 * time.Second
}
return time.Duration(cooldownVal) * time.Second
}

// InCoolingPeriod checks whether an addr is in ipCooldownPeriod
func (addr AddressInfo) inCoolingPeriod(ipCooldownPeriod time.Duration) bool {
return time.Since(addr.UnassignedTime) <= ipCooldownPeriod
}

// ENIPool is a collection of ENI, keyed by ENI ID
Expand Down Expand Up @@ -304,15 +313,16 @@ type PodIPInfo struct {

// DataStore contains node level ENI/IP
type DataStore struct {
total int
assigned int
allocatedPrefix int
eniPool ENIPool
lock sync.Mutex
log logger.Logger
backingStore Checkpointer
netLink netlinkwrapper.NetLink
isPDEnabled bool
total int
assigned int
allocatedPrefix int
eniPool ENIPool
lock sync.Mutex
log logger.Logger
backingStore Checkpointer
netLink netlinkwrapper.NetLink
isPDEnabled bool
ipCooldownPeriod time.Duration
}

// ENIInfos contains ENI IP information
Expand Down Expand Up @@ -342,11 +352,12 @@ func prometheusRegister() {
func NewDataStore(log logger.Logger, backingStore Checkpointer, isPDEnabled bool) *DataStore {
prometheusRegister()
return &DataStore{
eniPool: make(ENIPool),
log: log,
backingStore: backingStore,
netLink: netlinkwrapper.NewNetLink(),
isPDEnabled: isPDEnabled,
eniPool: make(ENIPool),
log: log,
backingStore: backingStore,
netLink: netlinkwrapper.NewNetLink(),
isPDEnabled: isPDEnabled,
ipCooldownPeriod: getCooldownPeriod(),
}
}

Expand Down Expand Up @@ -842,7 +853,7 @@ func (ds *DataStore) GetIPStats(addressFamily string) *DataStoreStats {
}
for _, cidr := range AssignedCIDRs {
if addressFamily == "4" && ((ds.isPDEnabled && cidr.IsPrefix) || (!ds.isPDEnabled && !cidr.IsPrefix)) {
cidrStats := cidr.GetIPStatsFromCidr()
cidrStats := cidr.GetIPStatsFromCidr(ds.ipCooldownPeriod)
stats.AssignedIPs += cidrStats.AssignedIPs
stats.CooldownIPs += cidrStats.CooldownIPs
stats.TotalIPs += cidr.Size()
Expand Down Expand Up @@ -952,7 +963,7 @@ func (ds *DataStore) getDeletableENI(warmIPTarget, minimumIPTarget, warmPrefixTa
continue
}

if eni.hasIPInCooling() {
if eni.hasIPInCooling(ds.ipCooldownPeriod) {
ds.log.Debugf("ENI %s cannot be deleted because has IPs in cooling", eni.ID)
continue
}
Expand Down Expand Up @@ -999,10 +1010,10 @@ func (e *ENI) isTooYoung() bool {
}

// HasIPInCooling returns true if an IP address was unassigned recently.
func (e *ENI) hasIPInCooling() bool {
func (e *ENI) hasIPInCooling(ipCooldownPeriod time.Duration) bool {
for _, assignedaddr := range e.AvailableIPv4Cidrs {
for _, addr := range assignedaddr.IPAddresses {
if addr.inCoolingPeriod() {
if addr.inCoolingPeriod(ipCooldownPeriod) {
return true
}
}
Expand Down Expand Up @@ -1355,7 +1366,7 @@ func (ds *DataStore) getUnusedIP(availableCidr *CidrInfo) (string, error) {
//Check if there is any IP out of cooldown
var cachedIP string
for _, addr := range availableCidr.IPAddresses {
if !addr.Assigned() && !addr.inCoolingPeriod() {
if !addr.Assigned() && !addr.inCoolingPeriod(ds.ipCooldownPeriod) {
//if the IP is out of cooldown and not assigned then cache the first available IP
//continue cleaning up the DB, this is to avoid stale entries and a new thread :)
if cachedIP == "" {
Expand Down
15 changes: 11 additions & 4 deletions pkg/ipamd/datastore/data_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package datastore
import (
"errors"
"net"
"os"
"testing"
"time"

Expand Down Expand Up @@ -888,6 +889,8 @@ func TestPodIPv4AddressWithPDEnabled(t *testing.T) {
}

func TestGetIPStatsV4(t *testing.T) {
os.Setenv(envIPCooldownPeriod, "1")
defer os.Unsetenv(envIPCooldownPeriod)
ds := NewDataStore(Testlog, NullCheckpoint{}, false)

_ = ds.AddENI("eni-1", 1, true, false, false)
Expand Down Expand Up @@ -925,8 +928,9 @@ func TestGetIPStatsV4(t *testing.T) {
*ds.GetIPStats("4"),
)

// wait 30s (cooldown period)
time.Sleep(30 * time.Second)
assert.Equal(t, ds.ipCooldownPeriod, 1*time.Second)
// wait 1s (cooldown period)
time.Sleep(ds.ipCooldownPeriod)

assert.Equal(t,
DataStoreStats{
Expand All @@ -939,6 +943,8 @@ func TestGetIPStatsV4(t *testing.T) {
}

func TestGetIPStatsV4WithPD(t *testing.T) {
os.Setenv(envIPCooldownPeriod, "1")
defer os.Unsetenv(envIPCooldownPeriod)
ds := NewDataStore(Testlog, NullCheckpoint{}, true)

_ = ds.AddENI("eni-1", 1, true, false, false)
Expand Down Expand Up @@ -976,8 +982,9 @@ func TestGetIPStatsV4WithPD(t *testing.T) {
*ds.GetIPStats("4"),
)

// wait 30s (cooldown period)
time.Sleep(30 * time.Second)
assert.Equal(t, ds.ipCooldownPeriod, 1*time.Second)
// wait 1s (cooldown period)
time.Sleep(ds.ipCooldownPeriod)

assert.Equal(t,
DataStoreStats{
Expand Down
14 changes: 14 additions & 0 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@ func GetBoolAsStringEnvVar(env string, defaultVal bool) bool {
return defaultVal
}

// Parse environment variable and return integer representation of string, or default value if environment variable is unset
func GetIntFromStringEnvVar(env string, defaultVal int) (int, error, string) {
if val, ok := os.LookupEnv(env); ok {
parsedVal, err := strconv.Atoi(val)
if err == nil {
return parsedVal, nil, val
}
log.Errorf("Failed to parse variable %s with value %s as integer", env, val)
return -1, err, val
}
// Environment variable is not set, so return default value
return defaultVal, nil, ""
}

// If environment variable is set, return set value, otherwise return default value
func GetEnv(env, defaultVal string) string {
if val, ok := os.LookupEnv(env); ok {
Expand Down
20 changes: 20 additions & 0 deletions utils/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ import (
const (
defaultPathEnv = "/host/opt/cni/bin"
defaultBoolEnv = false
defaultIntEnv = 30

envPath = "Path"
envBool = "Bool"
envInt = "Integer"
)

// Validate that GetBoolAsStringEnvVar runs against acceptable format input without error
Expand Down Expand Up @@ -45,3 +47,21 @@ func TestGetEnv(t *testing.T) {
tmp = GetEnv(envPath, defaultPathEnv)
assert.Equal(t, tmp, "/host/opt/cni/bin/test")
}

// Validate that GetIntFromStringEnvVar runs against acceptable format input without error
func TestGetIntFromStringEnvVar(t *testing.T) {
// Test environment flag variable not set
tmp, _, _ := GetIntFromStringEnvVar(envInt, defaultIntEnv)
assert.Equal(t, tmp, defaultIntEnv)

// Test basic Integer as string set with acceptable format
os.Setenv(envInt, "20")
tmp, _, _ = GetIntFromStringEnvVar(envInt, defaultIntEnv)
assert.Equal(t, tmp, 20)

// Test basic Integer as string set with unacceptable format
os.Setenv(envInt, "2O")
defer os.Unsetenv(envInt)
tmp, _, _ = GetIntFromStringEnvVar(envInt, defaultIntEnv)
assert.Equal(t, tmp, -1)
}

0 comments on commit e6173fe

Please sign in to comment.