-
Notifications
You must be signed in to change notification settings - Fork 122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat!: throttle with retries provider changes #1230
Changes from all commits
7691bf5
a10a239
8a557b3
196ce38
0f27c31
cf09f5f
87ad0f4
7e6264f
5e4b845
8350956
6d20dd1
461878c
56242a6
1d963fa
e8acd9e
8ed33f3
ecac6a4
f6d4650
956e595
db8dc1b
78a8269
73db33b
5bfccc3
5196394
37e0e93
b1cb354
599854a
8945156
0544fd3
d8f5690
f91cb70
aca8362
66adc8a
cc9064d
6da7fef
840d290
8ec7bc5
b152c03
6bdfff9
3b27006
afa32f4
6ee88e2
1ed2f56
fcf0b92
53c02ea
e9d745d
b685958
1ceddcb
e228953
7d3dd64
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1937,50 +1937,41 @@ func (tr TestRun) assignConsumerPubKey(action assignConsumerPubKeyAction, verbos | |
tr.waitBlocks(chainID("provi"), 2, 30*time.Second) | ||
} | ||
|
||
// slashThrottleDequeue polls slash queue sizes until nextQueueSize is achieved | ||
type slashThrottleDequeue struct { | ||
chain chainID | ||
currentQueueSize int | ||
nextQueueSize int | ||
// slashMeterReplenishmentAction polls the slash meter on provider until value is achieved | ||
type slashMeterReplenishmentAction struct { | ||
targetValue int64 | ||
// panic if timeout is exceeded | ||
timeout time.Duration | ||
} | ||
|
||
func (tr TestRun) waitForSlashThrottleDequeue( | ||
action slashThrottleDequeue, | ||
func (tr TestRun) waitForSlashMeterReplenishment( | ||
action slashMeterReplenishmentAction, | ||
verbose bool, | ||
) { | ||
timeout := time.Now().Add(action.timeout) | ||
initialGlobalQueueSize := int(tr.getGlobalSlashQueueSize()) | ||
initialSlashMeter := tr.getSlashMeter() | ||
|
||
if initialGlobalQueueSize != action.currentQueueSize { | ||
panic(fmt.Sprintf("wrong initial queue size: %d - expected global queue: %d\n", initialGlobalQueueSize, action.currentQueueSize)) | ||
if initialSlashMeter >= 0 { | ||
panic(fmt.Sprintf("No need to wait for slash meter replenishment, current value: %d", initialSlashMeter)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this a bad idea to panic on? It makes the tests flakey, because if they happen to run slowly, the slash meter might be already-full, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm I don't see a good way to decouple the logic of the slash meter away from time. Any test for throttling must have a pretty good and mostly deterministic control over chain time |
||
} | ||
|
||
for { | ||
globalQueueSize := int(tr.getGlobalSlashQueueSize()) | ||
chainQueueSize := int(tr.getConsumerChainPacketQueueSize(action.chain)) | ||
slashMeter := tr.getSlashMeter() | ||
if verbose { | ||
fmt.Printf("waiting for packed queue size to reach: %d - current: %d\n", action.nextQueueSize, globalQueueSize) | ||
fmt.Printf("waiting for slash meter to be replenished, current value: %d\n", slashMeter) | ||
} | ||
|
||
// check if global queue size is equal to chain queue size | ||
if globalQueueSize == chainQueueSize && globalQueueSize == action.nextQueueSize { //nolint:gocritic // this is the comparison that we want here. | ||
// check if meter has reached target value | ||
if slashMeter >= action.targetValue { | ||
break | ||
} | ||
|
||
if time.Now().After(timeout) { | ||
panic(fmt.Sprintf("\n\n\nwaitForSlashThrottleDequeuemethod has timed out after: %s\n\n", action.timeout)) | ||
panic(fmt.Sprintf("\n\nwaitForSlashMeterReplenishment has timed out after: %s\n\n", action.timeout)) | ||
} | ||
|
||
time.Sleep(500 * time.Millisecond) | ||
tr.WaitTime(5 * time.Second) | ||
} | ||
// wair for 2 blocks to be created | ||
// allowing the jailing to be incorporated into voting power | ||
tr.waitBlocks(action.chain, 2, time.Minute) | ||
} | ||
|
||
func uintPointer(i uint) *uint { | ||
return &i | ||
} | ||
|
||
// GetPathNameForGorelayer returns the name of the path between two given chains used by Gorelayer. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -168,19 +168,6 @@ func (tr TestRun) getChainState(chain chainID, modelState ChainState) ChainState | |
chainState.ProviderKeys = &providerKeys | ||
} | ||
|
||
if modelState.GlobalSlashQueueSize != nil { | ||
globalQueueSize := tr.getGlobalSlashQueueSize() | ||
chainState.GlobalSlashQueueSize = &globalQueueSize | ||
} | ||
|
||
if modelState.ConsumerChainQueueSizes != nil { | ||
consumerChainQueueSizes := map[chainID]uint{} | ||
for c := range *modelState.ConsumerChainQueueSizes { | ||
consumerChainQueueSizes[c] = tr.getConsumerChainPacketQueueSize(c) | ||
} | ||
chainState.ConsumerChainQueueSizes = &consumerChainQueueSizes | ||
} | ||
|
||
if modelState.RegisteredConsumerRewardDenoms != nil { | ||
registeredConsumerRewardDenoms := tr.getRegisteredConsumerRewardDenoms(chain) | ||
chainState.RegisteredConsumerRewardDenoms = ®isteredConsumerRewardDenoms | ||
|
@@ -667,9 +654,10 @@ func (tr TestRun) getProviderAddressFromConsumer(consumerChain chainID, validato | |
return addr | ||
} | ||
|
||
func (tr TestRun) getGlobalSlashQueueSize() uint { | ||
func (tr TestRun) getSlashMeter() int64 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this take the chain as an argument instead of always using provi? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Imo no because the slash meter is only relevant to the provider. If we ever plan to have multiple providers in these tests then that's a different story, but I don't see that happening |
||
//#nosec G204 -- Bypass linter warning for spawning subprocess with cmd arguments. | ||
cmd := exec.Command("docker", "exec", tr.containerConfig.instanceName, tr.chainConfigs[chainID("provi")].binaryName, | ||
cmd := exec.Command("docker", "exec", | ||
tr.containerConfig.instanceName, tr.chainConfigs[chainID("provi")].binaryName, | ||
|
||
"query", "provider", "throttle-state", | ||
`--node`, tr.getQueryNode(chainID("provi")), | ||
|
@@ -680,26 +668,8 @@ func (tr TestRun) getGlobalSlashQueueSize() uint { | |
log.Fatal(err, "\n", string(bz)) | ||
} | ||
|
||
packets := gjson.Get(string(bz), "packets").Array() | ||
return uint(len(packets)) | ||
} | ||
|
||
func (tr TestRun) getConsumerChainPacketQueueSize(consumerChain chainID) uint { | ||
//#nosec G204 -- Bypass linter warning for spawning subprocess with cmd arguments. | ||
cmd := exec.Command("docker", "exec", tr.containerConfig.instanceName, tr.chainConfigs[chainID("provi")].binaryName, | ||
|
||
"query", "provider", "throttled-consumer-packet-data", | ||
string(consumerChain), | ||
`--node`, tr.getQueryNode(chainID("provi")), | ||
`-o`, `json`, | ||
) | ||
bz, err := cmd.CombinedOutput() | ||
if err != nil { | ||
log.Fatal(err, "\n", string(bz)) | ||
} | ||
|
||
size := gjson.Get(string(bz), "size").Uint() | ||
return uint(size) | ||
slashMeter := gjson.Get(string(bz), "slash_meter") | ||
return slashMeter.Int() | ||
} | ||
|
||
func (tr TestRun) getRegisteredConsumerRewardDenoms(chain chainID) []string { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,7 +56,8 @@ func stepsDowntime(consumerName string) []Step { | |
chainID(consumerName): ChainState{ | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 509, | ||
validatorID("bob"): 500, | ||
// Bob's stake may or may not be slashed at this point depending on comet vs cometmock | ||
// See https://github.com/cosmos/interchain-security/issues/1304 | ||
validatorID("carol"): 501, | ||
}, | ||
}, | ||
|
@@ -278,7 +279,7 @@ func stepsThrottledDowntime(consumerName string) []Step { | |
validator: validatorID("bob"), | ||
}, | ||
state: State{ | ||
// slash packet queued on consumer, but powers not affected on either chain yet | ||
// slash packet queued for bob on consumer, but powers not affected on either chain yet | ||
chainID("provi"): ChainState{ | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
|
@@ -312,11 +313,6 @@ func stepsThrottledDowntime(consumerName string) []Step { | |
validatorID("bob"): 0, // bob is jailed | ||
validatorID("carol"): 500, | ||
}, | ||
// no provider throttling engaged yet | ||
GlobalSlashQueueSize: uintPointer(0), | ||
ConsumerChainQueueSizes: &map[chainID]uint{ | ||
chainID(consumerName): uint(0), | ||
}, | ||
}, | ||
chainID(consumerName): ChainState{ | ||
// VSC packet applying jailing is not yet relayed to consumer | ||
|
@@ -328,13 +324,13 @@ func stepsThrottledDowntime(consumerName string) []Step { | |
}, | ||
}, | ||
}, | ||
// Invoke carol downtime slash on consumer | ||
{ | ||
action: downtimeSlashAction{ | ||
chain: chainID(consumerName), | ||
validator: validatorID("carol"), | ||
}, | ||
state: State{ | ||
// powers not affected on either chain yet | ||
chainID("provi"): ChainState{ | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
|
@@ -343,10 +339,9 @@ func stepsThrottledDowntime(consumerName string) []Step { | |
}, | ||
}, | ||
chainID(consumerName): ChainState{ | ||
// VSC packet applying jailing is not yet relayed to consumer | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
validatorID("bob"): 500, | ||
validatorID("bob"): 500, // VSC packet jailing bob is not yet relayed to consumer | ||
validatorID("carol"): 500, | ||
}, | ||
}, | ||
|
@@ -364,42 +359,35 @@ func stepsThrottledDowntime(consumerName string) []Step { | |
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
validatorID("bob"): 0, | ||
validatorID("carol"): 500, // not slashed due to throttling | ||
}, | ||
GlobalSlashQueueSize: uintPointer(1), // carol's slash request is throttled | ||
ConsumerChainQueueSizes: &map[chainID]uint{ | ||
chainID(consumerName): uint(1), | ||
validatorID("carol"): 500, // slash packet for carol recv by provider, carol not slashed due to throttling | ||
}, | ||
}, | ||
chainID(consumerName): ChainState{ | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
validatorID("bob"): 0, | ||
validatorID("bob"): 0, // VSC packet applying bob jailing is also relayed and recv by consumer | ||
validatorID("carol"): 500, | ||
}, | ||
}, | ||
}, | ||
}, | ||
// TODO(Shawn): Improve this test to have the consumer retry it's downtime slash, and to assert queue size on consumer. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note this TODO, the e2e test ends without retrying the slash packet. This will be addressed in a separate PR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to confirm, we are removing the Queue size from the chain state, despite this TODO, right? It reads to me as if that would reintroduce it, but maybe I'm missing context There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question, we are removing the queues from the provider in this PR. The queues exist on the consumers as of #1024, so incoming e2e test will assert queue size on consumer |
||
// See https://github.com/cosmos/interchain-security/issues/1103 and https://github.com/cosmos/interchain-security/issues/1233 | ||
{ | ||
action: slashThrottleDequeue{ | ||
chain: chainID(consumerName), | ||
currentQueueSize: 1, | ||
nextQueueSize: 0, | ||
action: slashMeterReplenishmentAction{ | ||
targetValue: 0, // We just want slash meter to be non-negative | ||
|
||
// Slash meter replenish fraction is set to 10%, replenish period is 20 seconds, see config.go | ||
// Meter is initially at 10%, decremented to -23% from bob being jailed. It'll then take three replenishments | ||
// for meter to become positive again. 3*20 = 60 seconds + buffer = 80 seconds | ||
timeout: 80 * time.Second, | ||
// for meter to become positive again. 3*20 = 60 seconds + buffer = 100 seconds | ||
timeout: 100 * time.Second, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just curious, was there a reason the buffer needs to be increased? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is an artifact of #1304 and trying to get this to work with cometmock |
||
}, | ||
state: State{ | ||
chainID("provi"): ChainState{ | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
validatorID("bob"): 0, | ||
validatorID("carol"): 0, // Carol is jailed upon packet being handled on provider | ||
}, | ||
GlobalSlashQueueSize: uintPointer(0), // slash packets dequeued | ||
ConsumerChainQueueSizes: &map[chainID]uint{ | ||
chainID(consumerName): 0, | ||
validatorID("carol"): 500, // Carol still not slashed, packet must be retried | ||
}, | ||
}, | ||
chainID(consumerName): ChainState{ | ||
|
@@ -412,36 +400,5 @@ func stepsThrottledDowntime(consumerName string) []Step { | |
}, | ||
}, | ||
}, | ||
// A block is incremented each action, hence why VSC is committed on provider, | ||
// and can now be relayed as packet to consumer | ||
{ | ||
action: relayPacketsAction{ | ||
chainA: chainID("provi"), | ||
chainB: chainID(consumerName), | ||
port: "provider", | ||
channel: 0, | ||
}, | ||
state: State{ | ||
chainID("provi"): ChainState{ | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
validatorID("bob"): 0, | ||
validatorID("carol"): 0, | ||
}, | ||
GlobalSlashQueueSize: uintPointer(0), | ||
ConsumerChainQueueSizes: &map[chainID]uint{ | ||
chainID(consumerName): 0, | ||
}, | ||
}, | ||
chainID(consumerName): ChainState{ | ||
ValPowers: &map[validatorID]uint{ | ||
validatorID("alice"): 511, | ||
// throttled update gets to consumer | ||
validatorID("bob"): 0, | ||
validatorID("carol"): 0, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The slash packet queue size of the provider is no longer relevant, but the slash meter value is relevant. I've replaced this action to wait on slash meter being replenished