From b73884f1a8709d3091081d047065103283e64e68 Mon Sep 17 00:00:00 2001 From: Marius Poke Date: Wed, 29 Nov 2023 10:31:21 +0100 Subject: [PATCH] docs: update docs for jail throttling v2 (#1443) * refactor adr 002 for better understanding * refactor ADR 008 * update throttling params * add docstring to TestBasicSlashPacketThrottling * update features and releases * add comments to TestMultiConsumerSlashPacketThrottling * Update docs/docs/adrs/adr-008-throttle-retries.md Co-authored-by: MSalopek * add review suggestions * replace trailing with subsequent * add upcoming versions * add notes on backward compatibility --------- Co-authored-by: MSalopek --- FEATURES.md | 29 ++-- RELEASES.md | 28 +++- docs/docs/adrs/adr-002-throttle.md | 158 +++++++++++++-------- docs/docs/adrs/adr-008-throttle-retries.md | 104 +++++++++----- docs/docs/introduction/params.md | 34 +++-- tests/integration/throttle.go | 36 ++++- 6 files changed, 261 insertions(+), 128 deletions(-) diff --git a/FEATURES.md b/FEATURES.md index d536a58643..97e36f643e 100644 --- a/FEATURES.md +++ b/FEATURES.md @@ -2,17 +2,18 @@ The following table indicates the major ICS features available in the [currently active releases](./RELEASES.md#version-matrix): -| Feature | Release | `v1.2.0-multiden` | `v2.0.0` | `v2.1.0-provider-lsm` | `v2.4.0-lsm` | `v3.1.0` | -|---------|---------|------------------:|---------:|----------------------:|-------------:|---------:| -| [Channel initialization: new chains](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#channel-initialization-new-chains) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Validator set update](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#validator-set-update) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Completion of unbonding 
operations](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#completion-of-unbonding-operations) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Consumer initiated slashing](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#consumer-initiated-slashing) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Reward distribution](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#reward-distribution) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Consumer chain removal](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/methods.md#consumer-chain-removal) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Key assignment](https://github.com/cosmos/interchain-security/issues/26) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Jail throttling](https://github.com/cosmos/interchain-security/issues/404) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Soft opt-out](https://github.com/cosmos/interchain-security/issues/851) | | ✅ | ✅ | ✅ | ✅ | ✅ | -| [Channel initialization: existing chains](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#channel-initialization-existing-chains) (aka [Standalone to consumer changeover](https://github.com/cosmos/interchain-security/issues/756))| | ❌ | ✅ | ✅ | ✅ | ✅ | -| [Cryptographic verification of equivocation](https://github.com/cosmos/interchain-security/issues/732) | | ❌ | ❌ | ❌ | ✅ | ❌ | - +| Feature | `v1.2.0-multiden` | `v2.0.0` | `v2.1.0-provider-lsm` | `v2.4.0-lsm` | `v3.1.0` | `v3.2.0` | `v3.3.0` | `v4.0.0` | +|---------|------------------:|---------:|----------------------:|-------------:|---------:|---------:|---------:|---------:| +| [Channel initialization: new chains](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#channel-initialization-new-chains) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Validator set 
update](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#validator-set-update) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Completion of unbonding operations](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#completion-of-unbonding-operations) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Consumer initiated slashing](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#consumer-initiated-slashing) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Reward distribution](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#reward-distribution) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Consumer chain removal](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/methods.md#consumer-chain-removal) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Key assignment](https://github.com/cosmos/interchain-security/issues/26) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Jail throttling](https://github.com/cosmos/interchain-security/issues/404) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Soft opt-out](https://github.com/cosmos/interchain-security/issues/851) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Channel initialization: existing chains](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/overview_and_basic_concepts.md#channel-initialization-existing-chains) (aka [Standalone to consumer changeover](https://github.com/cosmos/interchain-security/issues/756)) | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [Cryptographic verification of equivocation](https://github.com/cosmos/interchain-security/issues/732) | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | +| [Jail throttling with retries](https://github.com/cosmos/interchain-security/issues/713) - consumer-side changes | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | +| [Jail throttling with 
retries](https://github.com/cosmos/interchain-security/issues/713) - [provider-side changes](https://github.com/cosmos/interchain-security/issues/1102) | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | diff --git a/RELEASES.md b/RELEASES.md index ee382b0309..9716546410 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -7,6 +7,7 @@ - [Stable Release Policy](#stable-release-policy) - [Version Matrix](#version-matrix) - [Backwards Compatibility](#backwards-compatibility) + - [Notes](#notes) ## Semantic Versioning @@ -64,6 +65,8 @@ All missing minor release versions have been discontinued. | `v2.4.x-lsm` | June 09, 2024 | | `v3.1.x` | July 10, 2024 | | `v3.2.x` | July 10, 2024 | +| `v3.3.x` | July 10, 2024 | +| `v4.0.x` | TBA | **Note**: As of [Gaia v12.0.0](https://github.com/cosmos/gaia/releases/tag/v12.0.0), the Cosmos Hub uses a fork of Cosmos SDK ([v0.45.16-ics-lsm](https://github.com/cosmos/cosmos-sdk/releases/tag/v0.45.16-ics-lsm)) @@ -83,6 +86,9 @@ Versions of Golang, IBC, Cosmos SDK and CometBFT used by ICS in the currently ac | [v2.1.0-provider-lsm](https://github.com/cosmos/interchain-security/releases/tag/v2.1.0-provider-lsm) | 1.19 | v4.4.2 | v0.45.16-ics-lsm | v0.34.28 | Provider only (Cosmos Hub specific) | | [v2.4.0-lsm](https://github.com/cosmos/interchain-security/releases/tag/v2.4.0-lsm) | 1.19 | v4.4.2 | v0.45.16-ics-lsm | v0.34.28 | Provider only (Cosmos Hub specific) | | [v3.1.0](https://github.com/cosmos/interchain-security/releases/tag/v3.1.0) | 1.20 | v7.1.0 | v0.47.3 | v0.37.2 | +| v3.2.0 | 1.20 | v7.3.0 | v0.47.5 | v0.37.2 | +| v3.3.0 | 1.20 | v7.3.0 | v0.47.5 | v0.37.2 | +| v4.0.0 | 1.20 | v7.3.0 | v0.47.5 | v0.37.2 | Provider on >= v4.0.0 backwards compatible with consumers >= v3.2.0 | **Note:** For a list of major ICS features available in the currently active releases, see [FEATURES.md](./FEATURES.md). 
@@ -92,8 +98,20 @@ A MAJOR version of ICS will always be backwards compatible with the previous MAJ The following table indicates the compatibility of currently active releases: -| Consumer | Provider | `v2.0.0` | `v2.1.0-provider-lsm` | `v2.4.0-lsm` | `v3.1.0` | -|----------|----------|--------:|----------------------:|----------------------:|---------:| -| `v1.2.0-multiden` || ✅ | ✅ | ✅ | ✅ | -| `v2.0.0` || ✅ | ✅ | ✅ | ✅ | -| `v3.1.0` || ✅ | ✅ | ✅ | ✅ | +| Consumer | Provider | `v2.0.0` | `v2.1.0-provider-lsm` | `v2.4.0-lsm` | `v3.1.0` | `v3.2.0` | `v3.3.0` | `v4.0.0` | +|----------|----------|----------|-----------------------|--------------|----------|----------|----------|----------| +| `v1.2.0-multiden` || ✅ (1) | ✅ (1) | ✅ (1) | ✅ (1),(2) | ✅ (1),(2) | ✅ (1),(2),(4) | ❌ | +| `v2.0.0` || ✅ | ✅ | ✅ | ✅ (2) | ✅ (2) | ✅ (2),(4) | ❌ | +| `v3.1.0` || ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (4) | ❌ | +| `v3.2.0` || ✅ | ✅ | ✅ | ✅ | ✅ | ✅ (4) | ✅ | +| `v3.3.0` || ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| `v4.0.0` || ✅ (3) | ✅ (3) | ✅ (3) | ✅ (3) | ✅ | ✅ | ✅ | + +#### Notes + +The following adjustments must be made to the CCV consumer genesis state that is obtained from the provider chain after the spawn time is reached in order for the consumer chain to start without errors. + +- (1) Remove “preCCV” key +- (2) Remove “prehash_key_before_comparison” keys +- (3) Add .params.retry_delay_period: “3600s” +- (4) Use `interchain-security-cd genesis transform` to transform the consumer genesis file obtained from the provider. 
\ No newline at end of file diff --git a/docs/docs/adrs/adr-002-throttle.md b/docs/docs/adrs/adr-002-throttle.md index e60c619ca9..fe3d0a30b0 100644 --- a/docs/docs/adrs/adr-002-throttle.md +++ b/docs/docs/adrs/adr-002-throttle.md @@ -9,6 +9,7 @@ title: Jail Throttling * 2023-01-26: Initial Draft * 2023-02-07: Property refined, ADR ready to review/merge +* 2023-11-22: Refactor for better understanding ## Status @@ -16,58 +17,97 @@ Accepted ## Context -The CCV spec is based around the assumption that the provider binary and all consumers binaries are non-malicious, and follow the defined protocols. In practice, this assumption may not hold. A malicious consumer binary could potentially include code which is able to send many slash/jail packets at once to the provider. +The CCV spec is based around the assumption that the provider binary and all consumers binaries are non-malicious, and follow the defined protocols. +In practice, this assumption may not hold. +A malicious consumer binary could potentially include code which is able to send many slash/jail packets at once to the provider. -Before the throttling feature was implemented, the following attack was possible. Attacker(s) would create provider validators just below the provider's active set. Using a malicious consumer binary, slash packets would be relayed to the provider, that would slash/jail a significant portion (or all) of honest validator at once. Control of the provider would then pass over to the attackers' validators. This enables the attacker(s) to halt the provider. Or even worse, commit arbitrary state on the provider, potentially stealing all tokens bridged to the provider over IBC. +Before the throttling feature was implemented, the following attack was possible. +Attacker(s) would create provider validators just below the provider's active set. 
+Using a malicious consumer binary, slash packets would be relayed to the provider, that would slash/jail a significant portion (or all) of honest validators at once. +Control of the provider would then pass over to the attackers' validators. +This enables the attacker(s) to halt the provider. +Or even worse, commit arbitrary state on the provider, potentially stealing all tokens bridged to the provider over IBC. ## Decision -The throttling feature was designed to slow down the mentioned attack from above, allowing validators and the community to appropriately respond to the attack. Ie. this feature limits (enforced by on-chain params) the rate that the provider validator set can be jailed over time. +The throttling feature was designed to slow down the mentioned attack from above, allowing validators and the community to appropriately respond to the attack, +i.e., this feature limits (enforced by on-chain params) the rate that the provider validator set can be jailed over time. -### State Required - Slash Meter +### Required State -There exists one slash meter on the provider which stores an amount of voting power (integer), corresponding to an allowance of validators that can be jailed over time. This meter is initialized to a certain value on genesis, decremented by the amount of voting power jailed whenever a slash packet is handled, and periodically replenished as decided by on-chain params. +**Slash meter:** There exists one slash meter on the provider which stores an amount of voting power (integer), corresponding to an allowance of validators that can be jailed over time. +This meter is initialized to a certain value on genesis, decremented by the amount of voting power jailed whenever a slash packet is handled, and periodically replenished as decided by on-chain params. -### State Required - Global entry queue +**Global entry queue:** There exists a single queue which stores "global slash entries". 
+These entries allow the provider to appropriately handle slash packets sent from any consumer in FIFO ordering. +This queue is responsible for coordinating the order that slash packets (from multiple chains) are handled over time. -There exists a single queue which stores "global slash entries". These entries allow the provider to appropriately handle slash packets sent from any consumer in FIFO ordering. This queue is responsible for coordinating the order that slash packets (from multiple chains) are handled over time. +**Per-chain data queue:** For each established consumer, there exists a queue which stores "throttled packet data", +i.e., pending slash packet data is queued together with pending VSC matured packet data in FIFO ordering. +Order is enforced by IBC sequence number. +These "per-chain" queues are responsible for coordinating the order that slash packets are handled in relation to VSC matured packets from the same chain. -### State Required - Per-chain data queue +_Note:_ The reason for a multiple-queue design is the _VSC Maturity and Slashing Order_ property (see [spec](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/system_model_and_properties.md#consumer-initiated-slashing)). +There are other ways to ensure such a property (like a queue of linked lists, etc.), but the proposed approach seemed to be the most understandable and easiest to implement with a KV store. -For each established consumer, there exists a queue which stores "throttled packet data". Ie. pending slash packet data is queued together with pending VSC matured packet data in FIFO ordering. Order is enforced by IBC sequence number. These "per-chain" queues are responsible for coordinating the order that slash packets are handled in relation to VSC matured packets from the same chain. +### Params -### Reasoning - Multiple queues +`SlashMeterReplenishPeriod` -- the period after which the slash meter is replenished. 
-For reasoning on why this feature was implemented with multiple queues, see [spec](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/system_model_and_properties.md#consumer-initiated-slashing). Specifically the section on _VSC Maturity and Slashing Order_. There are other ways to ensure such a property (like a queue of linked lists, etc.), but the implemented protocol seemed to be the most understandable and easiest to implement with a KV store. +`SlashMeterReplenishFraction` -- the portion (in range [0, 1]) of total voting power that is replenished to the slash meter when a replenishment occurs. This param also serves as a maximum fraction of total voting power that the slash meter can hold. -### Protocol Overview - OnRecvSlashPacket +`MaxThrottledPackets` -- the maximum amount of throttled slash or vsc matured packets that can be queued from a single consumer before the provider chain halts, it should be set to a large value. +This param would allow provider binaries to panic deterministically in the event that packet throttling results in a large amount of state-bloat. In such a scenario, packet throttling could prevent a violation of safety caused by a malicious consumer, at the cost of provider liveness. + +### Protocol Overview + +#### OnRecvSlashPacket Upon the provider receiving a slash packet from any of the established consumers during block execution, two things occur: 1. A global slash entry is queued. 2. The data of such a packet is added to the per-chain queue. -### Protocol Overview - OnRecvVSCMaturedPacket +#### OnRecvVSCMaturedPacket Upon the provider receiving a VSCMatured packet from any of the established consumers during block execution, the VSCMatured packet data is added to the per-chain queue. 
-### Endblocker Step 1 - Slash Meter Replenishment +#### Endblocker + +In the `EndBlock` of the provider CCV module, there are three actions performed: + +- replenish the slash meter; +- handle the leading `VSCMaturedPackets`; +- and handle the throttle queues. + +##### Slash Meter Replenishment -Once the slash meter becomes not full, it'll be replenished after `SlashMeterReplenishPeriod (param)` by incrementing the meter with its allowance for the replenishment block, where `allowance` = `SlashMeterReplenishFraction (param)` * `currentTotalVotingPower`. The slash meter will never exceed its current allowance (fn of the total voting power for the block) in value. Note a few things: +Once the slash meter becomes not full, it'll be replenished after `SlashMeterReplenishPeriod` by incrementing the meter with its allowance for the replenishment block, where `allowance` = `SlashMeterReplenishFraction` * `currentTotalVotingPower`. +The slash meter will never exceed its current allowance (function of the total voting power for the block) in value. -1. The slash meter can go negative in value, and will do so when handling a single slash packet that jails a validator with significant voting power. In such a scenario, the slash meter may take multiple replenishment periods to once again reach a positive value (or 0), meaning no other slash packets may be handled for multiple replenishment periods. -2. Total voting power of a chain changes over time, especially as validators are jailed. As validators are jailed, total voting power decreases, and so does the jailing allowance. See below for more detailed throttling property discussion. -3. The voting power allowance added to the slash meter during replenishment will always be greater than or equal to 1. If the `SlashMeterReplenishFraction (param)` is set too low, integer rounding will put this minimum value into effect. 
That is, if `SlashMeterReplenishFraction` * `currentTotalVotingPower` < 1, then the effective allowance would be 1. This min value of allowance ensures that there's some packets handled over time, even if that is a very long time. It's a crude solution to an edge case caused by too small of a replenishment fraction. +Note a few things: -The behavior described above is achieved by executing `CheckForSlashMeterReplenishment()` every endblock, BEFORE `HandleThrottleQueues()` is executed. +1. The slash meter can go negative in value, and will do so when handling a single slash packet that jails a validator with significant voting power. + In such a scenario, the slash meter may take multiple replenishment periods to once again reach a positive value (or 0), meaning no other slash packets may be handled for multiple replenishment periods. +2. Total voting power of a chain changes over time, especially as validators are jailed. + As validators are jailed, total voting power decreases, and so does the jailing allowance. + See below for more detailed throttling property discussion. +3. The voting power allowance added to the slash meter during replenishment will always be greater than or equal to 1. + If the `SlashMeterReplenishFraction` is set too low, integer rounding will put this minimum value into effect. + That is, if `SlashMeterReplenishFraction` * `currentTotalVotingPower` < 1, then the effective allowance would be 1. + This min value of allowance ensures that there's some packets handled over time, even if that is a very long time. + It's a crude solution to an edge case caused by too small of a replenishment fraction. -### Endblocker Step 2 - HandleLeadingVSCMaturedPackets +The behavior described above is achieved by executing `CheckForSlashMeterReplenishment()` every `EndBlock`, BEFORE `HandleThrottleQueues()` is executed. -Every block it is possible that VSCMatured packet data was queued before any slash packet data. 
Since this "leading" VSCMatured packet data does not have to be throttled (see _VSC Maturity and Slashing Order_), we can handle all VSCMatured packet data at the head of the queue, before the any throttling or packet data handling logic executes. +##### Handle Leading VSCMaturedPackets -### Endblocker Step 3 - HandleThrottleQueues +In every block, it is possible that `VSCMaturedPacket` data was queued before any slash packet data. +Since this "leading" VSCMatured packet data does not have to be throttled (see _VSC Maturity and Slashing Order_), we can handle all VSCMatured packet data at the head of the queue, before any throttling or packet data handling logic executes. -Every endblocker the following pseudo-code is executed to handle data from the throttle queues. +##### Handle Throttle Queues + +In every `EndBlock`, the following logic is executed to handle data from the throttle queues. ```typescript meter := getSlashMeter() @@ -91,55 +131,59 @@ while meter.IsPositiveOrZero() && entriesExist() { ### System Properties -All CCV system properties should be maintained by implementing this feature, see: [CCV spec - Consumer Initiated Slashing](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/system_model_and_properties.md#consumer-initiated-slashing). +All CCV system properties should be maintained by implementing this feature, see [CCV spec - Consumer Initiated Slashing](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/system_model_and_properties.md#consumer-initiated-slashing). -One implementation-specific property introduced is that if any of the chain-specific packet data queues become larger than `MaxThrottledPackets (param)`, then the provider binary will panic, and the provider chain will halt. Therefore this param should be set carefully. See `SetThrottledPacketDataSize`. 
This behavior ensures that if the provider binaries are queuing up more packet data than machines can handle, the provider chain halts deterministically between validators. +One implementation-specific property introduced is that if any of the chain-specific packet data queues become larger than `MaxThrottledPackets`, then the provider binary will panic, and the provider chain will halt. +Therefore this param should be set carefully. See `SetThrottledPacketDataSize`. +This behavior ensures that if the provider binaries are queuing up more packet data than machines can handle, the provider chain halts deterministically between validators. ### Main Throttling Property Using on-chain params and the sub protocol defined, slash packet throttling is implemented such that the following property holds under some conditions. -First, we define the following: +First, we introduce the following definitions: * A consumer initiated slash attack "starts" when the first slash packet from such an attack is received by the provider. * The "initial validator set" for the attack is the validator set that existed on the provider when the attack started. -* There is a list of honest validators s.t if they are jailed, `X`% of the initial validator set will be jailed. +* There is a list of honest validators such that if they are jailed, `X`% of the initial validator set will be jailed. -For the following property to hold, these assumptions must be true: +For the Throttling Property to hold, the following assumptions must be true: 1. We assume the total voting power of the chain (as a function of delegations) does not increase over the course of the attack. 2. No validator has more than `SlashMeterReplenishFraction` of total voting power on the provider. -3. `SlashMeterReplenishFraction` is large enough that `SlashMeterReplenishFraction` * `currentTotalVotingPower` > 1. Ie. the replenish fraction is set high enough that we can ignore the effects of rounding. +3. 
`SlashMeterReplenishFraction` is large enough that `SlashMeterReplenishFraction` * `currentTotalVotingPower` > 1, + i.e., the replenish fraction is set high enough that we can ignore the effects of rounding. 4. `SlashMeterReplenishPeriod` is sufficiently longer than the time it takes to produce a block. _Note if these assumptions do not hold, throttling will still slow down the described attack in most cases, just not in a way that can be succinctly described. It's possible that more complex properties can be defined._ -Property: - -> The time it takes to jail/tombstone `X`% of the initial validator set will be greater than or equal to `(X * SlashMeterReplenishPeriod / SlashMeterReplenishFraction) - 2 * SlashMeterReplenishPeriod` - -Intuition: - -Let's use the following notation: - -* $C$: Number of replenishment cycles -* $P$: $\text{SlashMeterReplenishPeriod}$ -* $F$: $\text{SlashMeterReplenishFraction}$ -* $V_{\mathit{max}}$: Max power of a validator as a fraction of total voting power - -In $C$ number of replenishment cycles, the fraction of total voting power that can be removed, $a$, is $a \leq F \cdot C + V_{\mathit{max}}$ (where $V_{\mathit{max}}$ is there to account for the power fraction of the last validator removed, one which pushes the meter to the negative value). - -So, we need at least $C \geq \frac{a - V_{\mathit{max}}}{F}$ cycles to remove $a$ fraction of the total voting power. - -Since we defined the start of the attack to be the moment when the first slash request arrives, then $F$ fraction of the initial validator set can be jailed immediately. For the remaining $X - F$ fraction of the initial validator set to be jailed, it takes at least $C \geq \frac{(X - F) - V_{\mathit{max}}}{F}$ cycles. Using the assumption that $V_{\mathit{max}} \leq F$ (assumption 2), we get $C \geq \frac{X - 2F}{F}$ cycles. - -In order to execute $C$ cycles, we need $C \cdot P$ time. 
- -Thus, jailing the remaining $X - F$ fraction of the initial validator set corresponds to $\frac{P \cdot (X - 2F)}{F}$ time. - -In other words, the attack must take at least $\frac{P \cdot X}{F} - 2P$ time (in the units of replenish period $P$). - -This property is useful because it allows us to reason about the time it takes to jail a certain percentage of the initial provider validator set from consumer initiated slash requests. For example, if `SlashMeterReplenishFraction` is set to 0.06, then it takes no less than 4 replenishment periods to jail 33% of the initial provider validator set on the Cosmos Hub. Note that as of writing this on 11/29/22, the Cosmos Hub does not have a validator with more than 6% of total voting power. +**Throttling Property**: The time it takes to jail/tombstone `X`% of the initial validator set will be greater than or equal to +$\mathit{SlashMeterReplenishPeriod} \cdot \frac{X}{\mathit{SlashMeterReplenishFraction}} - 2 \cdot \mathit{SlashMeterReplenishPeriod}$. + +> **Intuition** +> +> Let's use the following notation: +> +> * $C$: Number of replenishment cycles +> * $P$: $\mathit{SlashMeterReplenishPeriod}$ +> * $F$: $\mathit{SlashMeterReplenishFraction}$ +> * $V_{\mathit{max}}$: Max power of a validator as a fraction of total voting power +> +> In $C$ number of replenishment cycles, the fraction of total voting power that can be removed, $a$, is $a \leq F \cdot C + V_{\mathit{max}}$ (where $V_{\mathit{max}}$ is there to account for the power fraction of the last validator removed, one which pushes the meter to the negative value). +> +> So, we need at least $C \geq \frac{a - V_{\mathit{max}}}{F}$ cycles to remove $a$ fraction of the total voting power. +> +> Since we defined the start of the attack to be the moment when the first slash request arrives, then $F$ fraction of the initial validator set can be jailed immediately. 
For the remaining $X - F$ fraction of the initial validator set to be jailed, it takes at least $C \geq \frac{(X - F) - V_{\mathit{max}}}{F}$ cycles. Using the assumption that $V_{\mathit{max}} \leq F$ (assumption 2), we get $C \geq \frac{X - 2F}{F}$ cycles. +> +> In order to execute $C$ cycles, we need $C \cdot P$ time. +> +> Thus, jailing the remaining $X - F$ fraction of the initial validator set corresponds to $\frac{P \cdot (X - 2F)}{F}$ time. +> +> In other words, the attack must take at least $\frac{P \cdot X}{F} - 2P$ time (in the units of replenish period $P$). + +This property is useful because it allows us to reason about the time it takes to jail a certain percentage of the initial provider validator set from consumer initiated slash requests. +For example, if `SlashMeterReplenishFraction` is set to `0.06`, then it takes no less than 4 replenishment periods to jail 33% of the initial provider validator set on the Cosmos Hub. +Note that as of writing this on 11/29/22, the Cosmos Hub does not have a validator with more than 6% of total voting power. Note also that 4 replenishment period is a worst case scenario that depends on well crafted attack timings. @@ -160,7 +204,9 @@ In summary, the throttling mechanism as designed has desirable properties whethe ### Negative -* Throttling introduces a vector for a malicious consumer chain to halt the provider, see issue below. However, this is sacrificing liveness in a edge case scenario for the sake of security. As an improvement, [using retries](https://github.com/cosmos/interchain-security/issues/713) would fully prevent this attack vector. +* Throttling introduces a vector for a malicious consumer chain to halt the provider, see issue below. + However, this is sacrificing liveness in an edge case scenario for the sake of security. + As an improvement, [using retries](https://github.com/cosmos/interchain-security/issues/713) would fully prevent this attack vector. 
### Neutral diff --git a/docs/docs/adrs/adr-008-throttle-retries.md b/docs/docs/adrs/adr-008-throttle-retries.md index 1faf7bd7ee..7c5015d512 100644 --- a/docs/docs/adrs/adr-008-throttle-retries.md +++ b/docs/docs/adrs/adr-008-throttle-retries.md @@ -21,44 +21,66 @@ For context on why the throttling mechanism exists, see [ADR 002](./adr-002-thro Note the terms slash throttling and jail throttling are synonymous, since in replicated security a `SlashPacket` simply jails a validator for downtime infractions. -Currently the throttling mechanism is designed so that provider logic (slash meter, etc.) dictates how many slash packets can be handled over time. Throttled slash packets are persisted on the provider, leading to multiple possible issues. Namely: +Currently the throttling mechanism is designed so that provider logic (slash meter, etc.) dictates how many `SlashPackets` can be handled over time. +Throttled `SlashPackets` are persisted on the provider, leading to multiple possible issues. Namely: -* If slash or vsc matured packets are actually throttled/queued on the provider, state can grow and potentially lead to a DoS attack. We have short term solutions around this, but overall they come with their own weaknesses. See [#594](https://github.com/cosmos/interchain-security/issues/594). -* If a jailing attack described in [ADR 002](adr-002-throttle.md) were actually to be carried out with the current throttling design, we'd likely have to halt the provider, and perform an emergency upgrade and/or migration to clear the queues of slash packets that were deemed to be malicious. Alternatively, validators would just have to _tough it out_ and wait for the queues to clear, during which all/most validators would be jailed. Right after being jailed, vals would have to unjail themselves promptly to ensure safety. The synchronous coordination required to maintain safety in such a scenario is not ideal. 
+* If `SlashPackets` or `VSCMaturedPackets` are actually throttled/queued on the provider, state can grow and potentially lead to a DoS attack. + We have short term solutions around this, but overall they come with their own weaknesses. + See [#594](https://github.com/cosmos/interchain-security/issues/594). +* If a jailing attack described in [ADR 002](adr-002-throttle.md) were actually to be carried out with the current throttling design, we'd likely have to halt the provider, and perform an emergency upgrade and/or migration to clear the queues of `SlashPackets` that were deemed to be malicious. + Alternatively, validators would just have to _tough it out_ and wait for the queues to clear, during which all/most validators would be jailed. + Right after being jailed, validators would have to unjail themselves promptly to ensure safety. + The coordination required to maintain safety in such a scenario is not ideal. -So what's the solution? We can improve the throttling mechanism to instead queue/persist relevant data on each consumer, and have consumers retry slash requests as needed. +As a solution, we can improve the throttling mechanism to instead queue/persist relevant data on each consumer, and have consumers retry slash requests as needed. ## Decision ### Consumer changes -Note the consumer already queues up both slash and vsc matured packets via `AppendPendingPacket`. Those packets are dequeued every endblock in `SendPackets` and sent to the provider. +Note the consumer already queues up both `SlashPackets` and `VSCMaturedPackets` via `AppendPendingPacket`. +Those packets are dequeued in every `EndBlock` in `SendPackets` and sent to the provider. -Instead, we will now introduce the following logic on endblock: +Instead, we will now introduce the following logic on `EndBlock`: -* Slash packets will always be sent to the provider once they're at the head of the queue.
However, once sent, the consumer will not send any trailing vsc matured packets from the queue until the provider responds with an ack that the slash packet has been handled (ie. val was jailed). That is, slash packets block the sending of trailing vsc matured packets in the consumer queue. -* If two slash packets are at the head of the queue, the consumer will send the first slash packet, and then wait for a success ack from the provider before sending the second slash packet. This seems like it'd simplify implementation. -* VSC matured packets at the head of the queue (ie. NOT trailing a slash packet) can be sent immediately, and do not block any other packets in the queue, since the provider always handles them immediately. +* `SlashPackets` will always be sent to the provider once they're at the head of the queue. + However, once sent, the consumer will not send any subsequent `VSCMaturedPackets` from the queue until the provider responds with an acknowledgement that the sent `SlashPacket` has been handled, i.e., validator was jailed. + That is, `SlashPackets` block the sending of subsequent `VSCMaturedPackets` in the consumer queue. +* If two `SlashPackets` are at the head of the queue, the consumer will send the first `SlashPacket`, and then wait for a success acknowledgement from the provider before sending the second `SlashPacket`. + This seems like it'd simplify implementation. +* `VSCMaturedPackets` at the head of the queue (i.e., NOT following a `SlashPacket`) can be sent immediately, and do not block any other packets in the queue, since the provider always handles them immediately. -To prevent the provider from having to keep track of what slash packets have been rejected, the consumer will have to retry the sending of slash packets over some period of time. This can be achieved with an on-chain consumer param.
The suggested param value would probably be 1/2 of the provider's `SlashMeterReplenishmentPeriod`, although it doesn't matter too much as long as the param value is sane. +To prevent the provider from having to keep track of what `SlashPackets` have been rejected, the consumer will have to retry the sending of `SlashPackets` over some period of time. +This can be achieved with an on-chain consumer param, i.e., `RetryDelayPeriod`. +To reduce the amount of redundant re-sends, we recommend setting `RetryDelayPeriod ~ SlashMeterReplenishPeriod`, i.e., waiting for the provider slash meter to be replenished before resending the rejected `SlashPacket`. -Note to prevent weird edge case behavior, a retry would not be attempted until either a success ack or failure ack has been recv from the provider. +Note to prevent weird edge case behavior, a retry would not be attempted until either a success or failure acknowledgement has been received from the provider. -With the behavior described, we maintain very similar behavior to the current throttling mechanism regarding the timing that slash and vsc matured packets are handled on the provider. Obviously the queueing and blocking logic is moved, and the two chains would have to send more messages between one another (only in the case the throttling mechanism is triggered). +With the behavior described, we maintain very similar behavior to the previous throttling mechanism regarding the timing that `SlashPackets` and `VSCMaturedPackets` are handled on the provider. +Obviously the queueing and blocking logic is moved, and the two chains would have to send more messages between one another (only in the case the throttling mechanism is triggered). -In the normal case, when no or a few slash packets are being sent, the VSCMaturedPackets will not be delayed, and hence unbonding will not be delayed.
+In the normal case, when no or a few `SlashPackets` are being sent, the `VSCMaturedPackets` will not be delayed, and hence unbonding will not be delayed. -For implementation of this design, see [throttle_retry.go](../../../x/ccv/consumer/keeper/throttle_retry.go). +For the implementation of this design, see [throttle_retry.go](https://github.com/cosmos/interchain-security/blob/fec3eccad59416cbdb6844e279f59e3f81242888/x/ccv/consumer/keeper/throttle_retry.go). -### Consumer pending packets storage optimization +#### Consumer pending packets storage optimization -In addition to the mentioned consumer changes above. An optimization will need to be made to the consumer's pending packets storage to properly implement the feature from this ADR. +In addition to the mentioned consumer changes, an optimization will need to be made to the consumer's pending packets storage to properly implement the feature from this ADR. -The consumer ccv module previously queued "pending packets" to be sent on each endblocker in [SendPackets](https://github.com/cosmos/interchain-security/blob/3bc4e7135066d848aac60b0787364c07157fd36d/x/ccv/consumer/keeper/relay.go#L178). These packets are queued in state with a protobuf list of `ConsumerPacketData`. For a single append operation, the entire list is deserialized, then a packet is appended to that list, and the list is serialized again. See older version of [AppendPendingPacket](https://github.com/cosmos/interchain-security/blob/05c2dae7c6372b1252b9e97215d07c6aa7618f33/x/ccv/consumer/keeper/keeper.go#L606). That is, a single append operation has O(N) complexity, where N is the size of the list. +The consumer ccv module previously queued "pending packets" to be sent in each `EndBlock` in [SendPackets](https://github.com/cosmos/interchain-security/blob/3bc4e7135066d848aac60b0787364c07157fd36d/x/ccv/consumer/keeper/relay.go#L178). +These packets are queued in state with a protobuf list of `ConsumerPacketData`. 
+For a single append operation, the entire list is deserialized, then a packet is appended to that list, and the list is serialized again. +See older version of [AppendPendingPacket](https://github.com/cosmos/interchain-security/blob/05c2dae7c6372b1252b9e97215d07c6aa7618f33/x/ccv/consumer/keeper/keeper.go#L606). +That is, a single append operation has O(N) complexity, where N is the size of the list. -This poor append performance isn't a problem when the pending packets list is small. But with this ADR being implemented, the pending packets list could potentially grow to the order of thousands of entries, in the scenario that a slash packet is bouncing. +This poor append performance isn't a problem when the pending packets list is small. +But with this ADR being implemented, the pending packets list could potentially grow to the order of thousands of entries when `SlashPackets` need to be resent. -We can improve the append time for this queue by converting it from a protobuf-esq list, to a queue implemented with sdk-esq code. The idea is to persist an uint64 index that will be incremented each time you queue up a packet. You can think of this as storing the tail of the queue. Then, packet data will be keyed by that index, making the data naturally ordered byte-wise for sdk's iterator. The index will also be stored in the packet data value bytes, so that the index can later be used to delete certain packets from the queue. +We can improve the append time for this queue by converting it from a protobuf-esq list, to a queue implemented with sdk-esq code. +The idea is to persist an uint64 index that will be incremented each time you queue up a packet. +You can think of this as storing the tail of the queue. +Then, packet data will be keyed by that index, making the data naturally ordered byte-wise for sdk's iterator. +The index will also be stored in the packet data value bytes, so that the index can later be used to delete certain packets from the queue. 
Two things are achieved with this approach: @@ -67,33 +89,44 @@ Two things are achieved with this approach: ### Provider changes -The main change needed for the provider is the removal of queuing logic for slash and vsc matured packets upon being received. +The main change needed for the provider is the removal of queuing logic for `SlashPackets` and `VSCMaturedPackets` upon being received. -Instead, the provider will consult the slash meter to determine if a slash packet can be handled immediately. If not, the provider will return an ack message to the consumer communicating that the slash packet could not be handled, and needs to be sent again in the future (retried). +Instead, the provider will consult the slash meter to determine if a `SlashPacket` can be handled immediately. +If not, the provider will return an acknowledgement message to the consumer communicating that the `SlashPacket` could not be handled, and needs to be sent again in the future (retried). -VSCMatured packets will always be handled immediately upon being received by the provider. +`VSCMaturedPackets` will always be handled immediately upon being received by the provider. Note [spec](https://github.com/cosmos/ibc/blob/main/spec/app/ics-028-cross-chain-validation/system_model_and_properties.md#consumer-initiated-slashing). Specifically the section on _VSC Maturity and Slashing Order_. Previously the onus was on the provider to maintain this property via queuing packets and handling them FIFO. -Now this property will be maintained by the consumer sending packets in the correct order, and blocking the sending of VSCMatured packets as needed. Then, the ordered IBC channel will ensure that Slash/VSCMatured packets are received in the correct order on the provider. +Now this property will be maintained by the consumer sending packets in the correct order, and blocking the sending of `VSCMaturedPackets` as needed. 
Then, the ordered IBC channel will ensure that `SlashPackets` and `VSCMaturedPackets` are received in the correct order on the provider. -The provider's main responsibility regarding throttling will now be to determine if a recv slash packet can be handled via slash meter etc., and appropriately ack to the sending consumer. +The provider's main responsibility regarding throttling will now be to determine if a received `SlashPacket` can be handled via slash meter etc., and appropriately acknowledge to the sending consumer. -### Why the provider can handle VSCMatured packets immediately +#### Handling `VSCMaturedPackets` immediately -First we answer, what does a VSCMatured packet communicate to the provider? A VSCMatured packet communicates that a VSC has been applied to a consumer long enough that infractions committed on the consumer could have been submitted. +#### Why the provider can handle VSCMatured packets immediately -If the consumer is following the queuing/blocking protocol described. No bad behavior occurs, `VSC Maturity and Slashing Order` property is maintained. +A `VSCMaturedPacket` communicates to the provider that sufficient time passed on the consumer since the corresponding `VSCPacket` has been applied (on the consumer) such that infractions committed on the consumer could have been submitted. -If a consumer sends VSCMatured packets too leniently: The consumer is malicious and sending duplicate vsc matured packets, or sending the packets sooner than the ccv protocol specifies. In this scenario, the provider needs to handle vsc matured packets immediately to prevent DOS, state bloat, or other issues. The only possible negative outcome is that the malicious consumer may not be able to jail a validator who should have been jailed. The malicious behavior only creates a negative outcome for the chain that is being malicious. 
+If the consumer is following the queuing/blocking protocol described, then no bad behavior occurs and the _VSC Maturity and Slashing Order_ property is maintained. -If a consumer blocks the sending of VSCMatured packets: The consumer is malicious and blocking vsc matured packets that should have been sent. This will block unbonding only up until the VSC timeout period has elapsed. At that time, the consumer is removed. Again the malicious behavior only creates a negative outcome for the chain that is being malicious. +If a consumer sends `VSCMaturedPackets` too leniently -- the consumer is malicious and sends duplicate `VSCMaturedPackets`, or sends the packets sooner than the CCV protocol specifies -- then the provider needs to handle `VSCMaturedPackets` immediately to prevent DoS, state bloat, or other issues. +The only possible negative outcome is that the malicious consumer may not be able to jail a validator who should have been jailed. +The malicious behavior only creates a negative outcome for the consumer chain that is being malicious. + +If a consumer blocks the sending of `VSCMaturedPackets`, then unbonding operations on the provider will be delayed, but only until the VSC timeout period has elapsed. +At that time, the consumer is removed. +Again the malicious behavior only creates a negative outcome for the consumer chain that is being malicious. ### Splitting of PRs and Upgrade Order -This feature will implement consumer changes in [#1024](https://github.com/cosmos/interchain-security/pull/1024). Note these changes should be deployed to prod for all consumers before the provider changes are deployed to prod. That is the consumer changes in #1024 are compatible with the current ("v1") provider implementation of throttling that's running on the Cosmos Hub as of July 2023. +This feature will implement consumer changes in [#1024](https://github.com/cosmos/interchain-security/pull/1024).
+ +❗***These changes should be deployed to production for all consumers before the provider changes are deployed to production.*** + +In other words, the consumer changes in [#1024](https://github.com/cosmos/interchain-security/pull/1024) are compatible with the current ("v1") provider implementation of throttling that's running on the Cosmos Hub as of July 2023. -Once all consumers have deployed the changes in #1024, the provider changes from (TBD) can be deployed to prod, fully enabling v2 throttling. +Once all consumers have deployed the changes in #1024, the provider changes from [#1321](https://github.com/cosmos/interchain-security/pull/1321) can be deployed to production, fully enabling v2 throttling. ## Consequences @@ -101,12 +134,13 @@ Once all consumers have deployed the changes in #1024, the provider changes from * Consumers still aren't trustless, but the provider is now less susceptible to mismanaged or malicious consumers. * Recovering from the "jailing attack" is more elegant. * Some issues like [#1001](https://github.com/cosmos/interchain-security/issues/1001) will now be handled implicitly by the improved throttling mechanism. -* Slash and vsc matured packets can be handled immediately once recv by the provider if the slash meter allows. -* In general, we reduce the amount of computation that happens in the provider end-blocker. +* `SlashPackets` and `VSCMaturedPackets` can be handled immediately once received by the provider if the slash meter allows. +* In general, we reduce the amount of computation that happens in the provider `EndBlock`. ### Positive -* We no longer have to reason about a "global queue" and a "chain specific queue", and keeping those all in-sync. Now slash and vsc matured packet queuing is handled on each consumer individually. +* We no longer have to reason about a "global queue" and a "chain specific queue", and keeping those all in-sync. 
+ Now `SlashPackets` and `VSCMaturedPackets` queuing is handled on each consumer individually. * Due to the above, the throttling protocol becomes less complex overall. * We no longer have to worry about throttle related DoS attack on the provider, since no queuing exists on the provider. @@ -117,7 +151,7 @@ Once all consumers have deployed the changes in #1024, the provider changes from ### Neutral -* Core throttling logic on the provider remains unchanged, ie. slash meter, replenishment cycles, etc. +* Core throttling logic on the provider remains unchanged, i.e., slash meter, replenishment cycles, etc. ## References diff --git a/docs/docs/introduction/params.md b/docs/docs/introduction/params.md index 9f68cc706c..9b89a1d038 100644 --- a/docs/docs/introduction/params.md +++ b/docs/docs/introduction/params.md @@ -14,10 +14,10 @@ The parameters necessary for Interchain Security (ICS) are defined in ICS relies on the following time-based parameters. ### ProviderUnbondingPeriod -is the unbonding period on the provider chain as configured during chain genesis. This parameter can later be changed via governance. +`ProviderUnbondingPeriod` is the unbonding period on the provider chain as configured during chain genesis. This parameter can later be changed via governance. ### ConsumerUnbondingPeriod -is the unbonding period on the consumer chain. +`ConsumerUnbondingPeriod` is the unbonding period on the consumer chain. :::info `ConsumerUnbondingPeriod` is set via the `ConsumerAdditionProposal` governance proposal to add a new consumer chain. @@ -34,7 +34,7 @@ Unbonding operations (such as undelegations) are completed on the provider only ### TrustingPeriodFraction -is used to calculate the `TrustingPeriod` of created IBC clients on both provider and consumer chains. +`TrustingPeriodFraction` is used to calculate the `TrustingPeriod` of created IBC clients on both provider and consumer chains. 
Setting `TrustingPeriodFraction` to `0.5` would result in the following: @@ -49,7 +49,7 @@ Note that a light clients must be updated within the `TrustingPeriod` in order t For more details, see the [IBC specification of Tendermint clients](https://github.com/cosmos/ibc/blob/main/spec/client/ics-007-tendermint-client/README.md). ### CCVTimeoutPeriod -is the period used to compute the timeout timestamp when sending IBC packets. +`CCVTimeoutPeriod` is the period used to compute the timeout timestamp when sending IBC packets. For more details, see the [IBC specification of Channel & Packet Semantics](https://github.com/cosmos/ibc/blob/main/spec/core/ics-004-channel-and-packet-semantics/README.md#sending-packets). @@ -62,14 +62,14 @@ CCVTimeoutPeriod may have different values on the provider and consumer chains. - `CCVTimeoutPeriod` on the consumer is initial set via the `ConsumerAdditionProposal` ### InitTimeoutPeriod -is the maximum allowed duration for CCV channel initialization to execute. +`InitTimeoutPeriod` is the maximum allowed duration for CCV channel initialization to execute. For any consumer chain, if the CCV channel is not established within `InitTimeoutPeriod` then the consumer chain will be removed and therefore will not be secured by the provider chain. The countdown starts when the `spawn_time` specified in the `ConsumerAdditionProposal` is reached. -### `VscTimeoutPeriod` -is the provider-side param that enables the provider to timeout VSC packets even when a consumer chain is not live. +### VscTimeoutPeriod +`VscTimeoutPeriod` is the provider-side param that enables the provider to timeout VSC packets even when a consumer chain is not live. If the `VscTimeoutPeriod` is ever reached for a consumer chain that chain will be considered not live and removed from interchain security. 
:::tip @@ -77,10 +77,10 @@ If the `VscTimeoutPeriod` is ever reached for a consumer chain that chain will b ::: ### BlocksPerDistributionTransmission -is the number of blocks between rewards transfers from the consumer to the provider. +`BlocksPerDistributionTransmission` is the number of blocks between rewards transfers from the consumer to the provider. ### TransferPeriodTimeout -is the period used to compute the timeout timestamp when sending IBC transfer packets from a consumer to the provider. +`TransferPeriodTimeout` is the period used to compute the timeout timestamp when sending IBC transfer packets from a consumer to the provider. If this timeout expires, then the transfer is attempted again after `BlocksPerDistributionTransmission` blocks. - `TransferPeriodTimeout` on the consumer is initial set via the `ConsumerAdditionProposal` gov proposal to add the consumer @@ -90,16 +90,26 @@ If this timeout expires, then the transfer is attempted again after `BlocksPerDi ## Slash Throttle Parameters ### SlashMeterReplenishPeriod -exists on the provider such that once the slash meter becomes not-full, the slash meter is replenished after this period has elapsed. +`SlashMeterReplenishPeriod` exists on the provider such that once the slash meter becomes not-full, the slash meter is replenished after this period has elapsed. The meter is replenished to an amount equal to the slash meter allowance for that block, or `SlashMeterReplenishFraction * CurrentTotalVotingPower`. ### SlashMeterReplenishFraction -exists on the provider as the portion (in range [0, 1]) of total voting power that is replenished to the slash meter when a replenishment occurs. +`SlashMeterReplenishFraction` exists on the provider as the portion (in range [0, 1]) of total voting power that is replenished to the slash meter when a replenishment occurs. This param also serves as a maximum fraction of total voting power that the slash meter can hold. 
The param is set/persisted as a string, and converted to a `sdk.Dec` when used. ### MaxThrottledPackets -exists on the provider as the maximum amount of throttled slash or vsc matured packets that can be queued from a single consumer before the provider chain halts, it should be set to a large value. + +`MaxThrottledPackets` exists on the provider as the maximum amount of throttled slash or vsc matured packets that can be queued from a single consumer before the provider chain halts, it should be set to a large value. This param would allow provider binaries to panic deterministically in the event that packet throttling results in a large amount of state-bloat. In such a scenario, packet throttling could prevent a violation of safety caused by a malicious consumer, at the cost of provider liveness. + +:::info +`MaxThrottledPackets` was deprecated in ICS versions >= v3.2.0 due to the implementation of [ADR-008](../adrs/adr-008-throttle-retries.md). +::: + +### RetryDelayPeriod + +`RetryDelayPeriod` exists on the consumer for **ICS versions >= v3.2.0** (introduced by the implementation of [ADR-008](../adrs/adr-008-throttle-retries.md)) and is the period at which the consumer retries to send a `SlashPacket` that was rejected by the provider. 
+ diff --git a/tests/integration/throttle.go b/tests/integration/throttle.go index 71ee3f606b..f186077a07 100644 --- a/tests/integration/throttle.go +++ b/tests/integration/throttle.go @@ -30,10 +30,34 @@ func (s *CCVTestSuite) TestBasicSlashPacketThrottling() { expectedAllowanceAfterFirstSlash int64 expectedReplenishesTillPositive int }{ - {"0.2", 800, -200, 600, 1}, - {"0.1", 400, -600, 300, 3}, // 600/300 = 2, so 3 replenishes to reach positive - {"0.05", 200, -800, 150, 6}, - {"0.01", 40, -960, 30, 33}, // 960/30 = 32, so 33 replenishes to reach positive + { + "0.2", + 800, // replenishFraction * totalPower: 0.2 * 4000 + -200, // expectedMeterBeforeFirstSlash - power(V0): 800 - 1000 + 600, // replenishFraction * newTotalPower: 0.2 * 3000 + 1, // ceil((200+1)/600) + }, + { + "0.1", + 400, // replenishFraction * totalPower: 0.1 * 4000 + -600, // expectedMeterBeforeFirstSlash - power(V0): 400 - 1000 + 300, // replenishFraction * newTotalPower: 0.1 * 3000 + 3, // ceil((600+1)/300) + }, + { + "0.05", + 200, // replenishFraction * totalPower: 0.05 * 4000 + -800, // expectedMeterBeforeFirstSlash - power(V0): 200 - 1000 + 150, // replenishFraction * newTotalPower: 0.05 * 3000 + 6, // ceil((800+1)/150) + }, + { + "0.01", + 40, // replenishFraction * totalPower: 0.01 * 4000 + -960, // expectedMeterBeforeFirstSlash - power(V0): 40 - 1000 + 30, // replenishFraction * newTotalPower: 0.01 * 3000 + 33, // ceil((960+1)/30) + }, } for _, tc := range testCases { @@ -209,10 +233,10 @@ func (s *CCVTestSuite) TestMultiConsumerSlashPacketThrottling() { s.confirmValidatorJailed(valsToSlash[0], true) // Packets were bounced for the second and third consumers. 
- s.confirmValidatorNotJailed(valsToSlash[1], 1000) + s.confirmValidatorNotJailed(valsToSlash[1], 1000) // each validator has 1000 power from the setup s.confirmValidatorNotJailed(valsToSlash[2], 1000) - // Total power is now 3000 + // Total power is now 3000 (as one validator was jailed) s.Require().Equal(int64(3000), providerStakingKeeper.GetLastTotalPower(s.providerCtx()).Int64())