-
-
Notifications
You must be signed in to change notification settings - Fork 608
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ratelimits: Auto pause zombie clients #7763
Changes from all commits
d122f18
e966a9a
d6fed53
fb3a966
bdc2a13
5bd14ab
2a02e4d
03c3e71
73712f0
d860f88
542b7c5
7857899
35a6f63
6c4e2fe
97aebfc
061f498
a5ce802
8a28d9c
a207b74
f983373
49b959f
37fb882
794a9bf
9a59484
bfbc51a
5ccd589
4bce078
12bf5e5
85f21e8
a79d881
cc01553
fb547cd
30ea374
2e65dc3
4c7be8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -122,6 +122,7 @@ type RegistrationAuthorityImpl struct { | |
orderAges *prometheus.HistogramVec | ||
inflightFinalizes prometheus.Gauge | ||
certCSRMismatch prometheus.Counter | ||
pauseCounter *prometheus.CounterVec | ||
} | ||
|
||
var _ rapb.RegistrationAuthorityServer = (*RegistrationAuthorityImpl)(nil) | ||
|
@@ -241,6 +242,12 @@ func NewRegistrationAuthorityImpl( | |
}) | ||
stats.MustRegister(certCSRMismatch) | ||
|
||
pauseCounter := prometheus.NewCounterVec(prometheus.CounterOpts{ | ||
Name: "paused_pairs", | ||
Help: "Number of times a pause operation is performed, labeled by paused=[bool], repaused=[bool], grace=[bool]", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps it's too much for the help string here, but it would be good to document what Alternately, we could remove |
||
}, []string{"paused", "repaused", "grace"}) | ||
stats.MustRegister(pauseCounter) | ||
|
||
issuersByNameID := make(map[issuance.NameID]*issuance.Certificate) | ||
for _, issuer := range issuers { | ||
issuersByNameID[issuer.NameID()] = issuer | ||
|
@@ -276,6 +283,7 @@ func NewRegistrationAuthorityImpl( | |
orderAges: orderAges, | ||
inflightFinalizes: inflightFinalizes, | ||
certCSRMismatch: certCSRMismatch, | ||
pauseCounter: pauseCounter, | ||
} | ||
return ra | ||
} | ||
|
@@ -1810,15 +1818,17 @@ func (ra *RegistrationAuthorityImpl) recordValidation(ctx context.Context, authI | |
} | ||
|
||
// countFailedValidation increments the failed authorizations per domain per | ||
// account rate limit. There is no reason to surface errors from this function | ||
// to the Subscriber, spends against this limit are best effort. | ||
func (ra *RegistrationAuthorityImpl) countFailedValidation(ctx context.Context, regId int64, name string) { | ||
// account rate limit. If the AutomaticallyPauseZombieClients feature has been | ||
// enabled, it also increments the failed authorizations for pausing per domain | ||
// per account rate limit. There is no reason to surface errors from this | ||
Comment on lines
1820
to
+1823
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. // countFailedValidation increments the FailedAuthorizationsPerDomainPerAccount rate limit. If the AutomaticallyPauseZombieClients feature has been enabled, it also increments the FailedAuthorizationsForPausingPerDomainPerAccount rate limit. |
||
// function to the Subscriber, spends against this limit are best effort. | ||
func (ra *RegistrationAuthorityImpl) countFailedValidation(ctx context.Context, regId int64, ident identifier.ACMEIdentifier) { | ||
if ra.limiter == nil || ra.txnBuilder == nil { | ||
// Limiter is disabled. | ||
return | ||
} | ||
|
||
txn, err := ra.txnBuilder.FailedAuthorizationsPerDomainPerAccountSpendOnlyTransaction(regId, name) | ||
txn, err := ra.txnBuilder.FailedAuthorizationsPerDomainPerAccountSpendOnlyTransaction(regId, ident.Value) | ||
if err != nil { | ||
ra.log.Warningf("building rate limit transaction for the %s rate limit: %s", ratelimits.FailedAuthorizationsPerDomainPerAccount, err) | ||
} | ||
|
@@ -1830,6 +1840,54 @@ func (ra *RegistrationAuthorityImpl) countFailedValidation(ctx context.Context, | |
} | ||
ra.log.Warningf("spending against the %s rate limit: %s", ratelimits.FailedAuthorizationsPerDomainPerAccount, err) | ||
} | ||
|
||
pgporada marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if features.Get().AutomaticallyPauseZombieClients { | ||
txn, err = ra.txnBuilder.FailedAuthorizationsForPausingPerDomainPerAccountTransaction(regId, ident.Value) | ||
if err != nil { | ||
ra.log.Warningf("building rate limit transaction for the %s rate limit: %s", ratelimits.FailedAuthorizationsForPausingPerDomainPerAccount, err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I realize this is a copy of some existing code, but I think both line 1833 and this line should instead return an error. I know this function tries to avoid returning errors to the caller, but a failure to build the rate limit transaction represents some sort of internal logic error, and that should become a 500 (which helps ensure it shows up in our metrics, and gets logged in the WFE with some useful context). Also, now that there are two places within this function where we look for and discard |
||
} | ||
|
||
decision, err := ra.limiter.Spend(ctx, txn) | ||
if err != nil { | ||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { | ||
return | ||
} | ||
ra.log.Warningf("spending against the %s rate limit: %s", ratelimits.FailedAuthorizationsForPausingPerDomainPerAccount, err) | ||
} | ||
|
||
if decision.Result(ra.clk.Now()) != nil { | ||
resp, err := ra.SA.PauseIdentifiers(ctx, &sapb.PauseRequest{ | ||
RegistrationID: regId, | ||
Identifiers: []*corepb.Identifier{ | ||
{ | ||
Type: string(ident.Type), | ||
Value: ident.Value, | ||
}, | ||
}, | ||
}) | ||
if err != nil { | ||
ra.log.Warningf("failed to pause %d/%q: %s", regId, ident.Value, err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's another place where we should simply return the error, and let the caller filter out |
||
} | ||
ra.pauseCounter.With(prometheus.Labels{ | ||
"paused": strconv.FormatBool(resp.Paused > 0), | ||
"repaused": strconv.FormatBool(resp.Repaused > 0), | ||
"grace": strconv.FormatBool(resp.Paused <= 0 && resp.Repaused <= 0), | ||
}).Inc() | ||
} | ||
} | ||
} | ||
|
||
// resetAccountPausingLimit resets bucket to maximum capacity for given account. | ||
// There is no reason to surface errors from this function to the Subscriber. | ||
func (ra *RegistrationAuthorityImpl) resetAccountPausingLimit(ctx context.Context, regId int64, ident identifier.ACMEIdentifier) { | ||
pgporada marked this conversation as resolved.
Show resolved
Hide resolved
|
||
bucketKey, err := ratelimits.NewRegIdDomainBucketKey(ratelimits.FailedAuthorizationsForPausingPerDomainPerAccount, regId, ident.Value) | ||
if err != nil { | ||
ra.log.Warningf("creating bucket key for regID=[%d] identifier=[%s]: %s", regId, ident.Value, err) | ||
} | ||
err = ra.limiter.Reset(ctx, bucketKey) | ||
if err != nil { | ||
ra.log.Warningf("resetting bucket for regID=[%d] identifier=[%s]: %s", regId, ident.Value, err) | ||
} | ||
} | ||
|
||
// PerformValidation initiates validation for a specific challenge associated | ||
|
@@ -1953,9 +2011,12 @@ func (ra *RegistrationAuthorityImpl) PerformValidation( | |
if prob != nil { | ||
challenge.Status = core.StatusInvalid | ||
challenge.Error = prob | ||
go ra.countFailedValidation(vaCtx, authz.RegistrationID, authz.Identifier.Value) | ||
go ra.countFailedValidation(vaCtx, authz.RegistrationID, authz.Identifier) | ||
} else { | ||
challenge.Status = core.StatusValid | ||
if features.Get().AutomaticallyPauseZombieClients { | ||
ra.resetAccountPausingLimit(vaCtx, authz.RegistrationID, authz.Identifier) | ||
} | ||
} | ||
challenge.Validated = &vStart | ||
authz.Challenges[challIndex] = *challenge | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't parse this sentence. Specifically "to automatically track limiter to be the authoritative source ..." seems to be an editing error?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Weird, this looks like some kind of hybrid of what was there and what I put in my suggestion: #7763 (comment)