Skip to content

Commit

Permalink
ra: wait for validations on clean shutdown (#7854)
Browse files Browse the repository at this point in the history
This reduces the number of validations that get left indefinitely in
"pending" state.

Rename `DrainFinalize()` to `Drain()` to indicate that it now covers
more cases than just finalize.
  • Loading branch information
jsha authored Dec 2, 2024
1 parent d64132e commit 5cdfa3e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cmd/boulder-ra/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ func main() {
apc,
issuerCerts,
)
- defer rai.DrainFinalize()
+ defer rai.Drain()

policyErr := rai.LoadRateLimitPoliciesFile(c.RA.RateLimitPoliciesFilename)
cmd.FailOnError(policyErr, "Couldn't load rate limit policies file")
Expand Down
20 changes: 15 additions & 5 deletions ra/ra.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ type RegistrationAuthorityImpl struct {
maxNames int
orderLifetime time.Duration
finalizeTimeout time.Duration
- finalizeWG sync.WaitGroup
+ drainWG sync.WaitGroup

issuersByNameID map[issuance.NameID]*issuance.Certificate
purger akamaipb.AkamaiPurgerClient
Expand Down Expand Up @@ -1010,15 +1010,15 @@ func (ra *RegistrationAuthorityImpl) FinalizeOrder(ctx context.Context, req *rap
//
// We track this goroutine's lifetime in a waitgroup global to this RA, so
// that it can wait for all goroutines to drain during shutdown.
- ra.finalizeWG.Add(1)
+ ra.drainWG.Add(1)
go func() {
_, err := ra.issueCertificateOuter(ctx, proto.Clone(order).(*corepb.Order), csr, logEvent)
if err != nil {
// We only log here, because this is in a background goroutine with
// no parent goroutine waiting for it to receive the error.
ra.log.AuditErrf("Asynchronous finalization failed: %s", err.Error())
}
- ra.finalizeWG.Done()
+ ra.drainWG.Done()
}()
return order, nil
} else {
Expand Down Expand Up @@ -1904,8 +1904,11 @@ func (ra *RegistrationAuthorityImpl) PerformValidation(
}

// Dispatch to the VA for service
ra.drainWG.Add(1)
vaCtx := context.Background()
go func(authz core.Authorization) {
defer ra.drainWG.Done()

// We will mutate challenges later in this goroutine to change status and
// add error, but we also return a copy of authz immediately. To avoid a
// data race, make a copy of the challenges slice here for mutation.
Expand Down Expand Up @@ -2803,6 +2806,13 @@ func (ra *RegistrationAuthorityImpl) GetAuthorization(ctx context.Context, req *
return authz, nil
}

- func (ra *RegistrationAuthorityImpl) DrainFinalize() {
- ra.finalizeWG.Wait()
+ // Drain blocks until all detached goroutines are done.
+ //
+ // The RA runs detached goroutines for challenge validation and finalization,
+ // so that ACME responses can be returned to the user promptly while work continues.
+ //
+ // The main goroutine should call this before exiting to avoid canceling the work
+ // being done in detached goroutines.
+ func (ra *RegistrationAuthorityImpl) Drain() {
+ ra.drainWG.Wait()
}

0 comments on commit 5cdfa3e

Please sign in to comment.