Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VA: Use performValidation for IsCAAValid remote checks #7850

Merged
merged 8 commits into from
Nov 28, 2024
9 changes: 1 addition & 8 deletions features/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,9 @@ type Config struct {

// EnforceMultiCAA causes the VA to kick off remote CAA rechecks when true.
// When false, no remote CAA rechecks will be performed. The primary VA will
// make a valid/invalid decision with the results. The primary VA will
// return an early decision if MultiCAAFullResults is false.
// make a valid/invalid decision with the results.
EnforceMultiCAA bool

// MultiCAAFullResults will cause the main VA to block and wait for all of
// the remote VA CAA recheck results instead of returning early if the
// number of failures is greater than the number allowed by MPIC.
// Only used when EnforceMultiCAA is true.
MultiCAAFullResults bool

// MultipleCertificateProfiles, when enabled, triggers the following
// behavior:
// - SA.NewOrderAndAuthzs: upon receiving a NewOrderRequest with a
Expand Down
1 change: 0 additions & 1 deletion test/config-next/va.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
},
"features": {
"EnforceMultiCAA": true,
"MultiCAAFullResults": true,
"DOH": true
},
"remoteVAs": [
Expand Down
182 changes: 16 additions & 166 deletions va/caa.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,20 @@ package va
import (
"context"
"fmt"
"math/rand/v2"
"net/url"
"regexp"
"strings"
"sync"
"time"

"github.com/miekg/dns"
"google.golang.org/protobuf/proto"

"github.com/letsencrypt/boulder/bdns"
"github.com/letsencrypt/boulder/core"
corepb "github.com/letsencrypt/boulder/core/proto"
berrors "github.com/letsencrypt/boulder/errors"
"github.com/letsencrypt/boulder/features"
bgrpc "github.com/letsencrypt/boulder/grpc"
"github.com/letsencrypt/boulder/identifier"
"github.com/letsencrypt/boulder/probs"
vapb "github.com/letsencrypt/boulder/va/proto"
Expand Down Expand Up @@ -80,14 +79,6 @@ func (va *ValidationAuthorityImpl) IsCAAValid(ctx context.Context, req *vapb.IsC
va.log.AuditObject("CAA check result", logEvent)
}()

var remoteCAAResults chan *remoteVAResult
if features.Get().EnforceMultiCAA {
if remoteVACount := len(va.remoteVAs); remoteVACount > 0 {
remoteCAAResults = make(chan *remoteVAResult, remoteVACount)
go va.performRemoteCAACheck(ctx, req, remoteCAAResults)
}
}

internalErr = va.checkCAA(ctx, acmeID, params)

// Stop the clock for local check latency.
Expand All @@ -97,31 +88,22 @@ func (va *ValidationAuthorityImpl) IsCAAValid(ctx context.Context, req *vapb.IsC
logEvent.InternalError = internalErr.Error()
prob = detailedError(internalErr)
prob.Detail = fmt.Sprintf("While processing CAA for %s: %s", req.Domain, prob.Detail)
} else if remoteCAAResults != nil {
if !features.Get().EnforceMultiCAA && features.Get().MultiCAAFullResults {
// If we're not going to enforce multi CAA but we are logging the
// differentials then collect and log the remote results in a separate go
// routine to avoid blocking the primary VA.
go func() {
_ = va.processRemoteCAAResults(
req.Domain,
req.AccountURIID,
string(challType),
remoteCAAResults)
}()
} else if features.Get().EnforceMultiCAA {
remoteProb := va.processRemoteCAAResults(
req.Domain,
req.AccountURIID,
string(challType),
remoteCAAResults)

// If the remote result was a non-nil problem then fail the CAA check
if remoteProb != nil {
prob = remoteProb
va.log.Infof("CAA check failed due to remote failures: identifier=%v err=%s",
req.Domain, remoteProb)
}

if features.Get().EnforceMultiCAA {
op := func(ctx context.Context, remoteva RemoteVA, req proto.Message) (remoteResult, error) {
checkRequest, ok := req.(*vapb.IsCAAValidRequest)
if !ok {
return nil, fmt.Errorf("got type %T, want *vapb.IsCAAValidRequest", req)
}
return remoteva.IsCAAValid(ctx, checkRequest)
}
remoteProb := va.performRemoteOperation(ctx, op, req)
// If the remote result was a non-nil problem then fail the CAA check
if remoteProb != nil {
prob = remoteProb
va.log.Infof("CAA check failed due to remote failures: identifier=%v err=%s",
req.Domain, remoteProb)
}
}

Expand All @@ -139,138 +121,6 @@ func (va *ValidationAuthorityImpl) IsCAAValid(ctx context.Context, req *vapb.IsC
}
}

// processRemoteCAAResults evaluates a primary VA result, and a channel of
// remote VA problems to produce a single overall validation result based on
// configured feature flags. The overall result is calculated based on the VA's
// configured `maxRemoteFailures` value.
//
// If the `MultiCAAFullResults` feature is enabled then
// `processRemoteCAAResults` will expect to read a result from the
// `remoteResultsChan` channel for each VA and will not produce an overall
// result until all remote VAs have responded. In this case
// `logRemoteDifferentials` will also be called to describe the differential
// between the primary and all of the remote VAs.
//
// If the `MultiCAAFullResults` feature flag is not enabled then
// `processRemoteCAAResults` will potentially return before all remote VAs have
// had a chance to respond. This happens if the success or failure threshold is
// met. This doesn't allow for logging the differential between the primary and
// remote VAs but is more performant.
func (va *ValidationAuthorityImpl) processRemoteCAAResults(
domain string,
acctID int64,
challengeType string,
remoteResultsChan <-chan *remoteVAResult) *probs.ProblemDetails {

required := len(va.remoteVAs) - va.maxRemoteFailures
good := 0
bad := 0

var remoteResults []*remoteVAResult
var firstProb *probs.ProblemDetails
// Due to channel behavior this could block indefinitely and we rely on gRPC
// honoring the context deadline used in client calls to prevent that from
// happening.
for result := range remoteResultsChan {
// Add the result to the slice
remoteResults = append(remoteResults, result)
if result.Problem == nil {
good++
} else {
bad++
// Store the first non-nil problem to return later (if `MultiCAAFullResults`
// is enabled).
if firstProb == nil {
firstProb = result.Problem
}
}

// If MultiCAAFullResults isn't enabled then return early whenever the
// success or failure threshold is met.
if !features.Get().MultiCAAFullResults {
if good >= required {
return nil
} else if bad > va.maxRemoteFailures {
modifiedProblem := *result.Problem
modifiedProblem.Detail = "During secondary CAA checking: " + firstProb.Detail
return &modifiedProblem
}
}

// If we haven't returned early because of MultiCAAFullResults being
// enabled we need to break the loop once all of the VAs have returned a
// result.
if len(remoteResults) == len(va.remoteVAs) {
break
}
}
// If we are using `features.MultiCAAFullResults` then we haven't returned
// early and can now log the differential between what the primary VA saw and
// what all of the remote VAs saw.
va.logRemoteResults(
domain,
acctID,
challengeType,
remoteResults)

// Based on the threshold of good/bad return nil or a problem.
if good >= required {
return nil
} else if bad > va.maxRemoteFailures {
modifiedProblem := *firstProb
modifiedProblem.Detail = "During secondary CAA checking: " + firstProb.Detail
va.metrics.prospectiveRemoteCAACheckFailures.Inc()
return &modifiedProblem
}

// This condition should not occur - it indicates the good/bad counts didn't
// meet either the required threshold or the maxRemoteFailures threshold.
return probs.ServerInternal("Too few remote IsCAAValid RPC results")
}

// performRemoteCAACheck calls `isCAAValid` for each of the configured remoteVAs
// in a random order. The provided `results` chan should have an equal size to
// the number of remote VAs. The CAA checks will be performed in separate
// go-routines. If the result `error` from a remote `isCAAValid` RPC is nil or a
// nil `ProblemDetails` instance it is written directly to the `results` chan.
// If the err is a canceled error it is treated as a nil error. Otherwise the
// error/problem is written to the results channel as-is.
func (va *ValidationAuthorityImpl) performRemoteCAACheck(
ctx context.Context,
req *vapb.IsCAAValidRequest,
results chan<- *remoteVAResult) {
for _, i := range rand.Perm(len(va.remoteVAs)) {
remoteVA := va.remoteVAs[i]
go func(rva RemoteVA) {
result := &remoteVAResult{
VAHostname: rva.Address,
}
res, err := rva.IsCAAValid(ctx, req)
if err != nil {
if core.IsCanceled(err) {
// Handle the cancellation error.
result.Problem = probs.ServerInternal("Remote VA IsCAAValid RPC canceled")
} else {
// Handle validation error.
va.log.Errf("Remote VA %q.IsCAAValid failed: %s", rva.Address, err)
result.Problem = probs.ServerInternal("Remote VA IsCAAValid RPC failed")
}
} else if res.Problem != nil {
prob, err := bgrpc.PBToProblemDetails(res.Problem)
if err != nil {
va.log.Infof("Remote VA %q.IsCAAValid returned malformed problem: %s", rva.Address, err)
result.Problem = probs.ServerInternal(
fmt.Sprintf("Remote VA IsCAAValid RPC returned malformed result: %s", err))
} else {
va.log.Infof("Remote VA %q.IsCAAValid returned problem: %s", rva.Address, prob)
result.Problem = prob
}
}
results <- result
}(remoteVA)
}
}

// checkCAA performs a CAA lookup & validation for the provided identifier. If
// the CAA lookup & validation fail a problem is returned.
func (va *ValidationAuthorityImpl) checkCAA(
Expand Down
Loading
Loading