Skip to content

Commit

Permalink
metrics: histogram for upstream resolve duration (#71)
Browse files Browse the repository at this point in the history
Adds the `leng_upstream_request_duration_*` histogram metrics to keep track of
the duration of upstream resolving.
  • Loading branch information
cottand authored Nov 10, 2024
1 parent 20f09ef commit db020fc
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 6 deletions.
5 changes: 5 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type Metrics struct {
Path string
HighCardinalityEnabled bool
ResetPeriodMinutes int64
HistogramsEnabled bool
}

type DnsOverHttpServer struct {
Expand Down Expand Up @@ -112,6 +113,9 @@ interval = 200
# question cache capacity, 0 for infinite but not recommended (this is used for storing logs)
questioncachecap = 5000
# timeout for upstream DNS queries, in ms
timeout = 5000
# manual whitelist entries - comments for reference
whitelist = [
# "getsentry.com",
Expand Down Expand Up @@ -170,6 +174,7 @@ followCnameDepth = 12
path = "/metrics"
# see https://cottand.github.io/leng/Prometheus-Metrics.html
highCardinalityEnabled = false
histogramsEnabled = false
resetPeriodMinutes = 60
[DnsOverHttpServer]
Expand Down
2 changes: 1 addition & 1 deletion grimd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func integrationTest(changeConfig func(c *Config), test func(client *dns.Client,

changeConfig(&config)

cancelMetrics := metric.Start(config.Metrics.ResetPeriodMinutes, config.Metrics.HighCardinalityEnabled)
cancelMetrics := metric.Start(config.Metrics.ResetPeriodMinutes, config.Metrics.HighCardinalityEnabled, false)
defer cancelMetrics()
quitActivation := make(chan bool)
actChannel := make(chan *ActivationHandler)
Expand Down
26 changes: 25 additions & 1 deletion internal/metric/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@ var (
Name: "upstream_request_doh",
Help: "Upstream DoH requests - only works when DoH configured",
}, []string{"success"})

// RequestUpstreamResolveDuration is a histogram of upstream request durations,
// in seconds, partitioned by the "upstream_type" label.
RequestUpstreamResolveDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: Namespace,
Name: "upstream_request_duration",
Help: "Upstream requests duration in seconds, by request type",
// Classic bucket upper bounds, in seconds.
// NOTE(review): the step from 0.0001 to 0.0025 is much wider than the later
// steps - confirm 0.0001 is intended (rather than e.g. 0.001).
Buckets: []float64{0.0001, 0.0025, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10},
// Native histogram settings, configured in addition to the classic buckets above.
NativeHistogramBucketFactor: 1.1,
NativeHistogramMaxBucketNumber: 32,
}, []string{"upstream_type"})

CustomDNSConfigReload = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: Namespace,
Expand All @@ -79,6 +89,7 @@ var (
}

configHighCardinality = false
configHistograms = false
)

func init() {
Expand All @@ -92,8 +103,12 @@ func init() {
)
}

func Start(resetPeriodMinutes int64, highCardinality bool) (closeChan context.CancelFunc) {
func Start(resetPeriodMinutes int64, highCardinality bool, histogramsEnabled bool) (closeChan context.CancelFunc) {
configHighCardinality = highCardinality
if histogramsEnabled {
prometheus.MustRegister(RequestUpstreamResolveDuration)
configHistograms = true
}
ctx, cancel := context.WithCancel(context.Background())
mark := time.Now()

Expand Down Expand Up @@ -161,3 +176,12 @@ func ReportDNSRespond(remote net.IP, message *dns.Msg, blocked bool, cached bool
cachedResponseCounter.Inc()
}
}

// ReportUpstreamResolve records the duration of one upstream resolution, in
// seconds, in the RequestUpstreamResolveDuration histogram under the given
// upstreamType label value.
//
// It is a no-op unless configHistograms is set.
func ReportUpstreamResolve(upstreamType string, duration time.Duration) {
	if configHistograms {
		labels := prometheus.Labels{"upstream_type": upstreamType}
		seconds := duration.Seconds()
		RequestUpstreamResolveDuration.With(labels).Observe(seconds)
	}
}
6 changes: 5 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ func main() {
loggingState.cleanUp()
}()

cancelMetrics := metric.Start(config.Metrics.ResetPeriodMinutes, config.Metrics.HighCardinalityEnabled)
cancelMetrics := metric.Start(
config.Metrics.ResetPeriodMinutes,
config.Metrics.HighCardinalityEnabled,
config.Metrics.HistogramsEnabled,
)

lengActive = true
quitActivation := make(chan bool)
Expand Down
10 changes: 7 additions & 3 deletions resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type Resolver struct {
// Lookup will ask each nameserver in top-to-bottom fashion, starting a new request
// every second, and return as early as possible (as soon as there is an answer).
// It returns an error if no request has succeeded.
func (r *Resolver) Lookup(net string, req *dns.Msg, timeout int, interval int, nameServers []string, DoH string) (message *dns.Msg, err error) {
func (resolver *Resolver) Lookup(net string, req *dns.Msg, timeout int, interval int, nameServers []string, DoH string) (message *dns.Msg, err error) {
logger.Debugf("Lookup %s, timeout: %d, interval: %d, nameservers: %v, Using DoH: %v", net, timeout, interval, nameServers, DoH != "")

question := req.Question[0]
Expand All @@ -50,10 +50,13 @@ func (r *Resolver) Lookup(net string, req *dns.Msg, timeout int, interval int, n
"q_name": question.Name,
})

mark := time.Now()

//Is DoH enabled
if DoH != "" {
//First try and use DOH. Privacy First
ans, err := r.DoHLookup(DoH, timeout, req)
ans, err := resolver.DoHLookup(DoH, timeout, req)
metric.ReportUpstreamResolve("doh", time.Since(mark))
if err == nil {
// No error so result is ok
metricUpstreamResolveCounter.With(
Expand Down Expand Up @@ -129,14 +132,15 @@ func (r *Resolver) Lookup(net string, req *dns.Msg, timeout int, interval int, n
"rcode": dns.RcodeToString[r.answer.Rcode],
"upstream": r.nameserver,
}).Inc()
metric.ReportUpstreamResolve(net, time.Since(mark))
return r.answer, nil
default:
return nil, ResolvError{qname, net, nameServers}
}
}

// DoHLookup performs a DNS lookup over https
func (r *Resolver) DoHLookup(url string, timeout int, req *dns.Msg) (msg *dns.Msg, err error) {
func (resolver *Resolver) DoHLookup(url string, timeout int, req *dns.Msg) (msg *dns.Msg, err error) {
qname := req.Question[0].Name

defer func() {
Expand Down

0 comments on commit db020fc

Please sign in to comment.