Skip to content

Commit

Permalink
Improve DNS lookup debugging (#70)
Browse files Browse the repository at this point in the history
Rework DNS lookups in order to improve debugging
* Create a unified reverse DNS lookup function.
* Add lookup timing logging
* Add a scrape ID to the logger to identify concurrent scrapes.

Signed-off-by: SuperQ <[email protected]>
  • Loading branch information
SuperQ authored Mar 14, 2024
1 parent 00b65c6 commit ac8b58b
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 59 deletions.
43 changes: 36 additions & 7 deletions collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ import (
"net"
"os"
"path"
"slices"
"sort"
"strings"
"sync/atomic"
"time"

"github.com/facebook/time/ntp/chrony"
Expand All @@ -41,6 +44,9 @@ var (
),
prometheus.GaugeValue,
}

// Globally track scrapes to provide better logging context.
scrapeID atomic.Uint64
)

// Exporter collects chrony stats from the given server and exports
Expand Down Expand Up @@ -136,14 +142,18 @@ func (e Exporter) dial() (net.Conn, error, func()) {

// Collect implements prometheus.Collector.
func (e Exporter) Collect(ch chan<- prometheus.Metric) {
logger := log.With(e.logger, "scrape_id", scrapeID.Add(1))
start := time.Now()
level.Debug(logger).Log("msg", "Scrape starting")
var up float64
defer func() {
level.Debug(logger).Log("msg", "Scrape completed", "seconds", time.Since(start).Seconds())
ch <- upMetric.mustNewConstMetric(up)
}()
conn, err, cleanup := e.dial()
defer cleanup()
if err != nil {
level.Debug(e.logger).Log("msg", "Couldn't connect to chrony", "address", e.address, "err", err)
level.Debug(logger).Log("msg", "Couldn't connect to chrony", "address", e.address, "err", err)
return
}

Expand All @@ -152,26 +162,45 @@ func (e Exporter) Collect(ch chan<- prometheus.Metric) {
client := chrony.Client{Sequence: 1, Connection: conn}

if e.collectSources {
err = e.getSourcesMetrics(ch, client)
err = e.getSourcesMetrics(logger, ch, client)
if err != nil {
level.Debug(e.logger).Log("msg", "Couldn't get sources", "err", err)
level.Debug(logger).Log("msg", "Couldn't get sources", "err", err)
up = 0
}
}

if e.collectTracking {
err = e.getTrackingMetrics(ch, client)
err = e.getTrackingMetrics(logger, ch, client)
if err != nil {
level.Debug(e.logger).Log("msg", "Couldn't get tracking", "err", err)
level.Debug(logger).Log("msg", "Couldn't get tracking", "err", err)
up = 0
}
}

if e.collectServerstats {
err = e.getServerstatsMetrics(ch, client)
err = e.getServerstatsMetrics(logger, ch, client)
if err != nil {
level.Debug(e.logger).Log("msg", "Couldn't get serverstats", "err", err)
level.Debug(logger).Log("msg", "Couldn't get serverstats", "err", err)
up = 0
}
}
}

func (e Exporter) dnsLookup(logger log.Logger, address net.IP) string {
start := time.Now()
defer func() {
level.Debug(logger).Log("msg", "DNS lookup took", "seconds", time.Since(start).Seconds())
}()
if !e.dnsLookups {
return address.String()
}
names, err := net.LookupAddr(address.String())
if err != nil || len(names) < 1 {
return address.String()
}
for i, name := range names {
names[i] = strings.TrimRight(name, ".")
}
sort.Strings(names)
return strings.Join(slices.Compact(names), ",")
}
27 changes: 14 additions & 13 deletions collector/serverstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"

"github.com/facebook/time/ntp/chrony"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
)
Expand Down Expand Up @@ -137,50 +138,50 @@ var (
}
)

func (e Exporter) getServerstatsMetrics(ch chan<- prometheus.Metric, client chrony.Client) error {
func (e Exporter) getServerstatsMetrics(logger log.Logger, ch chan<- prometheus.Metric, client chrony.Client) error {
packet, err := client.Communicate(chrony.NewServerStatsPacket())
if err != nil {
return err
}
level.Debug(e.logger).Log("msg", "Got 'serverstats' response", "serverstats_packet", packet.GetStatus())
level.Debug(logger).Log("msg", "Got 'serverstats' response", "serverstats_packet", packet.GetStatus())

serverstats, ok := packet.(*chrony.ReplyServerStats3)
if !ok {
return fmt.Errorf("got wrong 'serverstats' response: %q", packet)
}

ch <- serverstatsNTPHits.mustNewConstMetric(float64(serverstats.NTPHits))
level.Debug(e.logger).Log("msg", "Serverstats NTP Hits", "ntp_hits", serverstats.NTPHits)
level.Debug(logger).Log("msg", "Serverstats NTP Hits", "ntp_hits", serverstats.NTPHits)

ch <- serverstatsNKEHits.mustNewConstMetric(float64(serverstats.NKEHits))
level.Debug(e.logger).Log("msg", "Serverstats NKE Hits", "nke_hits", serverstats.NKEHits)
level.Debug(logger).Log("msg", "Serverstats NKE Hits", "nke_hits", serverstats.NKEHits)

ch <- serverstatsCMDHits.mustNewConstMetric(float64(serverstats.CMDHits))
level.Debug(e.logger).Log("msg", "Serverstats CMD Hits", "cmd_hits", serverstats.CMDHits)
level.Debug(logger).Log("msg", "Serverstats CMD Hits", "cmd_hits", serverstats.CMDHits)

ch <- serverstatsNTPDrops.mustNewConstMetric(float64(serverstats.NTPDrops))
level.Debug(e.logger).Log("msg", "Serverstats NTP Drops", "ntp_drops", serverstats.NTPDrops)
level.Debug(logger).Log("msg", "Serverstats NTP Drops", "ntp_drops", serverstats.NTPDrops)

ch <- serverstatsNKEDrops.mustNewConstMetric(float64(serverstats.NKEDrops))
level.Debug(e.logger).Log("msg", "Serverstats NKE Drops", "nke_drops", serverstats.NKEDrops)
level.Debug(logger).Log("msg", "Serverstats NKE Drops", "nke_drops", serverstats.NKEDrops)

ch <- serverstatsCMDDrops.mustNewConstMetric(float64(serverstats.CMDDrops))
level.Debug(e.logger).Log("msg", "Serverstats CMD Drops", "cmd_drops", serverstats.CMDDrops)
level.Debug(logger).Log("msg", "Serverstats CMD Drops", "cmd_drops", serverstats.CMDDrops)

ch <- serverstatsLogDrops.mustNewConstMetric(float64(serverstats.LogDrops))
level.Debug(e.logger).Log("msg", "Serverstats Log Drops", "log_drops", serverstats.LogDrops)
level.Debug(logger).Log("msg", "Serverstats Log Drops", "log_drops", serverstats.LogDrops)

ch <- serverstatsNTPAuthHits.mustNewConstMetric(float64(serverstats.NTPAuthHits))
level.Debug(e.logger).Log("msg", "Serverstats Authenticated Packets", "auth_hits", serverstats.NTPAuthHits)
level.Debug(logger).Log("msg", "Serverstats Authenticated Packets", "auth_hits", serverstats.NTPAuthHits)

ch <- serverstatsNTPInterleavedHits.mustNewConstMetric(float64(serverstats.NTPInterleavedHits))
level.Debug(e.logger).Log("msg", "Serverstats Interleaved Packets", "interleaved_hits", serverstats.NTPInterleavedHits)
level.Debug(logger).Log("msg", "Serverstats Interleaved Packets", "interleaved_hits", serverstats.NTPInterleavedHits)

ch <- serverstatsNTPTimestamps.mustNewConstMetric(float64(serverstats.NTPTimestamps))
level.Debug(e.logger).Log("msg", "Serverstats Timestamps Held", "ntp_timestamps_held", serverstats.NTPTimestamps)
level.Debug(logger).Log("msg", "Serverstats Timestamps Held", "ntp_timestamps_held", serverstats.NTPTimestamps)

ch <- serverstatsNTPSpanSeconds.mustNewConstMetric(float64(serverstats.NTPSpanSeconds))
level.Debug(e.logger).Log("msg", "Serverstats Timestamps Span", "ntp_timestamps_span", serverstats.NTPSpanSeconds)
level.Debug(logger).Log("msg", "Serverstats Timestamps Span", "ntp_timestamps_span", serverstats.NTPSpanSeconds)

return nil
}
18 changes: 5 additions & 13 deletions collector/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@ import (
"encoding/binary"
"fmt"
"math"
"net"
"sort"
"strings"

"github.com/facebook/time/ntp/chrony"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
)
Expand Down Expand Up @@ -92,12 +90,12 @@ var (
}
)

func (e Exporter) getSourcesMetrics(ch chan<- prometheus.Metric, client chrony.Client) error {
func (e Exporter) getSourcesMetrics(logger log.Logger, ch chan<- prometheus.Metric, client chrony.Client) error {
packet, err := client.Communicate(chrony.NewSourcesPacket())
if err != nil {
return err
}
level.Debug(e.logger).Log("msg", "Got 'sources' response", "sources_packet", packet.GetStatus())
level.Debug(logger).Log("msg", "Got 'sources' response", "sources_packet", packet.GetStatus())

sources, ok := packet.(*chrony.ReplySources)
if !ok {
Expand All @@ -107,7 +105,7 @@ func (e Exporter) getSourcesMetrics(ch chan<- prometheus.Metric, client chrony.C
results := make([]chrony.ReplySourceData, sources.NSources)

for i := 0; i < int(sources.NSources); i++ {
level.Debug(e.logger).Log("msg", "Fetching source", "source", i)
level.Debug(logger).Log("msg", "Fetching source", "source", i)
packet, err = client.Communicate(chrony.NewSourceDataPacket(int32(i)))
if err != nil {
return fmt.Errorf("Failed to get sourcedata response: %d", i)
Expand All @@ -121,13 +119,7 @@ func (e Exporter) getSourcesMetrics(ch chan<- prometheus.Metric, client chrony.C

for _, r := range results {
sourceAddress := r.IPAddr.String()
sourceName := sourceAddress
if e.dnsLookups {
// Ignore reverse lookup errors.
sourceNames, _ := net.LookupAddr(sourceAddress)
sort.Strings(sourceNames)
sourceName = strings.Join(sourceNames, ",")
}
sourceName := e.dnsLookup(logger, r.IPAddr)

if r.Mode == chrony.SourceModeRef && r.IPAddr.To4() != nil {
sourceName = chrony.RefidToString(binary.BigEndian.Uint32(r.IPAddr))
Expand Down
44 changes: 18 additions & 26 deletions collector/tracking.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ package collector
import (
"fmt"
"net"
"strings"

"github.com/facebook/time/ntp/chrony"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/prometheus/client_golang/prometheus"
)
Expand Down Expand Up @@ -162,74 +162,66 @@ var (
}
)

func (e Exporter) chronyFormatName(tracking chrony.Tracking) string {
func (e Exporter) trackingFormatName(logger log.Logger, tracking chrony.Tracking) string {
if tracking.IPAddr.IsUnspecified() {
return chrony.RefidToString(tracking.RefID)
}
if !e.dnsLookups {
return tracking.IPAddr.String()
}

names, err := net.LookupAddr(tracking.IPAddr.String())
if err != nil || len(names) < 1 {
return tracking.IPAddr.String()
}
return strings.TrimRight(names[0], ".")
return e.dnsLookup(logger, tracking.IPAddr)
}

func (e Exporter) getTrackingMetrics(ch chan<- prometheus.Metric, client chrony.Client) error {
func (e Exporter) getTrackingMetrics(logger log.Logger, ch chan<- prometheus.Metric, client chrony.Client) error {
packet, err := client.Communicate(chrony.NewTrackingPacket())
if err != nil {
return err
}
level.Debug(e.logger).Log("msg", "Got 'tracking' response", "tracking_packet", packet.GetStatus())
level.Debug(logger).Log("msg", "Got 'tracking' response", "tracking_packet", packet.GetStatus())

tracking, ok := packet.(*chrony.ReplyTracking)
if !ok {
return fmt.Errorf("Got wrong 'tracking' response: %q", packet)
}

ch <- trackingInfo.mustNewConstMetric(1.0, tracking.IPAddr.String(), e.chronyFormatName(tracking.Tracking), chrony.RefidAsHEX(tracking.RefID))
ch <- trackingInfo.mustNewConstMetric(1.0, tracking.IPAddr.String(), e.trackingFormatName(logger, tracking.Tracking), chrony.RefidAsHEX(tracking.RefID))

ch <- trackingLastOffset.mustNewConstMetric(tracking.LastOffset)
level.Debug(e.logger).Log("msg", "Tracking Last Offset", "offset", tracking.LastOffset)
level.Debug(logger).Log("msg", "Tracking Last Offset", "offset", tracking.LastOffset)

ch <- trackingRefTime.mustNewConstMetric(float64(tracking.RefTime.UnixNano()) / 1e9)
level.Debug(e.logger).Log("msg", "Tracking Ref Time", "ref_time", tracking.RefTime)
level.Debug(logger).Log("msg", "Tracking Ref Time", "ref_time", tracking.RefTime)

ch <- trackingSystemTime.mustNewConstMetric(float64(tracking.CurrentCorrection))
level.Debug(e.logger).Log("msg", "Tracking System Time", "system_time", tracking.CurrentCorrection)
level.Debug(logger).Log("msg", "Tracking System Time", "system_time", tracking.CurrentCorrection)

remoteTracking := 1.0
if tracking.IPAddr.Equal(trackingLocalIP) {
remoteTracking = 0.0
}
ch <- trackingRemoteTracking.mustNewConstMetric(remoteTracking)
level.Debug(e.logger).Log("msg", "Tracking is remote", "bool_value", remoteTracking)
level.Debug(logger).Log("msg", "Tracking is remote", "bool_value", remoteTracking)

ch <- trackingRMSOffset.mustNewConstMetric(tracking.RMSOffset)
level.Debug(e.logger).Log("msg", "Tracking RMS Offset", "rms_offset", tracking.RMSOffset)
level.Debug(logger).Log("msg", "Tracking RMS Offset", "rms_offset", tracking.RMSOffset)

ch <- trackingRootDelay.mustNewConstMetric(tracking.RootDelay)
level.Debug(e.logger).Log("msg", "Tracking Root delay", "root_delay", tracking.RootDelay)
level.Debug(logger).Log("msg", "Tracking Root delay", "root_delay", tracking.RootDelay)

ch <- trackingRootDispersion.mustNewConstMetric(tracking.RootDispersion)
level.Debug(e.logger).Log("msg", "Tracking Root dispersion", "root_dispersion", tracking.RootDispersion)
level.Debug(logger).Log("msg", "Tracking Root dispersion", "root_dispersion", tracking.RootDispersion)

ch <- trackingFrequency.mustNewConstMetric(tracking.FreqPPM)
level.Debug(e.logger).Log("msg", "Tracking Frequency", "frequency", tracking.FreqPPM)
level.Debug(logger).Log("msg", "Tracking Frequency", "frequency", tracking.FreqPPM)

ch <- trackingResidualFrequency.mustNewConstMetric(tracking.ResidFreqPPM)
level.Debug(e.logger).Log("msg", "Tracking Residual Frequency", "residual_frequency", tracking.ResidFreqPPM)
level.Debug(logger).Log("msg", "Tracking Residual Frequency", "residual_frequency", tracking.ResidFreqPPM)

ch <- trackingSkew.mustNewConstMetric(tracking.SkewPPM)
level.Debug(e.logger).Log("msg", "Tracking Skew", "skew", tracking.SkewPPM)
level.Debug(logger).Log("msg", "Tracking Skew", "skew", tracking.SkewPPM)

ch <- trackingUpdateInterval.mustNewConstMetric(tracking.LastUpdateInterval)
level.Debug(e.logger).Log("msg", "Tracking Last Update Interval", "update_interval", tracking.LastUpdateInterval)
level.Debug(logger).Log("msg", "Tracking Last Update Interval", "update_interval", tracking.LastUpdateInterval)

ch <- trackingStratum.mustNewConstMetric(float64(tracking.Stratum))
level.Debug(e.logger).Log("msg", "Tracking Stratum", "stratum", tracking.Stratum)
level.Debug(logger).Log("msg", "Tracking Stratum", "stratum", tracking.Stratum)

return nil
}

0 comments on commit ac8b58b

Please sign in to comment.