diff --git a/prober/http.go b/prober/http.go
index 232214c3..aa003d0f 100644
--- a/prober/http.go
+++ b/prober/http.go
@@ -18,6 +18,7 @@ import (
 	"compress/gzip"
 	"context"
 	"crypto/tls"
+	"crypto/x509"
 	"errors"
 	"fmt"
 	"io"
@@ -296,6 +297,10 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 			Name: "probe_http_last_modified_timestamp_seconds",
 			Help: "Returns the Last-Modified HTTP response header in unixtime",
 		})
+		probeFailureCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
+			Name: "probe_http_failures_total",
+			Help: "Counts number of probe http failures by reason",
+		}, []string{"reason"})
 	)
 
 	registry.MustRegister(durationGaugeVec)
@@ -306,6 +311,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 	registry.MustRegister(statusCodeGauge)
 	registry.MustRegister(probeHTTPVersionGauge)
 	registry.MustRegister(probeFailedDueToRegex)
+	registry.MustRegister(probeFailureCounter)
 
 	httpConfig := module.HTTP
 
@@ -316,6 +322,12 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 	targetURL, err := url.Parse(target)
 	if err != nil {
 		level.Error(logger).Log("msg", "Could not parse target URL", "err", err)
+		var ue *url.Error
+		if errors.As(err, &ue) {
+			probeFailureCounter.WithLabelValues(labelFromURLError("urlparse", ue)).Inc()
+		} else {
+			probeFailureCounter.WithLabelValues("urlparse_error").Inc()
+		}
 		return false
 	}
 
@@ -328,6 +340,23 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 		ip, lookupTime, err = chooseProtocol(ctx, module.HTTP.IPProtocol, module.HTTP.IPProtocolFallback, targetHost, registry, logger)
 		durationGaugeVec.WithLabelValues("resolve").Add(lookupTime)
 		if err != nil {
+			var de *net.DNSError
+			if errors.As(err, &de) {
+				// Check IsTimeout before IsTemporary: resolver timeouts commonly have
+				// both flags set and would otherwise never be reported as dns_timeout.
+				switch {
+				case de.IsNotFound:
+					probeFailureCounter.WithLabelValues("dns_not_found").Inc()
+				case de.IsTimeout:
+					probeFailureCounter.WithLabelValues("dns_timeout").Inc()
+				case de.IsTemporary:
+					probeFailureCounter.WithLabelValues("dns_temporary_failure").Inc()
+				default:
+					probeFailureCounter.WithLabelValues("dns_error").Inc()
+				}
+			} else {
+				probeFailureCounter.WithLabelValues("dns_error").Inc()
+			}
 			level.Error(logger).Log("msg", "Error resolving address", "err", err)
 			return false
 		}
@@ -351,6 +380,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 	client, err := pconfig.NewClientFromConfig(httpClientConfig, "http_probe", pconfig.WithKeepAlivesDisabled())
 	if err != nil {
 		level.Error(logger).Log("msg", "Error generating HTTP client", "err", err)
+		probeFailureCounter.WithLabelValues("client_error").Inc()
 		return false
 	}
 
@@ -358,12 +388,14 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 	noServerName, err := pconfig.NewRoundTripperFromConfig(httpClientConfig, "http_probe", pconfig.WithKeepAlivesDisabled())
 	if err != nil {
 		level.Error(logger).Log("msg", "Error generating HTTP client without ServerName", "err", err)
+		probeFailureCounter.WithLabelValues("client_error").Inc()
 		return false
 	}
 
 	jar, err := cookiejar.New(&cookiejar.Options{PublicSuffixList: publicsuffix.List})
 	if err != nil {
 		level.Error(logger).Log("msg", "Error generating cookiejar", "err", err)
+		probeFailureCounter.WithLabelValues("cookiejar_error").Inc()
 		return false
 	}
 	client.Jar = jar
@@ -414,6 +446,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 		body_file, err := os.Open(httpConfig.BodyFile)
 		if err != nil {
 			level.Error(logger).Log("msg", "Error creating request", "err", err)
+			probeFailureCounter.WithLabelValues("request_creation_error").Inc()
 			return
 		}
 		defer body_file.Close()
@@ -423,6 +456,12 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 	request, err := http.NewRequest(httpConfig.Method, targetURL.String(), body)
 	if err != nil {
 		level.Error(logger).Log("msg", "Error creating request", "err", err)
+		var ue *url.Error
+		if errors.As(err, &ue) {
+			probeFailureCounter.WithLabelValues(labelFromURLError("request_creation", ue)).Inc()
+		} else {
+			probeFailureCounter.WithLabelValues("request_creation_error").Inc()
+		}
 		return
 	}
 	request.Host = origHost
@@ -468,6 +507,24 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 		resp = &http.Response{}
 		if err != nil {
 			level.Error(logger).Log("msg", "Error for HTTP request", "err", err)
+			var authorityError x509.UnknownAuthorityError
+			var hostnameError x509.HostnameError
+			var certInvalidError x509.CertificateInvalidError
+			var ue *url.Error
+			// client.Do wraps every failure in *url.Error, so the specific
+			// certificate errors must be matched before the generic case.
+			switch {
+			case errors.As(err, &authorityError):
+				probeFailureCounter.WithLabelValues("request_certificate_unknown_authority").Inc()
+			case errors.As(err, &hostnameError):
+				probeFailureCounter.WithLabelValues("request_certificate_hostname_mismatch").Inc()
+			case errors.As(err, &certInvalidError):
+				probeFailureCounter.WithLabelValues("request_certificate_invalid").Inc()
+			case errors.As(err, &ue):
+				probeFailureCounter.WithLabelValues(labelFromURLError("request", ue)).Inc()
+			default:
+				probeFailureCounter.WithLabelValues("request_error").Inc()
+			}
 		}
 	} else {
 		requestErrored := (err != nil)
@@ -506,6 +563,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 			dec, err := getDecompressionReader(httpConfig.Compression, resp.Body)
 			if err != nil {
 				level.Info(logger).Log("msg", "Failed to get decompressor for HTTP response body", "err", err)
+				probeFailureCounter.WithLabelValues("decompression_error").Inc()
 				success = false
 			} else if dec != nil {
 				// Since we are replacing the original resp.Body with the decoder, we need to make sure
@@ -547,6 +605,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 			_, err = io.Copy(io.Discard, byteCounter)
 			if err != nil {
 				level.Info(logger).Log("msg", "Failed to read HTTP response body", "err", err)
+				probeFailureCounter.WithLabelValues("read_body_error").Inc()
 				success = false
 			}
 
@@ -586,6 +645,7 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 			}
 			if !found {
 				level.Error(logger).Log("msg", "Invalid HTTP version number", "version", resp.Proto)
+				probeFailureCounter.WithLabelValues("invalid_http_version").Inc()
 				success = false
 			}
 		}
@@ -645,10 +705,12 @@ func ProbeHTTP(ctx context.Context, target string, module config.Module, registr
 		probeSSLLastInformation.WithLabelValues(getFingerprint(resp.TLS), getSubject(resp.TLS), getIssuer(resp.TLS), getDNSNames(resp.TLS)).Set(1)
 		if httpConfig.FailIfSSL {
 			level.Error(logger).Log("msg", "Final request was over SSL")
+			probeFailureCounter.WithLabelValues("final_request_ssl").Inc()
 			success = false
 		}
 	} else if httpConfig.FailIfNotSSL && success {
 		level.Error(logger).Log("msg", "Final request was not over SSL")
+		probeFailureCounter.WithLabelValues("final_request_not_ssl").Inc()
 		success = false
 	}
 
@@ -677,3 +739,19 @@ func getDecompressionReader(algorithm string, origBody io.ReadCloser) (io.ReadCl
 		return nil, errors.New("unsupported compression algorithm")
 	}
 }
+
+// labelFromURLError builds the "reason" label value for a failure reported as
+// a *url.Error. The result has the form <prefix>_<op>_<kind>, where op is the
+// lower-cased err.Op and kind is "timeout", "temporary_failure" or "error".
+// Timeout is tested first because a timeout may also report itself as temporary.
+func labelFromURLError(prefix string, err *url.Error) string {
+	op := strings.ToLower(err.Op)
+	switch {
+	case err.Timeout():
+		return fmt.Sprintf("%s_%s_timeout", prefix, op)
+	case err.Temporary():
+		return fmt.Sprintf("%s_%s_temporary_failure", prefix, op)
+	default:
+		return fmt.Sprintf("%s_%s_error", prefix, op)
+	}
+}
diff --git a/prober/http_test.go b/prober/http_test.go
index 05ca36d8..ea2c1c38 100644
--- a/prober/http_test.go
+++ b/prober/http_test.go
@@ -102,6 +102,24 @@ func TestValidHTTPVersion(t *testing.T) {
 		if result != test.ShouldSucceed {
 			t.Fatalf("Test %v had unexpected result: %s", i, body)
 		}
+		if !test.ShouldSucceed {
+			mfs, err := registry.Gather()
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			expectedResults := map[string]float64{
+				"probe_http_failures_total": float64(1),
+			}
+			checkRegistryResults(expectedResults, mfs, t)
+
+			expectedLabels := map[string]map[string]string{
+				"probe_http_failures_total": {
+					"reason": "invalid_http_version",
+				},
+			}
+			checkRegistryLabels(expectedLabels, mfs, t)
+		}
 	}
 }
 
@@ -511,7 +529,20 @@ func TestHandlingOfCompressionSetting(t *testing.T) {
 				"probe_http_content_length":           float64(tc.contentLength),
 				"probe_http_uncompressed_body_length": float64(tc.uncompressedBodyLength),
 			}
+
+			if tc.expectFailure {
+				expectedResults["probe_http_failures_total"] = float64(1)
+			}
 			checkRegistryResults(expectedResults, mfs, t)
+
+			if tc.expectFailure {
+				expectedLabels := map[string]map[string]string{
+					"probe_http_failures_total": {
+						"reason": "read_body_error",
+					},
+				}
+				checkRegistryLabels(expectedLabels, mfs, t)
+			}
 		})
 	}
 }
@@ -547,6 +578,7 @@ func TestMaxResponseLength(t *testing.T) {
 			expectFailure: true,
 			expectedMetrics: map[string]float64{
 				"probe_http_content_length": float64(max + 1),
+				"probe_http_failures_total": 1,
 			},
 		},
 		"short compressed": {
@@ -564,6 +596,7 @@ func TestMaxResponseLength(t *testing.T) {
 			expectedMetrics: map[string]float64{
 				"probe_http_content_length":           float64(longGzippedPayload.Len()),
 				"probe_http_uncompressed_body_length": max, // it should stop decompressing at max bytes
+				"probe_http_failures_total":           1,
 			},
 		},
 	}
@@ -632,6 +665,14 @@ func TestMaxResponseLength(t *testing.T) {
 			}
 
 			checkRegistryResults(tc.expectedMetrics, mfs, t)
+			if tc.expectFailure {
+				expectedLabels := map[string]map[string]string{
+					"probe_http_failures_total": {
+						"reason": "read_body_error",
+					},
+				}
+				checkRegistryLabels(expectedLabels, mfs, t)
+			}
 		})
 	}
 }
@@ -832,9 +873,17 @@ func TestFailIfNotSSL(t *testing.T) {
 		t.Fatal(err)
 	}
 	expectedResults := map[string]float64{
-		"probe_http_ssl": 0,
+		"probe_http_ssl":            0,
+		"probe_http_failures_total": 1,
 	}
 	checkRegistryResults(expectedResults, mfs, t)
+
+	expectedLabels := map[string]map[string]string{
+		"probe_http_failures_total": {
+			"reason": "final_request_not_ssl",
+		},
+	}
+	checkRegistryLabels(expectedLabels, mfs, t)
 }
 
 type logRecorder struct {
@@ -885,16 +934,18 @@ func TestFailIfNotSSLLogMsg(t *testing.T) {
 	badServerURL := fmt.Sprintf("http://%s/", listener.Addr().String())
 
 	for title, tc := range map[string]struct {
-		Config          config.Module
-		URL             string
-		Success         bool
-		MessageExpected bool
+		Config             config.Module
+		URL                string
+		Success            bool
+		MessageExpected    bool
+		ProbeFailureReason string
 	}{
 		"SSL expected, message": {
-			Config:          config.Module{HTTP: config.HTTPProbe{IPProtocolFallback: true, FailIfNotSSL: true}},
-			URL:             goodServer.URL,
-			Success:         false,
-			MessageExpected: true,
+			Config:             config.Module{HTTP: config.HTTPProbe{IPProtocolFallback: true, FailIfNotSSL: true}},
+			URL:                goodServer.URL,
+			Success:            false,
+			MessageExpected:    true,
+			ProbeFailureReason: "final_request_not_ssl",
 		},
 		"No SSL expected, no message": {
 			Config:          config.Module{HTTP: config.HTTPProbe{IPProtocolFallback: true, FailIfNotSSL: false}},
@@ -903,10 +954,11 @@ func TestFailIfNotSSLLogMsg(t *testing.T) {
 			MessageExpected: false,
 		},
 		"SSL expected, no message": {
-			Config:          config.Module{HTTP: config.HTTPProbe{IPProtocolFallback: true, FailIfNotSSL: true}},
-			URL:             badServerURL,
-			Success:         false,
-			MessageExpected: false,
+			Config:             config.Module{HTTP: config.HTTPProbe{IPProtocolFallback: true, FailIfNotSSL: true}},
+			URL:                badServerURL,
+			Success:            false,
+			MessageExpected:    false,
+			ProbeFailureReason: "request_get_error",
 		},
 	} {
 		t.Run(title, func(t *testing.T) {
@@ -922,6 +974,24 @@ func TestFailIfNotSSLLogMsg(t *testing.T) {
 			if seen := recorder.msgs[Msg]; seen != tc.MessageExpected {
 				t.Fatalf("SSL message expected=%v, seen=%v", tc.MessageExpected, seen)
 			}
+			if !tc.Success {
+				mfs, err := registry.Gather()
+				if err != nil {
+					t.Fatal(err)
+				}
+				expectedResults := map[string]float64{
+					"probe_http_ssl":            0,
+					"probe_http_failures_total": 1,
+				}
+				checkRegistryResults(expectedResults, mfs, t)
+
+				expectedLabels := map[string]map[string]string{
+					"probe_http_failures_total": {
+						"reason": tc.ProbeFailureReason,
+					},
+				}
+				checkRegistryLabels(expectedLabels, mfs, t)
+			}
 		})
 	}
 }
@@ -1211,9 +1281,16 @@ func TestFailIfSelfSignedCA(t *testing.T) {
 		t.Fatal(err)
 	}
 	expectedResults := map[string]float64{
-		"probe_http_ssl": 0,
+		"probe_http_ssl":            0,
+		"probe_http_failures_total": 1,
 	}
 	checkRegistryResults(expectedResults, mfs, t)
+	expectedLabels := map[string]map[string]string{
+		"probe_http_failures_total": {
+			"reason": "request_certificate_unknown_authority",
+		},
+	}
+	checkRegistryLabels(expectedLabels, mfs, t)
 }
 
 func TestSucceedIfSelfSignedCA(t *testing.T) {
@@ -1544,3 +1621,105 @@ func TestBody(t *testing.T) {
 		}
 	}
 }
+
+func TestFailureMetricOnTimeout(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping network dependent test")
+	}
+	// Create a server that will fail due to timeout.
+	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(1000 * time.Millisecond) // Introducing delay
+		w.WriteHeader(http.StatusOK)
+		io.WriteString(w, "Delayed response")
+	}))
+	defer ts.Close()
+
+	// Probe with a deadline far shorter than the server delay; it must time out.
+	registry := prometheus.NewRegistry()
+	testCTX, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
+	defer cancel()
+	result := ProbeHTTP(testCTX, ts.URL,
+		config.Module{Timeout: 10 * time.Millisecond, HTTP: config.HTTPProbe{IPProtocolFallback: true, HTTPClientConfig: pconfig.DefaultHTTPClientConfig}}, registry, log.NewNopLogger())
+	if result {
+		t.Fatalf("expected probe to fail due to timeout")
+	}
+
+	mfs, err := registry.Gather()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expectedResults := map[string]float64{
+		"probe_http_failures_total": float64(1),
+	}
+	checkRegistryResults(expectedResults, mfs, t)
+
+	expectedLabels := map[string]map[string]string{
+		"probe_http_failures_total": {
+			"reason": "request_get_timeout",
+		},
+	}
+	checkRegistryLabels(expectedLabels, mfs, t)
+
+}
+
+func TestFailureMetricOnInvalidRequest(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping network dependent test")
+	}
+
+	tests := []struct {
+		name          string
+		method        string
+		url           string
+		failuresTotal float64
+		reason        string
+	}{
+		{
+			name:          "invalid method",
+			method:        "GET T",
+			url:           "http://localhost",
+			failuresTotal: 1,
+			reason:        "request_creation_error",
+		},
+		{
+			name:          "invalid url",
+			method:        "GET",
+			url:           ":",
+			failuresTotal: 1,
+			reason:        "dns_not_found",
+		},
+	}
+
+	for _, test := range tests {
+		// Run each case as a subtest so the deferred cancel fires when the case
+		// ends and failures are reported under the case name.
+		t.Run(test.name, func(t *testing.T) {
+			registry := prometheus.NewRegistry()
+			testCTX, cancel := context.WithTimeout(context.Background(), 10*time.Millisecond)
+			defer cancel()
+			result := ProbeHTTP(testCTX, test.url,
+				config.Module{Timeout: time.Second, HTTP: config.HTTPProbe{Method: test.method, IPProtocolFallback: true}}, registry, log.NewNopLogger())
+			if result {
+				t.Fatal("expected probe to fail due to invalid request")
+			}
+
+			mfs, err := registry.Gather()
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			expectedResults := map[string]float64{
+				"probe_http_failures_total": test.failuresTotal,
+			}
+			checkRegistryResults(expectedResults, mfs, t)
+
+			expectedLabels := map[string]map[string]string{
+				"probe_http_failures_total": {
+					"reason": test.reason,
+				},
+			}
+			checkRegistryLabels(expectedLabels, mfs, t)
+		})
+	}
+}
diff --git a/prober/utils_test.go b/prober/utils_test.go
index 1b1c41dc..554ac3f2 100644
--- a/prober/utils_test.go
+++ b/prober/utils_test.go
@@ -37,7 +37,12 @@ import (
 func checkRegistryResults(expRes map[string]float64, mfs []*dto.MetricFamily, t *testing.T) {
 	res := make(map[string]float64)
 	for i := range mfs {
-		res[mfs[i].GetName()] = mfs[i].Metric[0].GetGauge().GetValue()
+		switch mfs[i].GetType() {
+		case dto.MetricType_GAUGE:
+			res[mfs[i].GetName()] = mfs[i].Metric[0].GetGauge().GetValue()
+		case dto.MetricType_COUNTER:
+			res[mfs[i].GetName()] = mfs[i].Metric[0].GetCounter().GetValue()
+		}
 	}
 	for k, v := range expRes {
 		val, ok := res[k]