From 9c9d15af5f543ab70ea32d5ede42b6b18e45c3bd Mon Sep 17 00:00:00 2001 From: Igor Peshansky Date: Fri, 26 Jul 2024 04:22:13 -0400 Subject: [PATCH] More logging. --- receiver/dcgmreceiver/client.go | 2 ++ receiver/dcgmreceiver/scraper.go | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/receiver/dcgmreceiver/client.go b/receiver/dcgmreceiver/client.go index 6c1122294..1a2e97c7b 100644 --- a/receiver/dcgmreceiver/client.go +++ b/receiver/dcgmreceiver/client.go @@ -384,9 +384,11 @@ func (client *dcgmClient) collectDeviceMetrics() (map[uint][]dcgmMetric, error) var err scrapererror.ScrapeErrors gpuMetrics := make(map[uint][]dcgmMetric) for _, gpuIndex := range client.deviceIndices { + client.logger.Debugf("Polling DCGM daemon for GPU %d", gpuIndex) retry := true for i := 0; retry && i < client.maxRetries; i++ { fieldValues, pollErr := dcgmGetLatestValuesForFields(gpuIndex, client.enabledFieldIDs) + client.logger.Debugf("Got %d field values", len(fieldValues)) if pollErr == nil { gpuMetrics[gpuIndex], retry = client.appendMetrics(gpuMetrics[gpuIndex], gpuIndex, fieldValues) if retry { diff --git a/receiver/dcgmreceiver/scraper.go b/receiver/dcgmreceiver/scraper.go index 499b2cbcd..611618b6c 100644 --- a/receiver/dcgmreceiver/scraper.go +++ b/receiver/dcgmreceiver/scraper.go @@ -180,7 +180,13 @@ func (s *dcgmScraper) scrape(_ context.Context) (pmetric.Metrics, error) { return s.mb.Emit(), err } + s.settings.Logger.Sugar().Info(fmt.Sprintf("Client created: %v; collecting metrics", s.client)) deviceMetrics, err := s.client.collectDeviceMetrics() + if err != nil { + s.settings.Logger.Sugar().Warn(fmt.Sprintf("Metrics not collected; err=%v", err)) + return s.mb.Emit(), err + } + s.settings.Logger.Sugar().Info(fmt.Sprintf("Metrics collected: %d", len(deviceMetrics))) now := pcommon.NewTimestampFromTime(time.Now()) for gpuIndex, gpuMetrics := range deviceMetrics { @@ -188,6 +194,7 @@ func (s *dcgmScraper) scrape(_ context.Context) (pmetric.Metrics, error) { for _, metric := range gpuMetrics { metricsByName[metric.name] = append(metricsByName[metric.name], metric) } + s.settings.Logger.Sugar().Warn(fmt.Sprintf("Got %d unique metrics: %v", len(metricsByName), metricsByName)) metrics := make(map[string]dcgmMetric) for name, points := range metricsByName { slices.SortStableFunc(points, func(a, b dcgmMetric) int {