diff --git a/cmd/vGPUmonitor/feedback.go b/cmd/vGPUmonitor/feedback.go index 969fcb885..5a567df4a 100644 --- a/cmd/vGPUmonitor/feedback.go +++ b/cmd/vGPUmonitor/feedback.go @@ -232,27 +232,27 @@ func Observe(lister *nvidia.ContainerLister) { utilizationSwitch := c.Info.GetUtilizationSwitch() if CheckBlocking(utSwitchOn, priority, c) { if recentKernel >= 0 { - klog.Infof("utSwitchon=%v", utSwitchOn) - klog.Infof("Setting Blocking to on %v", idx) + klog.V(5).Infof("utSwitchon=%v", utSwitchOn) + klog.V(5).Infof("Setting Blocking to on %v", idx) c.Info.SetRecentKernel(-1) } } else { if recentKernel < 0 { - klog.Infof("utSwitchon=%v", utSwitchOn) - klog.Infof("Setting Blocking to off %v", idx) + klog.V(5).Infof("utSwitchon=%v", utSwitchOn) + klog.V(5).Infof("Setting Blocking to off %v", idx) c.Info.SetRecentKernel(0) } } if CheckPriority(utSwitchOn, priority, c) { if utilizationSwitch != 1 { - klog.Infof("utSwitchon=%v", utSwitchOn) - klog.Infof("Setting UtilizationSwitch to on %v", idx) + klog.V(5).Infof("utSwitchon=%v", utSwitchOn) + klog.V(5).Infof("Setting UtilizationSwitch to on %v", idx) c.Info.SetUtilizationSwitch(1) } } else { if utilizationSwitch != 0 { - klog.Infof("utSwitchon=%v", utSwitchOn) - klog.Infof("Setting UtilizationSwitch to off %v", idx) + klog.V(5).Infof("utSwitchon=%v", utSwitchOn) + klog.V(5).Infof("Setting UtilizationSwitch to off %v", idx) c.Info.SetUtilizationSwitch(0) } } diff --git a/cmd/vGPUmonitor/main.go b/cmd/vGPUmonitor/main.go index 2b477afb1..a0689b14f 100644 --- a/cmd/vGPUmonitor/main.go +++ b/cmd/vGPUmonitor/main.go @@ -110,7 +110,7 @@ func start() { } func initMetrics(ctx context.Context, containerLister *nvidia.ContainerLister) error { - klog.Info("Initializing metrics for vGPUmonitor") + klog.V(4).Info("Initializing metrics for vGPUmonitor") reg := prometheus.NewRegistry() //reg := prometheus.NewPedanticRegistry() @@ -137,7 +137,7 @@ func initMetrics(ctx context.Context, containerLister *nvidia.ContainerLister) e // Graceful shutdown on context cancellation <-ctx.Done() - klog.Info("Shutting down metrics server") + klog.V(4).Info("Shutting down metrics server") if err := server.Shutdown(context.Background()); err != nil { return err } diff --git a/cmd/vGPUmonitor/metrics.go b/cmd/vGPUmonitor/metrics.go index 0841f3dd6..98c3651b1 100644 --- a/cmd/vGPUmonitor/metrics.go +++ b/cmd/vGPUmonitor/metrics.go @@ -313,18 +313,18 @@ func (cc ClusterManagerCollector) collectPodAndContainerInfo(ch chan<- prometheu for _, pod := range pods { podContainers, found := containerMap[string(pod.UID)] if !found { - klog.V(4).Infof("No containers found for pod %s/%s", pod.Namespace, pod.Name) + klog.V(5).Infof("No containers found for pod %s/%s", pod.Namespace, pod.Name) continue } - klog.V(2).Infof("Processing Pod %s/%s", pod.Namespace, pod.Name) + klog.V(5).Infof("Processing Pod %s/%s", pod.Namespace, pod.Name) // Iterate through each container in the Pod for _, ctr := range pod.Spec.Containers { // Find the matching container for _, c := range podContainers { if c.ContainerName == ctr.Name { - klog.V(2).Infof("Processing Container %s in Pod %s/%s", ctr.Name, pod.Namespace, pod.Name) + klog.V(5).Infof("Processing Container %s in Pod %s/%s", ctr.Name, pod.Namespace, pod.Name) if err := cc.collectContainerMetrics(ch, pod, ctr, c, nowSec); err != nil { klog.Errorf("Failed to collect metrics for container %s in Pod %s/%s: %v", ctr.Name, pod.Namespace, pod.Name, err) } @@ -334,7 +334,7 @@ func (cc ClusterManagerCollector) collectPodAndContainerInfo(ch chan<- prometheu } } - klog.V(2).Infof("Finished collecting metrics for %d pods", len(pods)) + klog.V(4).Infof("Finished collecting metrics for %d pods", len(pods)) return nil } @@ -409,7 +409,7 @@ func (cc ClusterManagerCollector) collectContainerMetrics(ch chan<- prometheus.M } } - klog.V(2).Infof("Successfully collected metrics for Pod %s/%s, Container %s", pod.Namespace, pod.Name, ctr.Name) + klog.V(5).Infof("Successfully collected metrics for Pod %s/%s, Container %s", pod.Namespace, pod.Name, ctr.Name) return nil }