diff --git a/test/awsneuron/neuron_metrics_test.go b/test/awsneuron/neuron_metrics_test.go index 2ce9719f0..a969d9bf1 100644 --- a/test/awsneuron/neuron_metrics_test.go +++ b/test/awsneuron/neuron_metrics_test.go @@ -45,6 +45,9 @@ var expectedDimsToMetrics = map[string][]string{ }, "ClusterName-InstanceId-NeuronDevice-NodeName": { NodeNeuronDeviceHwEccEvents, + NodeNeuronCoreLimit, NodeNeuronCoreUsageTotalAlloc, NodeNeuronCoreRequest, + NodeNeuronCoreReservedCapacity, NodeNeuronCoreUnreservedCapacity, NodeNeuronCoreAvailableCapacity, + PodNeuronCoreLimit, PodNeuronCoreUsageTotalAlloc, PodNeuronCoreRequest, PodNeuronCoreReservedCapacity, }, } diff --git a/test/awsneuron/resources/metrics_list.go b/test/awsneuron/resources/metrics_list.go index ce4094597..2996c6946 100644 --- a/test/awsneuron/resources/metrics_list.go +++ b/test/awsneuron/resources/metrics_list.go @@ -45,4 +45,16 @@ const ( NodeExecutionStatusFailedToQueue = "node_neuron_execution_status_failed_to_queue" NodeNeuronDeviceRuntimeMemoryUsed = "node_neurondevice_runtime_memory_used_bytes" NodeNeuronExecutionLatency = "node_neuron_execution_latency" + + NodeNeuronCoreLimit = "node_neuroncore_limit" + NodeNeuronCoreUsageTotalAlloc = "node_neuroncore_usage_total" + NodeNeuronCoreRequest = "node_neuroncore_request" + NodeNeuronCoreReservedCapacity = "node_neuroncore_reserved_capacity" + NodeNeuronCoreUnreservedCapacity = "node_neuroncore_unreserved_capacity" + NodeNeuronCoreAvailableCapacity = "node_neuroncore_available_capacity" + + PodNeuronCoreLimit = "pod_neuroncore_limit" + PodNeuronCoreUsageTotalAlloc = "pod_neuroncore_usage_total" + PodNeuronCoreRequest = "pod_neuroncore_request" + PodNeuronCoreReservedCapacity = "pod_neuroncore_reserved_capacity" ) diff --git a/test/metric_value_benchmark/eks_resources/test_schemas/node_neuroncore.json b/test/metric_value_benchmark/eks_resources/test_schemas/node_neuroncore.json index 62b522224..23d2a4244 100644 --- a/test/metric_value_benchmark/eks_resources/test_schemas/node_neuroncore.json +++ b/test/metric_value_benchmark/eks_resources/test_schemas/node_neuroncore.json @@ -26,7 +26,13 @@ "node_neuroncore_memory_usage_runtime_memory": {}, "node_neuroncore_memory_usage_tensors": {}, "node_neuroncore_memory_usage_total": {}, - "node_neuroncore_utilization": {} + "node_neuroncore_utilization": {}, + "node_neuroncore_limit": {}, + "node_neuroncore_usage_total": {}, + "node_neuroncore_request": {}, + "node_neuroncore_reserved_capacity": {}, + "node_neuroncore_unreserved_capacity": {}, + "node_neuroncore_available_capacity": {} }, "required": [ "ClusterName", diff --git a/test/metric_value_benchmark/eks_resources/test_schemas/pod_neuroncore.json b/test/metric_value_benchmark/eks_resources/test_schemas/pod_neuroncore.json index 2d00322c4..1e685271f 100644 --- a/test/metric_value_benchmark/eks_resources/test_schemas/pod_neuroncore.json +++ b/test/metric_value_benchmark/eks_resources/test_schemas/pod_neuroncore.json @@ -29,7 +29,11 @@ "pod_neuroncore_memory_usage_runtime_memory": {}, "pod_neuroncore_memory_usage_tensors": {}, "pod_neuroncore_memory_usage_total": {}, - "pod_neuroncore_utilization": {} + "pod_neuroncore_utilization": {}, + "pod_neuroncore_limit": {}, + "pod_neuroncore_usage_total": {}, + "pod_neuroncore_request": {}, + "pod_neuroncore_reserved_capacity": {} }, "required": [ "ClusterName",