Skip to content

Commit

Permalink
add gpu power consumption chart
Browse files Browse the repository at this point in the history
  • Loading branch information
henrygd committed Nov 9, 2024
1 parent bcb7de1 commit 03de735
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 21 deletions.
2 changes: 1 addition & 1 deletion beszel/internal/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func (a *Agent) Run(pubKey []byte, addr string) {
// initialize GPU manager
if os.Getenv("GPU") == "true" {
if gm, err := NewGPUManager(); err != nil {
slog.Error("GPU manager", "err", err)
slog.Warn("GPU", "err", err)
} else {
a.gpuManager = gm
}
Expand Down
29 changes: 12 additions & 17 deletions beszel/internal/agent/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ func (gm *GPUManager) parseNvidiaData(output []byte) {
}
// update gpu data
gpu := gm.GpuDataMap[id]
gpu.Temperature += temp
gpu.MemoryUsed += memoryUsage / 1.024
gpu.MemoryTotal += totalMemory / 1.024
gpu.Temperature = temp
gpu.MemoryUsed = memoryUsage / 1.024
gpu.MemoryTotal = totalMemory / 1.024
gpu.Usage += usage
gpu.Power += power
gpu.Count++
Expand All @@ -120,7 +120,7 @@ func (gm *GPUManager) startAmdCollector() {

// collectAmdStats runs rocm-smi in a loop and passes the output to parseAmdData
func (gm *GPUManager) collectAmdStats() error {
cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 4.7; done")
cmd := exec.Command("/bin/sh", "-c", "while true; do rocm-smi --showid --showtemp --showuse --showpower --showproductname --showmeminfo vram --json; sleep 3.7; done")
// Set up a pipe to capture stdout
stdout, err := cmd.StdoutPipe()
if err != nil {
Expand Down Expand Up @@ -168,9 +168,9 @@ func (gm *GPUManager) parseAmdData(rocmSmiInfo *map[string]RocmSmiJson) {
gm.GpuDataMap[v.ID] = &system.GPUData{Name: v.Name}
}
gpu := gm.GpuDataMap[v.ID]
gpu.Temperature += temp
gpu.MemoryUsed += memoryUsage
gpu.MemoryTotal += totalMemory
gpu.Temperature = temp
gpu.MemoryUsed = memoryUsage
gpu.MemoryTotal = totalMemory
gpu.Usage += usage
gpu.Power += power
gpu.Count++
Expand All @@ -185,19 +185,14 @@ func (gm *GPUManager) GetCurrentData() map[string]system.GPUData {
gpuData := make(map[string]system.GPUData, len(gm.GpuDataMap))
for id, gpu := range gm.GpuDataMap {
// sum the data
gpu.Temperature = twoDecimals(gpu.Temperature / gpu.Count)
gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed / gpu.Count)
gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal / gpu.Count)
gpu.Temperature = twoDecimals(gpu.Temperature)
gpu.MemoryUsed = twoDecimals(gpu.MemoryUsed)
gpu.MemoryTotal = twoDecimals(gpu.MemoryTotal)
gpu.Usage = twoDecimals(gpu.Usage / gpu.Count)
gpu.Power = twoDecimals(gpu.Power / gpu.Count)
gpuData[id] = *gpu
// reset the data
gpu.Temperature = 0
gpu.MemoryUsed = 0
gpu.MemoryTotal = 0
gpu.Usage = 0
gpu.Power = 0
gpu.Count = 0
// reset the count
gpu.Count = 1
}
return gpuData
}
Expand Down
112 changes: 112 additions & 0 deletions beszel/site/src/components/charts/gpu-power-chart.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import { CartesianGrid, Line, LineChart, YAxis } from "recharts"

import {
ChartContainer,
ChartLegend,
ChartLegendContent,
ChartTooltip,
ChartTooltipContent,
xAxis,
} from "@/components/ui/chart"
import {
useYAxisWidth,
cn,
formatShortDate,
toFixedWithoutTrailingZeros,
decimalString,
chartMargin,
} from "@/lib/utils"
import { ChartData } from "@/types"
import { memo, useMemo } from "react"

/**
 * Line chart showing the power draw (in watts) of each GPU over time.
 *
 * One line is drawn per GPU name found in `chartData.systemStats[*].stats.g`.
 * Lines are colored by evenly spacing hues around the color wheel, ordered by
 * each GPU's total recorded power (highest first).
 *
 * Renders nothing when there are no system stats records.
 *
 * @param chartData - Shared chart data; `systemStats` supplies the records and
 *   `orientation` is forwarded to the Y axis.
 */
export default memo(function GpuPowerChart({ chartData }: { chartData: ChartData }) {
	const { yAxisWidth, updateYAxisWidth } = useYAxisWidth()

	/** Format power data for chart and assign colors */
	const newChartData = useMemo(() => {
		const newChartData = { data: [], colors: {} } as {
			data: Record<string, number | string>[]
			colors: Record<string, string>
		}
		// running total of power per GPU name, used only to order the colors
		const powerSums = {} as Record<string, number>
		for (const record of chartData.systemStats) {
			const point = { created: record.created } as Record<string, number | string>

			for (const gpu of Object.values(record.stats?.g ?? {})) {
				const power = gpu.p
				// falsy readings (missing or 0) are left out of the data point
				if (power) {
					const name = gpu.n
					point[name] = power
					powerSums[name] = (powerSums[name] ?? 0) + power
				}
			}
			newChartData.data.push(point)
		}
		// highest total power first, then spread hues evenly across the sorted keys
		const keys = Object.keys(powerSums).sort((a, b) => powerSums[b] - powerSums[a])
		keys.forEach((key, index) => {
			newChartData.colors[key] = `hsl(${((index * 360) / keys.length) % 360}, 60%, 55%)`
		})
		return newChartData
	}, [chartData])

	// This guard must come after every hook call: returning before `useMemo`
	// would change the number of hooks between renders (Rules of Hooks).
	if (chartData.systemStats.length === 0) {
		return null
	}

	const colors = Object.keys(newChartData.colors)

	return (
		<div>
			<ChartContainer
				// stay transparent until the Y axis width has been measured
				className={cn("h-full w-full absolute aspect-auto bg-card opacity-0 transition-opacity", {
					"opacity-100": yAxisWidth,
				})}
			>
				<LineChart accessibilityLayer data={newChartData.data} margin={chartMargin}>
					<CartesianGrid vertical={false} />
					<YAxis
						direction="ltr"
						orientation={chartData.orientation}
						className="tracking-tighter"
						domain={[0, "auto"]}
						width={yAxisWidth}
						tickFormatter={(value) => {
							const val = toFixedWithoutTrailingZeros(value, 2)
							// the formatted label is passed through updateYAxisWidth, whose return value is the tick text
							return updateYAxisWidth(val + "W")
						}}
						tickLine={false}
						axisLine={false}
					/>
					{xAxis(chartData)}
					<ChartTooltip
						animationEasing="ease-out"
						animationDuration={150}
						// @ts-ignore
						itemSorter={(a, b) => b.value - a.value}
						content={
							<ChartTooltipContent
								labelFormatter={(_, data) => formatShortDate(data[0].payload.created)}
								contentFormatter={(item) => decimalString(item.value) + "W"}
								// indicator="line"
							/>
						}
					/>
					{colors.map((key) => (
						<Line
							key={key}
							dataKey={key}
							name={key}
							type="monotoneX"
							dot={false}
							strokeWidth={1.5}
							stroke={newChartData.colors[key]}
							isAnimationActive={false}
						/>
					))}
					{/* the legend is only rendered for fewer than 12 series */}
					{colors.length < 12 && <ChartLegend content={<ChartLegendContent />} />}
				</LineChart>
			</ChartContainer>
		</div>
	)
})
22 changes: 19 additions & 3 deletions beszel/site/src/components/routes/system.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ const MemChart = lazy(() => import("../charts/mem-chart"))
const DiskChart = lazy(() => import("../charts/disk-chart"))
const SwapChart = lazy(() => import("../charts/swap-chart"))
const TemperatureChart = lazy(() => import("../charts/temperature-chart"))
const GpuPowerChart = lazy(() => import("../charts/gpu-power-chart"))

const cache = new Map<string, any>()

Expand Down Expand Up @@ -285,6 +286,7 @@ export default function SystemDetail({ name }: { name: string }) {

// if no data, show empty message
const dataEmpty = !chartLoading && chartData.systemStats.length === 0
const hasGpuData = Object.keys(systemStats.at(-1)?.stats.g ?? {}).length > 0

return (
<>
Expand Down Expand Up @@ -455,6 +457,7 @@ export default function SystemDetail({ name }: { name: string }) {
</div>
)}

{/* Swap chart */}
{(systemStats.at(-1)?.stats.su ?? 0) > 0 && (
<ChartCard
empty={dataEmpty}
Expand All @@ -466,6 +469,7 @@ export default function SystemDetail({ name }: { name: string }) {
</ChartCard>
)}

{/* Temperature chart */}
{systemStats.at(-1)?.stats.t && (
<ChartCard
empty={dataEmpty}
Expand All @@ -476,10 +480,22 @@ export default function SystemDetail({ name }: { name: string }) {
<TemperatureChart chartData={chartData} />
</ChartCard>
)}

{/* GPU power draw chart */}
{hasGpuData && (
<ChartCard
empty={dataEmpty}
grid={grid}
title="GPU Power Draw"
description="Average power consumption of GPUs"
>
<GpuPowerChart chartData={chartData} />
</ChartCard>
)}
</div>

{/* GPU charts */}
{Object.keys(systemStats.at(-1)?.stats.g ?? {}).length > 0 && (
{hasGpuData && (
<div className="grid xl:grid-cols-2 gap-4">
{Object.keys(systemStats.at(-1)?.stats.g ?? {}).map((id) => {
const gpu = systemStats.at(-1)?.stats.g?.[id] as GPUData
Expand All @@ -489,15 +505,15 @@ export default function SystemDetail({ name }: { name: string }) {
empty={dataEmpty}
grid={grid}
title={`${gpu.n} ${t`Usage`}`}
description={t`Total utilization of ${gpu.n}`}
description={`Average utilization of ${gpu.n}`}
>
<AreaChartDefault chartData={chartData} chartName={`g.${id}.u`} unit="%" />
</ChartCard>
<ChartCard
empty={dataEmpty}
grid={grid}
title={`${gpu.n} VRAM`}
description={t`VRAM usage of ${gpu.n}`}
description={t`Precise utilization at the recorded time`}
>
<AreaChartDefault
chartData={chartData}
Expand Down

0 comments on commit 03de735

Please sign in to comment.