Skip to content

Commit

Permalink
fix: Guard system memory stat calculations against missing data point…
Browse files Browse the repository at this point in the history
…s and division by zero (#2206)

Fixes #2200.
  • Loading branch information
alco authored Dec 24, 2024
1 parent 7e6f6c4 commit 84eb729
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 30 deletions.
5 changes: 5 additions & 0 deletions .changeset/chilly-peaches-tease.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@core/sync-service": patch
---

Fix arithmetic bugs in system memory stat calculations.
106 changes: 76 additions & 30 deletions packages/sync-service/lib/electric/telemetry.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ defmodule Electric.Telemetry do

import Telemetry.Metrics

require Logger

def start_link(init_arg) do
Supervisor.start_link(__MODULE__, init_arg, name: __MODULE__)
end
Expand Down Expand Up @@ -288,47 +290,91 @@ defmodule Electric.Telemetry do
)
end

@required_system_memory_keys ~w[system_total_memory free_memory buffered_memory cached_memory]a
@required_swap_keys ~w[total_swap free_swap]a

def get_system_memory_usage(opts) do
{total, stats} =
:memsup.get_system_memory_data()
|> Keyword.delete(:total_memory)
|> Keyword.pop(:system_total_memory)
metadata = %{stack_id: opts[:stack_id]}

system_memory = Map.new(:memsup.get_system_memory_data())

# Sanity-check that all the required keys are present before doing any arithmetic on them
missing_system_memory_keys =
Enum.reject(@required_system_memory_keys, &Map.has_key?(system_memory, &1))

mem_stats =
Keyword.take(stats, [:free_memory, :available_memory, :buffered_memory, :cached_memory])
cond do
missing_system_memory_keys != [] ->
Logger.warning(
"Error gathering system memory stats: " <>
"missing data points #{Enum.join(missing_system_memory_keys, ", ")}"
)

{total_swap, stats} = Keyword.pop(stats, :total_swap)
%{}

used_memory = total - Keyword.fetch!(mem_stats, :free_memory)
resident_memory = total - Keyword.fetch!(mem_stats, :available_memory)
system_memory.system_total_memory == 0 ->
Logger.warning("Error gathering system memory stats: zero total memory reported")
%{}

memory_stats =
mem_stats
|> Map.new()
|> Map.put(:used_memory, used_memory)
|> Map.put(:resident_memory, resident_memory)
true ->
total = system_memory.system_total_memory

memory_percent_stats =
mem_stats
|> Map.new(fn {k, v} -> {k, 100 * v / total} end)
|> Map.put(:used_memory, 100 * used_memory / total)
|> Map.put(:resident_memory, 100 * resident_memory / total)
used = total - system_memory.free_memory

:telemetry.execute([:system, :memory], memory_stats, %{
stack_id: opts[:stack_id]
})
resident =
with nil <- Map.get(system_memory, :available_memory) do
total -
(system_memory.free_memory + system_memory.buffered_memory +
system_memory.cached_memory)
end

:telemetry.execute([:system, :memory_percent], memory_percent_stats, %{
stack_id: opts[:stack_id]
})
mem_stats =
system_memory
|> Map.take(~w[available_memory free_memory buffered_memory cached_memory]a)
|> Map.put(:used_memory, used)
|> Map.put(:resident_memory, resident)

mem_percent_stats = Map.new(mem_stats, fn {k, v} -> {k, 100 * v / total} end)

mem_stats = Map.put(mem_stats, :total_memory, total)

:telemetry.execute([:system, :memory], mem_stats, metadata)
:telemetry.execute([:system, :memory_percent], mem_percent_stats, metadata)

mem_stats
end

# Sanity-check that all the required swap keys are present
missing_swap_keys = Enum.reject(@required_swap_keys, &Map.has_key?(system_memory, &1))

swap_stats =
if missing_swap_keys != [] do
Logger.warning(
"Error gathering system swap stats: " <>
"missing data points #{Enum.join(missing_swap_keys, ", ")}"
)

%{}
else
total = system_memory.total_swap
free = system_memory.free_swap
used = total - free

swap_stats = %{total_swap: total, free_swap: free, used_swap: used}

swap_percent_stats =
if total > 0 do
%{free_swap: 100 * free / total, used_swap: 100 * used / total}
else
%{free_swap: 0, used_swap: 0}
end

free_swap = Keyword.get(stats, :free_swap, 0)
used_swap = total_swap - free_swap
:telemetry.execute([:system, :swap], swap_stats, metadata)
:telemetry.execute([:system, :swap_percent], swap_percent_stats, metadata)

swap_stats = %{total: total_swap, free: free_swap, used: used_swap}
swap_percent_stats = %{free: 100 * free_swap / total_swap, used: 100 * used_swap / total_swap}
swap_stats
end

:telemetry.execute([:system, :swap], swap_stats, %{stack_id: opts[:stack_id]})
:telemetry.execute([:system, :swap_percent], swap_percent_stats, %{stack_id: opts[:stack_id]})
Map.merge(mem_stats, swap_stats)
end
end
20 changes: 20 additions & 0 deletions packages/sync-service/test/electric/telemetry_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
defmodule Electric.TelemetryTest do
use ExUnit.Case, async: true

describe "get_system_memory_usage" do
test "returns calculated memory stats" do
assert %{
total_memory: _,
available_memory: _,
buffered_memory: _,
cached_memory: _,
free_memory: _,
used_memory: _,
resident_memory: _,
total_swap: _,
free_swap: _,
used_swap: _
} = Electric.Telemetry.get_system_memory_usage([])
end
end
end

0 comments on commit 84eb729

Please sign in to comment.