Skip to content

Commit

Permalink
Fix the size of the perf counters shared memory array (#9226)
Browse files Browse the repository at this point in the history
MaxBackends doesn't include auxiliary processes. Whenever an aux process
made IO operations that updated the counters, they would scribble over
shared memory beoynd the end of the array. The relsize cache hash table
comes after the array, so the symptom was an error about hash table
corruption in the relsize cache hash.
  • Loading branch information
hlinnaka authored Oct 1, 2024
1 parent 62e22df commit 8861e8a
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
9 changes: 5 additions & 4 deletions pgxn/neon/neon_perf_counters.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ NeonPerfCountersShmemSize(void)
{
Size size = 0;

size = add_size(size, mul_size(MaxBackends, sizeof(neon_per_backend_counters)));
size = add_size(size, mul_size(NUM_NEON_PERF_COUNTER_SLOTS,
sizeof(neon_per_backend_counters)));

return size;
}
Expand All @@ -39,7 +40,7 @@ NeonPerfCountersShmemInit(void)

neon_per_backend_counters_shared =
ShmemInitStruct("Neon perf counters",
mul_size(MaxBackends,
mul_size(NUM_NEON_PERF_COUNTER_SLOTS,
sizeof(neon_per_backend_counters)),
&found);
Assert(found == IsUnderPostmaster);
Expand Down Expand Up @@ -192,7 +193,7 @@ neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
/* We put all the tuples into a tuplestore in one go. */
InitMaterializedSRF(fcinfo, 0);

for (int procno = 0; procno < MaxBackends; procno++)
for (int procno = 0; procno < NUM_NEON_PERF_COUNTER_SLOTS; procno++)
{
PGPROC *proc = GetPGProcByNumber(procno);
int pid = proc->pid;
Expand Down Expand Up @@ -231,7 +232,7 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
InitMaterializedSRF(fcinfo, 0);

/* Aggregate the counters across all backends */
for (int procno = 0; procno < MaxBackends; procno++)
for (int procno = 0; procno < NUM_NEON_PERF_COUNTER_SLOTS; procno++)
{
neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];

Expand Down
8 changes: 8 additions & 0 deletions pgxn/neon/neon_perf_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ typedef struct
/* Pointer to the shared memory array of neon_per_backend_counters structs */
extern neon_per_backend_counters *neon_per_backend_counters_shared;

/*
* Size of the perf counters array in shared memory. One slot for each backend
* and aux process. IOW one for each PGPROC slot, except for slots reserved
* for prepared transactions, because they're not real processes and cannot do
* I/O.
*/
#define NUM_NEON_PERF_COUNTER_SLOTS (MaxBackends + NUM_AUXILIARY_PROCS)

#if PG_VERSION_NUM >= 170000
#define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber])
#else
Expand Down
14 changes: 14 additions & 0 deletions pgxn/neon/pagestore_smgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1773,6 +1773,20 @@ neon_init(void)
if (MyPState != NULL)
return;

/*
* Sanity check that theperf counters array is sized correctly. We got
* this wrong once, and the formula for max number of backends and aux
* processes might well change in the future, so better safe than sorry.
* This is a very cheap check so we do it even without assertions. On
* v14, this gets called before initializing MyProc, so we cannot perform
* the check here. That's OK, we don't expect the logic to change in old
* releases.
*/
#if PG_VERSION_NUM>=150000
if (MyNeonCounters >= &neon_per_backend_counters_shared[NUM_NEON_PERF_COUNTER_SLOTS])
elog(ERROR, "MyNeonCounters points past end of array");
#endif

prfs_size = offsetof(PrefetchState, prf_buffer) +
sizeof(PrefetchRequest) * readahead_buffer_size;

Expand Down

1 comment on commit 8861e8a

@github-actions
Copy link

@github-actions github-actions bot commented on 8861e8a Oct 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

5162 tests run: 4992 passed, 2 failed, 168 skipped (full report)


Failures on Postgres 16

  • test_compaction_l0_memory[github-actions-selfhosted]: release-x86-64
  • test_gc_feedback_with_snapshots[github-actions-selfhosted]: release-x86-64
# Run all failed tests locally:
scripts/pytest -vv -n $(nproc) -k "test_compaction_l0_memory[release-pg16-github-actions-selfhosted] or test_gc_feedback_with_snapshots[release-pg16-github-actions-selfhosted]"
Flaky tests (6)

Postgres 17

Postgres 16

Postgres 14

Code coverage* (full report)

  • functions: 31.3% (7486 of 23881 functions)
  • lines: 49.6% (60095 of 121224 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
8861e8a at 2024-10-01T22:08:36.388Z :recycle:

Please sign in to comment.