Skip to content

Commit

Permalink
Add GUC controlling whether to pause recovery if some critical GUCs a…
Browse files Browse the repository at this point in the history
…t replica have smaller value than on primary (#9057)

## Problem

See #9023

## Summary of changes

Add GUC `recovery_pause_on_misconfig` allowing recovery not to pause in case of
a replica and primary configuration mismatch

See neondatabase/postgres#501
See neondatabase/postgres#502
See neondatabase/postgres#503
See neondatabase/postgres#504


## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above
checklist

---------

Co-authored-by: Konstantin Knizhnik <[email protected]>
Co-authored-by: Heikki Linnakangas <[email protected]>
  • Loading branch information
3 people authored Dec 1, 2024
1 parent 4abc8e5 commit 97a9abd
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 9 deletions.
13 changes: 13 additions & 0 deletions pgxn/neon/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#include "access/subtrans.h"
#include "access/twophase.h"
#include "access/xlog.h"
#if PG_MAJORVERSION_NUM >= 15
#include "access/xlogrecovery.h"
#endif
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/walsender.h"
Expand Down Expand Up @@ -432,6 +435,16 @@ _PG_init(void)
restore_running_xacts_callback = RestoreRunningXactsFromClog;


DefineCustomBoolVariable(
"neon.allow_replica_misconfig",
"Allow replica startup when some critical GUCs have smaller value than on primary node",
NULL,
&allowReplicaMisconfig,
true,
PGC_POSTMASTER,
0,
NULL, NULL, NULL);

DefineCustomEnumVariable(
"neon.running_xacts_overflow_policy",
"Action performed on snapshot overflow when restoring runnings xacts from CLOG",
Expand Down
221 changes: 220 additions & 1 deletion test_runner/regress/test_physical_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import time
from typing import TYPE_CHECKING

import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import wait_replica_caughtup

if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnv

Expand All @@ -19,8 +23,8 @@ def test_physical_replication(neon_simple_env: NeonEnv):
p_cur.execute(
"CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))"
)
time.sleep(1)
with env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary") as secondary:
wait_replica_caughtup(primary, secondary)
with primary.connect() as p_con:
with p_con.cursor() as p_cur:
with secondary.connect() as s_con:
Expand All @@ -42,3 +46,218 @@ def test_physical_replication(neon_simple_env: NeonEnv):
s_cur.execute(
"select * from t where pk=%s", (random.randrange(1, 2 * pk),)
)


def test_physical_replication_config_mismatch_max_connections(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_connections).

    PostgreSQL enforces that settings that affect how many transactions can be open at the same time
    have values equal to or higher in a hot standby replica than in the primary. If they don't, the replica refuses
    to start up. If the settings are changed in the primary, it emits a WAL record with the new settings, and
    when the replica sees that record it pauses the replay.

    PostgreSQL enforces this to ensure that the replica can hold all the XIDs in the so-called
    "known-assigned XIDs" array, which is a fixed size array that needs to be allocated
    upfront at server startup. That's pretty pessimistic, though; usually you can get
    away with smaller settings, because we allocate space for 64 subtransactions per
    transaction too. If you get unlucky and you run out of space, WAL redo dies with
    "ERROR: too many KnownAssignedXids". It's better to take the chances than refuse
    to start up, especially in Neon: if the WAL redo dies, the server is restarted, which is
    no worse than refusing to start up in the first place. Furthermore, the control plane
    tries to ensure that on restart, the settings are set high enough, so most likely it will
    work after restart. Because of that, we have patched Postgres to disable the checks when
    the `recovery_pause_on_misconfig` setting is set to `false` (which is the default on neon).
    This test tests all those cases of running out of space in known-assigned XIDs array that
    we can hit with `recovery_pause_on_misconfig=false`, which are unreachable in unpatched
    Postgres.

    There's a similar check for `max_locks_per_transactions` too, which is related to running out
    of space in the lock manager rather than known-assigned XIDs. Similar story with that, although
    running out of space in the lock manager is possible in unmodified Postgres too. Enforcing the
    check for `max_locks_per_transactions` ensures that you don't run out of space in the lock manager
    when there are no read-only queries holding locks in the replica, but you can still run out if you have
    those.
    """
    env = neon_simple_env
    with env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
    ) as primary:
        with primary.connect() as p_con:
            with p_con.cursor() as p_cur:
                p_cur.execute(
                    "CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))"
                )
        # Replica is deliberately misconfigured with a lower max_connections
        # than the primary; with the checks disabled it must still start and
        # replay WAL instead of pausing.
        with env.endpoints.new_replica_start(
            origin=primary,
            endpoint_id="secondary",
            config_lines=["max_connections=5"],
        ) as secondary:
            wait_replica_caughtup(primary, secondary)
            with secondary.connect() as s_con:
                with s_con.cursor() as s_cur:
                    # Open 10 concurrent write transactions on the primary --
                    # more than the replica's max_connections=5 would normally
                    # leave room to track.
                    cursors = []
                    for i in range(10):
                        p_con = primary.connect()
                        p_cur = p_con.cursor()
                        p_cur.execute("begin")
                        p_cur.execute("insert into t (pk) values (%s)", (i,))
                        cursors.append(p_cur)

                    for p_cur in cursors:
                        p_cur.execute("commit")

                    # The replica should nevertheless catch up and see all rows.
                    wait_replica_caughtup(primary, secondary)
                    s_cur.execute("select count(*) from t")
                    assert s_cur.fetchall()[0][0] == 10


def test_physical_replication_config_mismatch_max_prepared(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_prepared_transactions).

    If the number of prepared transactions at the primary exceeds the limit at the
    replica, then WAL replay at the replica is terminated.
    """
    env = neon_simple_env
    primary = env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
        config_lines=["max_prepared_transactions=10"],
    )
    p_con = primary.connect()
    p_cur = p_con.cursor()
    p_cur.execute("CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))")

    # Replica allows only 5 prepared transactions; the primary will create 10.
    secondary = env.endpoints.new_replica_start(
        origin=primary,
        endpoint_id="secondary",
        config_lines=["max_prepared_transactions=5"],
    )
    wait_replica_caughtup(primary, secondary)

    s_con = secondary.connect()
    s_cur = s_con.cursor()
    cursors = []
    for i in range(10):
        p_con = primary.connect()
        p_cur = p_con.cursor()
        p_cur.execute("begin")
        p_cur.execute("insert into t (pk) values (%s)", (i,))
        p_cur.execute(f"prepare transaction 't{i}'")
        cursors.append(p_cur)

    for i in range(10):
        cursors[i].execute(f"commit prepared 't{i}'")

    # Give the replica time to replay the WAL and hit the limit; the query on
    # the (now crashed) replica connection is expected to fail.
    time.sleep(5)
    with pytest.raises(Exception) as e:
        s_cur.execute("select count(*) from t")
        assert s_cur.fetchall()[0][0] == 10
    secondary.stop()

    log.info(f"Replica crashed with {e}")
    assert secondary.log_contains("maximum number of prepared transactions reached")


def connect(ep):
    """Connect to endpoint *ep*, retrying transient failures.

    Makes up to 10 attempts with a 1 second pause between them. Returns the
    connection from ``ep.connect()`` on success. Raises a ``RuntimeError``
    (chained to the last connection error) if every attempt fails, instead of
    silently returning None -- a None return would only surface later as a
    confusing AttributeError at the call site.
    """
    max_reconnect_attempts = 10
    last_error = None
    for _ in range(max_reconnect_attempts):
        try:
            return ep.connect()
        except Exception as e:
            last_error = e
            log.info(f"Failed to connect with primary: {e}")
            time.sleep(1)
    raise RuntimeError(
        f"Failed to connect with primary after {max_reconnect_attempts} attempts"
    ) from last_error


def test_physical_replication_config_mismatch_too_many_known_xids(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_connections).

    In this case a large difference in this setting and a larger number of concurrent
    transactions at the primary cause a "too many KnownAssignedXids" error at the replica.
    """
    env = neon_simple_env
    primary = env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
        config_lines=[
            "max_connections=1000",
            "shared_buffers=128MB",  # prevent "no unpinned buffers available" error
        ],
    )
    # Keep every connection-related setting on the replica as small as
    # possible so its KnownAssignedXids array is tiny compared to the
    # primary's transaction load.
    secondary = env.endpoints.new_replica_start(
        origin=primary,
        endpoint_id="secondary",
        config_lines=[
            "max_connections=2",
            "autovacuum_max_workers=1",
            "max_worker_processes=5",
            "max_wal_senders=1",
            "superuser_reserved_connections=0",
        ],
    )

    p_con = primary.connect()
    p_cur = p_con.cursor()
    p_cur.execute("CREATE TABLE t(x integer)")

    # Open nearly max_connections concurrent write transactions on the primary.
    n_connections = 990
    cursors = []
    for i in range(n_connections):
        p_con = connect(primary)
        p_cur = p_con.cursor()
        p_cur.execute("begin")
        p_cur.execute(f"insert into t values({i})")
        cursors.append(p_cur)

    for cur in cursors:
        cur.execute("commit")

    # Give the replica time to replay the WAL; replay is expected to die, so
    # connecting/querying the replica should fail.
    time.sleep(5)
    with pytest.raises(Exception) as e:
        s_con = secondary.connect()
        s_cur = s_con.cursor()
        s_cur.execute("select count(*) from t")
        assert s_cur.fetchall()[0][0] == n_connections
    secondary.stop()

    log.info(f"Replica crashed with {e}")
    assert secondary.log_contains("too many KnownAssignedXids")


def test_physical_replication_config_mismatch_max_locks_per_transaction(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_locks_per_transaction).

    In conjunction with a different number of max_connections at primary and standby it can cause
    an "out of shared memory" error if the primary obtains more AccessExclusiveLocks than the
    standby can hold.
    """
    env = neon_simple_env
    primary = env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
        config_lines=[
            "max_locks_per_transaction = 100",
        ],
    )
    # Replica lock table is sized roughly by
    # max_locks_per_transaction * max_connections, so keep both small.
    secondary = env.endpoints.new_replica_start(
        origin=primary,
        endpoint_id="secondary",
        config_lines=[
            "max_connections=10",
            "max_locks_per_transaction = 10",
        ],
    )

    n_tables = 1000

    # Creating many tables in a single transaction takes an
    # AccessExclusiveLock per table, exceeding the replica's lock capacity.
    p_con = primary.connect()
    p_cur = p_con.cursor()
    p_cur.execute("begin")
    for i in range(n_tables):
        p_cur.execute(f"CREATE TABLE t_{i}(x integer)")
    p_cur.execute("commit")

    # Waiting for catch-up is expected to fail because replay dies on the replica.
    with pytest.raises(Exception) as e:
        wait_replica_caughtup(primary, secondary)
    secondary.stop()

    log.info(f"Replica crashed with {e}")
    assert secondary.log_contains("You might need to increase")
2 changes: 1 addition & 1 deletion vendor/postgres-v14
2 changes: 1 addition & 1 deletion vendor/postgres-v15
2 changes: 1 addition & 1 deletion vendor/postgres-v16
2 changes: 1 addition & 1 deletion vendor/postgres-v17
8 changes: 4 additions & 4 deletions vendor/revisions.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
{
"v17": [
"17.2",
"faebe5e5aff5687908504453623778f8515529db"
"a10d95be67265e0f10a422ba0457f5a7af01de71"
],
"v16": [
"16.6",
"13e9e3539419003e79bd9aa29e1bc44f3fd555dd"
"dff6615a8e48a10bb17a03fa3c00635f1ace7a92"
],
"v15": [
"15.10",
"d929b9a8b9f32f6fe5a0eac3e6e963f0e44e27e6"
"972e325e62b455957adbbdd8580e31275bb5b8c9"
],
"v14": [
"14.15",
"c1989c934d46e04e78b3c496c8a34bcd40ddceeb"
"373f9decad933d2d46f321231032ae8b0da81acd"
]
}

1 comment on commit 97a9abd

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

7122 tests run: 6804 passed, 0 failed, 318 skipped (full report)


Flaky tests (2)

Postgres 17

Postgres 14

Code coverage* (full report)

  • functions: 30.4% (8274 of 27226 functions)
  • lines: 47.8% (65224 of 136507 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
97a9abd at 2024-12-01T14:19:56.529Z :recycle:

Please sign in to comment.