Skip to content

Commit

Permalink
Add GUC controlling whether to pause recovery if some critical GUCs a…
Browse files Browse the repository at this point in the history
…t replica have smaller value than on primary (#9057)

## Problem

See #9023

## Summary of changes

Add GUC `recovery_pause_on_misconfig` allowing recovery not to pause in case of
a replica and primary configuration mismatch

See neondatabase/postgres#501
See neondatabase/postgres#502
See neondatabase/postgres#503
See neondatabase/postgres#504


## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above
checklist

---------

Co-authored-by: Konstantin Knizhnik <[email protected]>
Co-authored-by: Heikki Linnakangas <[email protected]>
  • Loading branch information
3 people authored Dec 1, 2024
1 parent 4abc8e5 commit 97a9abd
Show file tree
Hide file tree
Showing 7 changed files with 241 additions and 9 deletions.
13 changes: 13 additions & 0 deletions pgxn/neon/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#include "access/subtrans.h"
#include "access/twophase.h"
#include "access/xlog.h"
#if PG_MAJORVERSION_NUM >= 15
#include "access/xlogrecovery.h"
#endif
#include "replication/logical.h"
#include "replication/slot.h"
#include "replication/walsender.h"
Expand Down Expand Up @@ -432,6 +435,16 @@ _PG_init(void)
restore_running_xacts_callback = RestoreRunningXactsFromClog;


DefineCustomBoolVariable(
"neon.allow_replica_misconfig",
"Allow replica startup when some critical GUCs have smaller value than on primary node",
NULL,
&allowReplicaMisconfig,
true,
PGC_POSTMASTER,
0,
NULL, NULL, NULL);

DefineCustomEnumVariable(
"neon.running_xacts_overflow_policy",
"Action performed on snapshot overflow when restoring runnings xacts from CLOG",
Expand Down
221 changes: 220 additions & 1 deletion test_runner/regress/test_physical_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import time
from typing import TYPE_CHECKING

import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import wait_replica_caughtup

if TYPE_CHECKING:
from fixtures.neon_fixtures import NeonEnv

Expand All @@ -19,8 +23,8 @@ def test_physical_replication(neon_simple_env: NeonEnv):
p_cur.execute(
"CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))"
)
time.sleep(1)
with env.endpoints.new_replica_start(origin=primary, endpoint_id="secondary") as secondary:
wait_replica_caughtup(primary, secondary)
with primary.connect() as p_con:
with p_con.cursor() as p_cur:
with secondary.connect() as s_con:
Expand All @@ -42,3 +46,218 @@ def test_physical_replication(neon_simple_env: NeonEnv):
s_cur.execute(
"select * from t where pk=%s", (random.randrange(1, 2 * pk),)
)


def test_physical_replication_config_mismatch_max_connections(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_connections).

    PostgreSQL enforces that settings that affect how many transactions can be open at the same time
    have values equal to or higher in a hot standby replica than in the primary. If they don't, the replica refuses
    to start up. If the settings are changed in the primary, it emits a WAL record with the new settings, and
    when the replica sees that record it pauses the replay.

    PostgreSQL enforces this to ensure that the replica can hold all the XIDs in the so-called
    "known-assigned XIDs" array, which is a fixed size array that needs to be allocated
    upfront at server startup. That's pretty pessimistic, though; usually you can get
    away with smaller settings, because we allocate space for 64 subtransactions per
    transaction too. If you get unlucky and you run out of space, WAL redo dies with
    "ERROR: too many KnownAssignedXids". It's better to take the chances than refuse
    to start up, especially in Neon: if the WAL redo dies, the server is restarted, which is
    no worse than refusing to start up in the first place. Furthermore, the control plane
    tries to ensure that on restart, the settings are set high enough, so most likely it will
    work after restart. Because of that, we have patched Postgres to disable the checks when
    the `recovery_pause_on_misconfig` setting is set to `false` (which is the default on neon).
    This test tests all those cases of running out of space in known-assigned XIDs array that
    we can hit with `recovery_pause_on_misconfig=false`, which are unreachable in unpatched
    Postgres.

    There's a similar check for `max_locks_per_transactions` too, which is related to running out
    of space in the lock manager rather than known-assigned XIDs. Similar story with that, although
    running out of space in the lock manager is possible in unmodified Postgres too. Enforcing the
    check for `max_locks_per_transactions` ensures that you don't run out of space in the lock manager
    when there are no read-only queries holding locks in the replica, but you can still run out if you have
    those.
    """
    env = neon_simple_env
    with env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
    ) as primary:
        with primary.connect() as p_con:
            with p_con.cursor() as p_cur:
                p_cur.execute(
                    "CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))"
                )
        # Replica is deliberately misconfigured with a lower max_connections
        # than the primary; with the checks disabled it must still start and
        # replay WAL instead of pausing.
        with env.endpoints.new_replica_start(
            origin=primary,
            endpoint_id="secondary",
            config_lines=["max_connections=5"],
        ) as secondary:
            wait_replica_caughtup(primary, secondary)
            with secondary.connect() as s_con:
                with s_con.cursor() as s_cur:
                    # Open 10 concurrent write transactions on the primary --
                    # more than the replica's max_connections=5 would normally
                    # leave room to track.
                    cursors = []
                    for i in range(10):
                        p_con = primary.connect()
                        p_cur = p_con.cursor()
                        p_cur.execute("begin")
                        p_cur.execute("insert into t (pk) values (%s)", (i,))
                        cursors.append(p_cur)

                    for p_cur in cursors:
                        p_cur.execute("commit")

                    # The replica should nevertheless catch up and see all rows.
                    wait_replica_caughtup(primary, secondary)
                    s_cur.execute("select count(*) from t")
                    assert s_cur.fetchall()[0][0] == 10


def test_physical_replication_config_mismatch_max_prepared(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_prepared_transactions).

    If the number of prepared transactions at the primary exceeds the limit at the
    replica, then WAL replay at the replica is terminated.
    """
    env = neon_simple_env
    primary = env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
        config_lines=["max_prepared_transactions=10"],
    )
    p_con = primary.connect()
    p_cur = p_con.cursor()
    p_cur.execute("CREATE TABLE t(pk bigint primary key, payload text default repeat('?',200))")

    # Replica allows only 5 prepared transactions; the primary will create 10.
    secondary = env.endpoints.new_replica_start(
        origin=primary,
        endpoint_id="secondary",
        config_lines=["max_prepared_transactions=5"],
    )
    wait_replica_caughtup(primary, secondary)

    s_con = secondary.connect()
    s_cur = s_con.cursor()
    cursors = []
    for i in range(10):
        p_con = primary.connect()
        p_cur = p_con.cursor()
        p_cur.execute("begin")
        p_cur.execute("insert into t (pk) values (%s)", (i,))
        p_cur.execute(f"prepare transaction 't{i}'")
        cursors.append(p_cur)

    for i in range(10):
        cursors[i].execute(f"commit prepared 't{i}'")

    # Give the replica time to replay the WAL and hit the limit; the query on
    # the (now crashed) replica connection is expected to fail.
    time.sleep(5)
    with pytest.raises(Exception) as e:
        s_cur.execute("select count(*) from t")
        assert s_cur.fetchall()[0][0] == 10
    secondary.stop()

    log.info(f"Replica crashed with {e}")
    assert secondary.log_contains("maximum number of prepared transactions reached")


def connect(ep):
    """Connect to endpoint *ep*, retrying transient failures.

    Makes up to 10 attempts with a 1 second pause between them. Returns the
    connection from ``ep.connect()`` on success. Raises a ``RuntimeError``
    (chained to the last connection error) if every attempt fails, instead of
    silently returning None -- a None return would only surface later as a
    confusing AttributeError at the call site.
    """
    max_reconnect_attempts = 10
    last_error = None
    for _ in range(max_reconnect_attempts):
        try:
            return ep.connect()
        except Exception as e:
            last_error = e
            log.info(f"Failed to connect with primary: {e}")
            time.sleep(1)
    raise RuntimeError(
        f"Failed to connect with primary after {max_reconnect_attempts} attempts"
    ) from last_error


def test_physical_replication_config_mismatch_too_many_known_xids(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_connections).

    In this case a large difference in this setting and a larger number of concurrent
    transactions at the primary cause a "too many KnownAssignedXids" error at the replica.
    """
    env = neon_simple_env
    primary = env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
        config_lines=[
            "max_connections=1000",
            "shared_buffers=128MB",  # prevent "no unpinned buffers available" error
        ],
    )
    # Keep every connection-related setting on the replica as small as
    # possible so its KnownAssignedXids array is tiny compared to the
    # primary's transaction load.
    secondary = env.endpoints.new_replica_start(
        origin=primary,
        endpoint_id="secondary",
        config_lines=[
            "max_connections=2",
            "autovacuum_max_workers=1",
            "max_worker_processes=5",
            "max_wal_senders=1",
            "superuser_reserved_connections=0",
        ],
    )

    p_con = primary.connect()
    p_cur = p_con.cursor()
    p_cur.execute("CREATE TABLE t(x integer)")

    # Open nearly max_connections concurrent write transactions on the primary.
    n_connections = 990
    cursors = []
    for i in range(n_connections):
        p_con = connect(primary)
        p_cur = p_con.cursor()
        p_cur.execute("begin")
        p_cur.execute(f"insert into t values({i})")
        cursors.append(p_cur)

    for cur in cursors:
        cur.execute("commit")

    # Give the replica time to replay the WAL; replay is expected to die, so
    # connecting/querying the replica should fail.
    time.sleep(5)
    with pytest.raises(Exception) as e:
        s_con = secondary.connect()
        s_cur = s_con.cursor()
        s_cur.execute("select count(*) from t")
        assert s_cur.fetchall()[0][0] == n_connections
    secondary.stop()

    log.info(f"Replica crashed with {e}")
    assert secondary.log_contains("too many KnownAssignedXids")


def test_physical_replication_config_mismatch_max_locks_per_transaction(neon_simple_env: NeonEnv):
    """
    Test for primary and replica with different configuration settings (max_locks_per_transaction).

    In conjunction with a different number of max_connections at primary and standby it can cause
    an "out of shared memory" error if the primary obtains more AccessExclusiveLocks than the
    standby can hold.
    """
    env = neon_simple_env
    primary = env.endpoints.create_start(
        branch_name="main",
        endpoint_id="primary",
        config_lines=[
            "max_locks_per_transaction = 100",
        ],
    )
    # Replica lock table is sized roughly by
    # max_locks_per_transaction * max_connections, so keep both small.
    secondary = env.endpoints.new_replica_start(
        origin=primary,
        endpoint_id="secondary",
        config_lines=[
            "max_connections=10",
            "max_locks_per_transaction = 10",
        ],
    )

    n_tables = 1000

    # Creating many tables in a single transaction takes an
    # AccessExclusiveLock per table, exceeding the replica's lock capacity.
    p_con = primary.connect()
    p_cur = p_con.cursor()
    p_cur.execute("begin")
    for i in range(n_tables):
        p_cur.execute(f"CREATE TABLE t_{i}(x integer)")
    p_cur.execute("commit")

    # Waiting for catch-up is expected to fail because replay dies on the replica.
    with pytest.raises(Exception) as e:
        wait_replica_caughtup(primary, secondary)
    secondary.stop()

    log.info(f"Replica crashed with {e}")
    assert secondary.log_contains("You might need to increase")
2 changes: 1 addition & 1 deletion vendor/postgres-v14
2 changes: 1 addition & 1 deletion vendor/postgres-v15
2 changes: 1 addition & 1 deletion vendor/postgres-v16
2 changes: 1 addition & 1 deletion vendor/postgres-v17
8 changes: 4 additions & 4 deletions vendor/revisions.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
{
"v17": [
"17.2",
"faebe5e5aff5687908504453623778f8515529db"
"a10d95be67265e0f10a422ba0457f5a7af01de71"
],
"v16": [
"16.6",
"13e9e3539419003e79bd9aa29e1bc44f3fd555dd"
"dff6615a8e48a10bb17a03fa3c00635f1ace7a92"
],
"v15": [
"15.10",
"d929b9a8b9f32f6fe5a0eac3e6e963f0e44e27e6"
"972e325e62b455957adbbdd8580e31275bb5b8c9"
],
"v14": [
"14.15",
"c1989c934d46e04e78b3c496c8a34bcd40ddceeb"
"373f9decad933d2d46f321231032ae8b0da81acd"
]
}

1 comment on commit 97a9abd

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

7122 tests run: 6804 passed, 0 failed, 318 skipped (full report)


Flaky tests (2)

Postgres 17

Postgres 14

Code coverage* (full report)

  • functions: 30.4% (8274 of 27226 functions)
  • lines: 47.8% (65224 of 136507 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
97a9abd at 2024-12-01T14:19:56.529Z :recycle:

Please sign in to comment.