@@ -21,11 +21,10 @@ import (
2121 "github.com/crunchydata/postgres-operator/internal/feature"
2222 "github.com/crunchydata/postgres-operator/internal/initialize"
2323 "github.com/crunchydata/postgres-operator/internal/naming"
24+ "github.com/crunchydata/postgres-operator/internal/shell"
2425 "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
2526)
2627
27- // Upgrade job
28-
2928// pgUpgradeJob returns the ObjectMeta for the pg_upgrade Job utilized to
3029// upgrade from one major PostgreSQL version to another
3130func pgUpgradeJob (upgrade * v1beta1.PGUpgrade ) metav1.ObjectMeta {
@@ -48,20 +47,24 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
4847 oldVersion := spec .FromPostgresVersion
4948 newVersion := spec .ToPostgresVersion
5049
51- // if the fetch key command is set for TDE, provide the value during initialization
52- initdb := `/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}"`
50+ var argEncryptionKeyCommand string
5351 if fetchKeyCommand != "" {
54- initdb + = ` --encryption-key-command " ` + fetchKeyCommand + `"`
52+ argEncryptionKeyCommand = ` --encryption-key-command= ` + shell . QuoteWord ( fetchKeyCommand )
5553 }
5654
5755 args := []string {fmt .Sprint (oldVersion ), fmt .Sprint (newVersion )}
5856 script := strings .Join ([]string {
57+ // Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
58+ `shopt -so errexit nounset` ,
59+
5960 `declare -r data_volume='/pgdata' old_version="$1" new_version="$2"` ,
60- `printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"` ,
61+ `printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"` ,
62+ `section() { printf '\n\n%s\n' "$@"; }` ,
6163
62- // Note : Rather than import the nss_wrapper init container, as we do in
63- // the main postgres-operator , this job does the required nss_wrapper
64+ // NOTE : Rather than import the nss_wrapper init container, as we do in
65+ // the PostgresCluster controller , this job does the required nss_wrapper
6466 // settings here.
67+ `section 'Step 1 of 7: Ensuring username is postgres...'` ,
6568
6669 // Create a copy of the system group definitions, but remove the "postgres"
6770 // group or any group with the current GID. Replace them with our own that
@@ -82,57 +85,54 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
8285 `export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD` ,
8386 `id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]` ,
8487
88+ `section 'Step 2 of 7: Finding data and tools...'` ,
89+
8590 // Expect Postgres executables at the Red Hat paths.
86- `[[ -x /usr/pgsql-"${old_version}"/bin/postgres ]]` ,
87- `[[ -x /usr/pgsql-"${new_version}"/bin/initdb ]]` ,
88- `[[ -d /pgdata/pg"${old_version}" ]]` ,
91+ `old_bin="/usr/pgsql-${old_version}/bin" && [[ -x "${old_bin}/postgres" ]]` ,
92+ `new_bin="/usr/pgsql-${new_version}/bin" && [[ -x "${new_bin}/initdb" ]]` ,
93+ `old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]]` ,
94+ `new_data="${data_volume}/pg${new_version}"` ,
95+
96+ // pg_upgrade writes its files in "${new_data}/pg_upgrade_output.d" since PostgreSQL v15.
97+ // Change to a writable working directory to be compatible with PostgreSQL v14 and earlier.
98+ //
99+ // https://www.postgresql.org/docs/release/15#id-1.11.6.20.5.11.3
100+ `cd "${data_volume}"` ,
89101
90102 // Below is the pg_upgrade script used to upgrade a PostgresCluster from
91103 // one major version to another. Additional information concerning the
92104 // steps used and command flag specifics can be found in the documentation:
93105 // - https://www.postgresql.org/docs/current/pgupgrade.html
94106
95- // To begin, we first move to the mounted /pgdata directory and create a
96- // new version directory which is then initialized with the initdb command.
97- `cd /pgdata || exit` ,
98- `echo -e "Step 1: Making new pgdata directory...\n"` ,
99- `mkdir /pgdata/pg"${new_version}"` ,
100- `echo -e "Step 2: Initializing new pgdata directory...\n"` ,
101- initdb ,
102-
103- // Before running the upgrade check, which ensures the clusters are compatible,
104- // proper permissions have to be set on the old pgdata directory and the
105- // preload library settings must be copied over.
106- `echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"` ,
107- `chmod 750 /pgdata/pg"${old_version}"` ,
108- `echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"` ,
109- `echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \` ,
110- `/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf` ,
111-
112- // Before the actual upgrade is run, we will run the upgrade --check to
113- // verify everything before actually changing any data.
114- `echo -e "Step 5: Running pg_upgrade check...\n"` ,
115- `time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \` ,
116- `--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\` ,
117- ` --new-datadir /pgdata/pg"${new_version}" --check` + argMethod + argJobs ,
118-
119- // Assuming the check completes successfully, the pg_upgrade command will
120- // be run that actually prepares the upgraded pgdata directory.
121- `echo -e "\nStep 6: Running pg_upgrade...\n"` ,
122- `time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \` ,
123- `--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \` ,
124- `--new-datadir /pgdata/pg"${new_version}"` + argMethod + argJobs ,
125-
126- // Since we have cleared the Patroni cluster step by removing the EndPoints, we copy patroni.dynamic.json
127- // from the old data dir to help retain PostgreSQL parameters you had set before.
128- // - https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
129- `echo -e "\nStep 7: Copying patroni.dynamic.json...\n"` ,
130- `cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"` ,
131-
132- `echo -e "\npg_upgrade Job Complete!"` ,
107+ `section 'Step 3 of 7: Initializing new data directory...'` ,
108+ `PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums` + argEncryptionKeyCommand ,
109+
110+ // Read the configured value then quote it; every single-quote U+0027 is replaced by two.
111+ //
112+ // https://www.postgresql.org/docs/current/config-setting.html
113+ // https://www.gnu.org/software/bash/manual/bash.html#ANSI_002dC-Quoting
114+ `section 'Step 4 of 7: Copying shared_preload_libraries parameter...'` ,
115+ `value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)` ,
116+ `echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"` ,
117+
118+ `section 'Step 5 of 7: Checking for potential issues...'` ,
119+ `"${new_bin}/pg_upgrade" --check` + argMethod + argJobs + ` \` ,
120+ `--old-bindir="${old_bin}" --old-datadir="${old_data}" \` ,
121+ `--new-bindir="${new_bin}" --new-datadir="${new_data}"` ,
122+
123+ `section 'Step 6 of 7: Performing upgrade...'` ,
124+ `(set -x && time "${new_bin}/pg_upgrade"` + argMethod + argJobs + ` \` ,
125+ `--old-bindir="${old_bin}" --old-datadir="${old_data}" \` ,
126+ `--new-bindir="${new_bin}" --new-datadir="${new_data}")` ,
127+
128+ // https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
129+ `section 'Step 7 of 7: Copying Patroni settings...'` ,
130+ `(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")` ,
131+
132+ `section 'Success!'` ,
133133 }, "\n " )
134134
135- return append ([]string {"bash" , "-ceu " , "--" , script , "upgrade" }, args ... )
135+ return append ([]string {"bash" , "-c " , "--" , script , "upgrade" }, args ... )
136136}
137137
138138// largestWholeCPU returns the maximum CPU request or limit as a non-negative
@@ -238,38 +238,37 @@ func (r *PGUpgradeReconciler) generateUpgradeJob(
238238// We currently target the `pgdata/pg{old_version}` and `pgdata/pg{old_version}_wal`
239239// directories for removal.
240240func removeDataCommand (upgrade * v1beta1.PGUpgrade ) []string {
241- oldVersion := fmt . Sprint ( upgrade .Spec .FromPostgresVersion )
241+ oldVersion := upgrade .Spec .FromPostgresVersion
242242
243243 // Before removing the directories (both data and wal), we check that
244244 // the directory is not in use by running `pg_controldata` and making sure
245245 // the server state is "shut down in recovery"
246- // TODO(benjaminjb): pg_controldata seems pretty stable, but might want to
247- // experiment with a few more versions.
248- args := []string {oldVersion }
246+ args := []string {fmt .Sprint (oldVersion )}
249247 script := strings .Join ([]string {
250- `declare -r old_version="$1"` ,
251- `printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"` ,
252- `echo -e "Checking the directory exists and isn't being used...\n"` ,
253- `cd /pgdata || exit` ,
254- // The string `shut down in recovery` is the dbstate that postgres sets from
255- // at least version 10 to 14 when a replica has been shut down.
256- // - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_controldata/pg_controldata.c;h=f911f98d946d83f1191abf35239d9b4455c5f52a;hb=HEAD#l59
257- // Note: `pg_controldata` is actually used by `pg_upgrade` before upgrading
258- // to make sure that the server in question is shut down as a primary;
259- // that aligns with our use here, where we're making sure that the server in question
260- // was shut down as a replica.
261- // - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_upgrade/controldata.c;h=41b8f69b8cbe4f40e6098ad84c2e8e987e24edaf;hb=HEAD#l122
262- `if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi` ,
263- `echo -e "Removing old pgdata directory...\n"` ,
264- // When deleting the wal directory, use `realpath` to resolve the symlink from
265- // the pgdata directory. This is necessary because the wal directory can be
266- // mounted at different places depending on if an external wal PVC is used,
267- // i.e. `/pgdata/pg14_wal` vs `/pgwal/pg14_wal`
268- `rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"` ,
269- `echo -e "Remove Data Job Complete!"` ,
248+ // Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
249+ `shopt -so errexit nounset` ,
250+
251+ `declare -r data_volume='/pgdata' old_version="$1"` ,
252+ `printf 'Removing PostgreSQL %s data...\n\n' "$@"` ,
253+ `delete() (set -x && rm -rf -- "$@")` ,
254+
255+ `old_data="${data_volume}/pg${old_version}"` ,
256+ `control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}")` ,
257+ `read -r state <<< "${control##*cluster state:}"` ,
258+
259+ // We expect exactly one state for a replica that has been stopped.
260+ //
261+ // https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/bin/pg_controldata/pg_controldata.c#l55
262+ // https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/bin/pg_controldata/pg_controldata.c#l58
263+ `[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }` ,
264+
265+ // "rm" does not follow symbolic links.
266+ // Delete the old data directory after subdirectories that contain versioned data.
267+ `delete "${old_data}/pg_wal/"` ,
268+ `delete "${old_data}" && echo 'Success!'` ,
270269 }, "\n " )
271270
272- return append ([]string {"bash" , "-ceu " , "--" , script , "remove" }, args ... )
271+ return append ([]string {"bash" , "-c " , "--" , script , "remove" }, args ... )
273272}
274273
275274// generateRemoveDataJob returns a Job that can remove the data
0 commit comments