Skip to content

Commit 0732b48

Browse files
committed
Simplify upgrade scripts with more variables
I find this Go code and resulting Bash easier to read. This also logs more about what is happening without changing the sequence of commands. The script included an unnecessary `|| exit`, so I moved the `set -eu` out of the Bash invocation into the script itself to make that behavior more obvious.
1 parent 04dc9a2 commit 0732b48

File tree

2 files changed

+115
-113
lines changed

2 files changed

+115
-113
lines changed

internal/controller/pgupgrade/jobs.go

Lines changed: 74 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,10 @@ import (
2121
"github.com/crunchydata/postgres-operator/internal/feature"
2222
"github.com/crunchydata/postgres-operator/internal/initialize"
2323
"github.com/crunchydata/postgres-operator/internal/naming"
24+
"github.com/crunchydata/postgres-operator/internal/shell"
2425
"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
2526
)
2627

27-
// Upgrade job
28-
2928
// pgUpgradeJob returns the ObjectMeta for the pg_upgrade Job utilized to
3029
// upgrade from one major PostgreSQL version to another
3130
func pgUpgradeJob(upgrade *v1beta1.PGUpgrade) metav1.ObjectMeta {
@@ -48,20 +47,24 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
4847
oldVersion := spec.FromPostgresVersion
4948
newVersion := spec.ToPostgresVersion
5049

51-
// if the fetch key command is set for TDE, provide the value during initialization
52-
initdb := `/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}"`
50+
var argEncryptionKeyCommand string
5351
if fetchKeyCommand != "" {
54-
initdb += ` --encryption-key-command "` + fetchKeyCommand + `"`
52+
argEncryptionKeyCommand = ` --encryption-key-command=` + shell.QuoteWord(fetchKeyCommand)
5553
}
5654

5755
args := []string{fmt.Sprint(oldVersion), fmt.Sprint(newVersion)}
5856
script := strings.Join([]string{
57+
// Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
58+
`shopt -so errexit nounset`,
59+
5960
`declare -r data_volume='/pgdata' old_version="$1" new_version="$2"`,
60-
`printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"`,
61+
`printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"`,
62+
`section() { printf '\n\n%s\n' "$@"; }`,
6163

62-
// Note: Rather than import the nss_wrapper init container, as we do in
63-
// the main postgres-operator, this job does the required nss_wrapper
64+
// NOTE: Rather than import the nss_wrapper init container, as we do in
65+
// the PostgresCluster controller, this job does the required nss_wrapper
6466
// settings here.
67+
`section 'Step 1 of 7: Ensuring username is postgres...'`,
6568

6669
// Create a copy of the system group definitions, but remove the "postgres"
6770
// group or any group with the current GID. Replace them with our own that
@@ -82,57 +85,54 @@ func upgradeCommand(spec *v1beta1.PGUpgradeSettings, fetchKeyCommand string) []s
8285
`export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD`,
8386
`id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]`,
8487

88+
`section 'Step 2 of 7: Finding data and tools...'`,
89+
8590
// Expect Postgres executables at the Red Hat paths.
86-
`[[ -x /usr/pgsql-"${old_version}"/bin/postgres ]]`,
87-
`[[ -x /usr/pgsql-"${new_version}"/bin/initdb ]]`,
88-
`[[ -d /pgdata/pg"${old_version}" ]]`,
91+
`old_bin="/usr/pgsql-${old_version}/bin" && [[ -x "${old_bin}/postgres" ]]`,
92+
`new_bin="/usr/pgsql-${new_version}/bin" && [[ -x "${new_bin}/initdb" ]]`,
93+
`old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]]`,
94+
`new_data="${data_volume}/pg${new_version}"`,
95+
96+
// pg_upgrade writes its files in "${new_data}/pg_upgrade_output.d" since PostgreSQL v15.
97+
// Change to a writable working directory to be compatible with PostgreSQL v14 and earlier.
98+
//
99+
// https://www.postgresql.org/docs/release/15#id-1.11.6.20.5.11.3
100+
`cd "${data_volume}"`,
89101

90102
// Below is the pg_upgrade script used to upgrade a PostgresCluster from
91103
// one major version to another. Additional information concerning the
92104
// steps used and command flag specifics can be found in the documentation:
93105
// - https://www.postgresql.org/docs/current/pgupgrade.html
94106

95-
// To begin, we first move to the mounted /pgdata directory and create a
96-
// new version directory which is then initialized with the initdb command.
97-
`cd /pgdata || exit`,
98-
`echo -e "Step 1: Making new pgdata directory...\n"`,
99-
`mkdir /pgdata/pg"${new_version}"`,
100-
`echo -e "Step 2: Initializing new pgdata directory...\n"`,
101-
initdb,
102-
103-
// Before running the upgrade check, which ensures the clusters are compatible,
104-
// proper permissions have to be set on the old pgdata directory and the
105-
// preload library settings must be copied over.
106-
`echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"`,
107-
`chmod 750 /pgdata/pg"${old_version}"`,
108-
`echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"`,
109-
`echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \`,
110-
`/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf`,
111-
112-
// Before the actual upgrade is run, we will run the upgrade --check to
113-
// verify everything before actually changing any data.
114-
`echo -e "Step 5: Running pg_upgrade check...\n"`,
115-
`time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`,
116-
`--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\`,
117-
` --new-datadir /pgdata/pg"${new_version}" --check` + argMethod + argJobs,
118-
119-
// Assuming the check completes successfully, the pg_upgrade command will
120-
// be run that actually prepares the upgraded pgdata directory.
121-
`echo -e "\nStep 6: Running pg_upgrade...\n"`,
122-
`time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \`,
123-
`--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \`,
124-
`--new-datadir /pgdata/pg"${new_version}"` + argMethod + argJobs,
125-
126-
// Since we have cleared the Patroni cluster step by removing the EndPoints, we copy patroni.dynamic.json
127-
// from the old data dir to help retain PostgreSQL parameters you had set before.
128-
// - https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
129-
`echo -e "\nStep 7: Copying patroni.dynamic.json...\n"`,
130-
`cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"`,
131-
132-
`echo -e "\npg_upgrade Job Complete!"`,
107+
`section 'Step 3 of 7: Initializing new data directory...'`,
108+
`PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums` + argEncryptionKeyCommand,
109+
110+
// Read the configured value then quote it; every single-quote U+0027 is replaced by two.
111+
//
112+
// https://www.postgresql.org/docs/current/config-setting.html
113+
// https://www.gnu.org/software/bash/manual/bash.html#ANSI_002dC-Quoting
114+
`section 'Step 4 of 7: Copying shared_preload_libraries parameter...'`,
115+
`value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)`,
116+
`echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"`,
117+
118+
`section 'Step 5 of 7: Checking for potential issues...'`,
119+
`"${new_bin}/pg_upgrade" --check` + argMethod + argJobs + ` \`,
120+
`--old-bindir="${old_bin}" --old-datadir="${old_data}" \`,
121+
`--new-bindir="${new_bin}" --new-datadir="${new_data}"`,
122+
123+
`section 'Step 6 of 7: Performing upgrade...'`,
124+
`(set -x && time "${new_bin}/pg_upgrade"` + argMethod + argJobs + ` \`,
125+
`--old-bindir="${old_bin}" --old-datadir="${old_data}" \`,
126+
`--new-bindir="${new_bin}" --new-datadir="${new_data}")`,
127+
128+
// https://patroni.readthedocs.io/en/latest/existing_data.html#major-upgrade-of-postgresql-version
129+
`section 'Step 7 of 7: Copying Patroni settings...'`,
130+
`(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")`,
131+
132+
`section 'Success!'`,
133133
}, "\n")
134134

135-
return append([]string{"bash", "-ceu", "--", script, "upgrade"}, args...)
135+
return append([]string{"bash", "-c", "--", script, "upgrade"}, args...)
136136
}
137137

138138
// largestWholeCPU returns the maximum CPU request or limit as a non-negative
@@ -238,38 +238,37 @@ func (r *PGUpgradeReconciler) generateUpgradeJob(
238238
// We currently target the `pgdata/pg{old_version}` and `pgdata/pg{old_version}_wal`
239239
// directories for removal.
240240
func removeDataCommand(upgrade *v1beta1.PGUpgrade) []string {
241-
oldVersion := fmt.Sprint(upgrade.Spec.FromPostgresVersion)
241+
oldVersion := upgrade.Spec.FromPostgresVersion
242242

243243
// Before removing the directories (both data and wal), we check that
244244
// the directory is not in use by running `pg_controldata` and making sure
245245
// the server state is "shut down in recovery"
246-
// TODO(benjaminjb): pg_controldata seems pretty stable, but might want to
247-
// experiment with a few more versions.
248-
args := []string{oldVersion}
246+
args := []string{fmt.Sprint(oldVersion)}
249247
script := strings.Join([]string{
250-
`declare -r old_version="$1"`,
251-
`printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"`,
252-
`echo -e "Checking the directory exists and isn't being used...\n"`,
253-
`cd /pgdata || exit`,
254-
// The string `shut down in recovery` is the dbstate that postgres sets from
255-
// at least version 10 to 14 when a replica has been shut down.
256-
// - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_controldata/pg_controldata.c;h=f911f98d946d83f1191abf35239d9b4455c5f52a;hb=HEAD#l59
257-
// Note: `pg_controldata` is actually used by `pg_upgrade` before upgrading
258-
// to make sure that the server in question is shut down as a primary;
259-
// that aligns with our use here, where we're making sure that the server in question
260-
// was shut down as a replica.
261-
// - https://git.postgresql.org/gitweb/?p=postgresql.git;a=blob;f=src/bin/pg_upgrade/controldata.c;h=41b8f69b8cbe4f40e6098ad84c2e8e987e24edaf;hb=HEAD#l122
262-
`if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi`,
263-
`echo -e "Removing old pgdata directory...\n"`,
264-
// When deleting the wal directory, use `realpath` to resolve the symlink from
265-
// the pgdata directory. This is necessary because the wal directory can be
266-
// mounted at different places depending on if an external wal PVC is used,
267-
// i.e. `/pgdata/pg14_wal` vs `/pgwal/pg14_wal`
268-
`rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"`,
269-
`echo -e "Remove Data Job Complete!"`,
248+
// Exit immediately when a pipeline or subshell exits non-zero or when expanding an unset variable.
249+
`shopt -so errexit nounset`,
250+
251+
`declare -r data_volume='/pgdata' old_version="$1"`,
252+
`printf 'Removing PostgreSQL %s data...\n\n' "$@"`,
253+
`delete() (set -x && rm -rf -- "$@")`,
254+
255+
`old_data="${data_volume}/pg${old_version}"`,
256+
`control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}")`,
257+
`read -r state <<< "${control##*cluster state:}"`,
258+
259+
// We expect exactly one state for a replica that has been stopped.
260+
//
261+
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_10_0;f=src/bin/pg_controldata/pg_controldata.c#l55
262+
// https://git.postgresql.org/gitweb/?p=postgresql.git;hb=refs/tags/REL_17_0;f=src/bin/pg_controldata/pg_controldata.c#l58
263+
`[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }`,
264+
265+
// "rm" does not follow symbolic links.
266+
// Delete the old data directory after subdirectories that contain versioned data.
267+
`delete "${old_data}/pg_wal/"`,
268+
`delete "${old_data}" && echo 'Success!'`,
270269
}, "\n")
271270

272-
return append([]string{"bash", "-ceu", "--", script, "remove"}, args...)
271+
return append([]string{"bash", "-c", "--", script, "remove"}, args...)
273272
}
274273

275274
// generateRemoveDataJob returns a Job that can remove the data

internal/controller/pgupgrade/jobs_test.go

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func TestUpgradeCommand(t *testing.T) {
8787
spec := &v1beta1.PGUpgradeSettings{Jobs: tt.Spec}
8888
command := upgradeCommand(spec, "")
8989
assert.Assert(t, len(command) > 3)
90-
assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3])
90+
assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3])
9191

9292
script := command[3]
9393
assert.Assert(t, cmp.Contains(script, tt.Args))
@@ -111,7 +111,7 @@ func TestUpgradeCommand(t *testing.T) {
111111
spec := &v1beta1.PGUpgradeSettings{TransferMethod: tt.Spec}
112112
command := upgradeCommand(spec, "")
113113
assert.Assert(t, len(command) > 3)
114-
assert.DeepEqual(t, []string{"bash", "-ceu", "--"}, command[:3])
114+
assert.DeepEqual(t, []string{"bash", "-c", "--"}, command[:3])
115115

116116
script := command[3]
117117
assert.Assert(t, cmp.Contains(script, tt.Args))
@@ -196,11 +196,14 @@ spec:
196196
containers:
197197
- command:
198198
- bash
199-
- -ceu
199+
- -c
200200
- --
201201
- |-
202+
shopt -so errexit nounset
202203
declare -r data_volume='/pgdata' old_version="$1" new_version="$2"
203-
printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n\n' "$@"
204+
printf 'Performing PostgreSQL upgrade from version "%s" to "%s" ...\n' "$@"
205+
section() { printf '\n\n%s\n' "$@"; }
206+
section 'Step 1 of 7: Ensuring username is postgres...'
204207
gid=$(id -G); NSS_WRAPPER_GROUP=$(mktemp)
205208
(sed "/^postgres:x:/ d; /^[^:]*:x:${gid%% *}:/ d" /etc/group
206209
echo "postgres:x:${gid%% *}:") > "${NSS_WRAPPER_GROUP}"
@@ -209,30 +212,28 @@ spec:
209212
echo "postgres:x:${uid}:${gid%% *}::${data_volume}:") > "${NSS_WRAPPER_PASSWD}"
210213
export LD_PRELOAD='libnss_wrapper.so' NSS_WRAPPER_GROUP NSS_WRAPPER_PASSWD
211214
id; [[ "$(id -nu)" == 'postgres' && "$(id -ng)" == 'postgres' ]]
212-
[[ -x /usr/pgsql-"${old_version}"/bin/postgres ]]
213-
[[ -x /usr/pgsql-"${new_version}"/bin/initdb ]]
214-
[[ -d /pgdata/pg"${old_version}" ]]
215-
cd /pgdata || exit
216-
echo -e "Step 1: Making new pgdata directory...\n"
217-
mkdir /pgdata/pg"${new_version}"
218-
echo -e "Step 2: Initializing new pgdata directory...\n"
219-
/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}"
220-
echo -e "\nStep 3: Setting the expected permissions on the old pgdata directory...\n"
221-
chmod 750 /pgdata/pg"${old_version}"
222-
echo -e "Step 4: Copying shared_preload_libraries setting to new postgresql.conf file...\n"
223-
echo "shared_preload_libraries = '$(/usr/pgsql-"""${old_version}"""/bin/postgres -D \
224-
/pgdata/pg"""${old_version}""" -C shared_preload_libraries)'" >> /pgdata/pg"${new_version}"/postgresql.conf
225-
echo -e "Step 5: Running pg_upgrade check...\n"
226-
time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \
227-
--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}"\
228-
--new-datadir /pgdata/pg"${new_version}" --check --link --jobs=1
229-
echo -e "\nStep 6: Running pg_upgrade...\n"
230-
time /usr/pgsql-"${new_version}"/bin/pg_upgrade --old-bindir /usr/pgsql-"${old_version}"/bin \
231-
--new-bindir /usr/pgsql-"${new_version}"/bin --old-datadir /pgdata/pg"${old_version}" \
232-
--new-datadir /pgdata/pg"${new_version}" --link --jobs=1
233-
echo -e "\nStep 7: Copying patroni.dynamic.json...\n"
234-
cp /pgdata/pg"${old_version}"/patroni.dynamic.json /pgdata/pg"${new_version}"
235-
echo -e "\npg_upgrade Job Complete!"
215+
section 'Step 2 of 7: Finding data and tools...'
216+
old_bin="/usr/pgsql-${old_version}/bin" && [[ -x "${old_bin}/postgres" ]]
217+
new_bin="/usr/pgsql-${new_version}/bin" && [[ -x "${new_bin}/initdb" ]]
218+
old_data="${data_volume}/pg${old_version}" && [[ -d "${old_data}" ]]
219+
new_data="${data_volume}/pg${new_version}"
220+
cd "${data_volume}"
221+
section 'Step 3 of 7: Initializing new data directory...'
222+
PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums
223+
section 'Step 4 of 7: Copying shared_preload_libraries parameter...'
224+
value=$(LC_ALL=C PGDATA="${old_data}" "${old_bin}/postgres" -C shared_preload_libraries)
225+
echo >> "${new_data}/postgresql.conf" "shared_preload_libraries = '${value//$'\''/$'\'\''}'"
226+
section 'Step 5 of 7: Checking for potential issues...'
227+
"${new_bin}/pg_upgrade" --check --link --jobs=1 \
228+
--old-bindir="${old_bin}" --old-datadir="${old_data}" \
229+
--new-bindir="${new_bin}" --new-datadir="${new_data}"
230+
section 'Step 6 of 7: Performing upgrade...'
231+
(set -x && time "${new_bin}/pg_upgrade" --link --jobs=1 \
232+
--old-bindir="${old_bin}" --old-datadir="${old_data}" \
233+
--new-bindir="${new_bin}" --new-datadir="${new_data}")
234+
section 'Step 7 of 7: Copying Patroni settings...'
235+
(set -x && cp "${old_data}/patroni.dynamic.json" "${new_data}")
236+
section 'Success!'
236237
- upgrade
237238
- "19"
238239
- "25"
@@ -267,7 +268,7 @@ status: {}
267268

268269
tdeJob := reconciler.generateUpgradeJob(ctx, upgrade, startup, "echo testKey")
269270
assert.Assert(t, cmp.MarshalContains(tdeJob,
270-
`/usr/pgsql-"${new_version}"/bin/initdb --allow-group-access -k -D /pgdata/pg"${new_version}" --encryption-key-command "echo testKey"`))
271+
`PGDATA="${new_data}" "${new_bin}/initdb" --allow-group-access --data-checksums --encryption-key-command='echo testKey'`))
271272
}
272273

273274
func TestGenerateRemoveDataJob(t *testing.T) {
@@ -343,17 +344,19 @@ spec:
343344
containers:
344345
- command:
345346
- bash
346-
- -ceu
347+
- -c
347348
- --
348349
- |-
349-
declare -r old_version="$1"
350-
printf 'Removing PostgreSQL data dir for pg%s...\n\n' "$@"
351-
echo -e "Checking the directory exists and isn't being used...\n"
352-
cd /pgdata || exit
353-
if [ "$(/usr/pgsql-"${old_version}"/bin/pg_controldata /pgdata/pg"${old_version}" | grep -c "shut down in recovery")" -ne 1 ]; then echo -e "Directory in use, cannot remove..."; exit 1; fi
354-
echo -e "Removing old pgdata directory...\n"
355-
rm -rf /pgdata/pg"${old_version}" "$(realpath /pgdata/pg${old_version}/pg_wal)"
356-
echo -e "Remove Data Job Complete!"
350+
shopt -so errexit nounset
351+
declare -r data_volume='/pgdata' old_version="$1"
352+
printf 'Removing PostgreSQL %s data...\n\n' "$@"
353+
delete() (set -x && rm -rf -- "$@")
354+
old_data="${data_volume}/pg${old_version}"
355+
control=$(LC_ALL=C /usr/pgsql-${old_version}/bin/pg_controldata "${old_data}")
356+
read -r state <<< "${control##*cluster state:}"
357+
[[ "${state}" == 'shut down in recovery' ]] || { printf >&2 'Unexpected state! %q\n' "${state}"; exit 1; }
358+
delete "${old_data}/pg_wal/"
359+
delete "${old_data}" && echo 'Success!'
357360
- remove
358361
- "19"
359362
image: img4

0 commit comments

Comments
 (0)