From d39423807d5cf8ef63c75c3bcf806f48fe794599 Mon Sep 17 00:00:00 2001
From: Rohan McGovern <rmcgover@redhat.com>
Date: Tue, 3 Oct 2023 09:43:10 +1000
Subject: [PATCH] Use --exodus-commit=phase1 in pulp hook [RHELDST-20490]

When this hook is active, the control flow is:

1. pubtools-exodus: create publish
2. rhsm-pulp: add content to publish
3. pubtools-exodus: commit publish

Problem: step 2 might succeed from Pulp's point of view, but the commit
at step 3 might then fail, leaving the content not actually published to
the CDN storage. As Pulp believes the publish was successful, it would
then skip publishing certain files at the next attempt, leading to
missing content.

The newly introduced concept of a phase1 commit should fix this by
allowing step 2 to request that all "phase 1 content" (e.g. RPMs, but
not repodata) is flushed to the CDN storage before proceeding. Start
using it via the new, related exodus-rsync argument.

While it would appear to make sense to do this at *every* publish, an
env var is added as an escape hatch to go back to the old behavior, just
in case something doesn't work as expected or upgrades are performed in
the wrong order.
---
 pubtools/exodus/_hooks/pulp.py  | 15 +++++++++++++--
 tests/test_exodus_pulp_hooks.py | 24 ++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/pubtools/exodus/_hooks/pulp.py b/pubtools/exodus/_hooks/pulp.py
index 70f6ad8..b48f6a4 100644
--- a/pubtools/exodus/_hooks/pulp.py
+++ b/pubtools/exodus/_hooks/pulp.py
@@ -24,8 +24,8 @@ def __init__(self):
     def pulp_repository_pre_publish(self, repository, options):
         """Invoked as the first step in publishing a Pulp repository.
 
-        This implementation adds to each config the --exodus-publish argument,
-        attaching the repository to an exodus-gw publish.
+        This implementation adds to each config the necessary arguments
+        to attach this repository's publish task to an exodus-gw publish.
 
         Args:
             repository (:class:`~pubtools.pulplib.Repository`):
@@ -48,6 +48,17 @@ def pulp_repository_pre_publish(self, repository, options):
             list(options.rsync_extra_args) if options.rsync_extra_args else []
         )
         args.append("--exodus-publish=%s" % self.publish["id"])
+
+        # 2023-10: by default, the pulp hook should always use phase1 commit.
+        # But since the functionality is relatively new, this is provided as an
+        # escape hatch in case it would need to be disabled in some environments
+        # for unanticipated reasons.
+        #
+        # Consider deleting this conditional once the functionality is proven
+        # in production.
+        if os.getenv("EXODUS_PULP_HOOK_PHASE1_COMMIT", "1") == "1":
+            args.append("--exodus-commit=phase1")
+
         return attr.evolve(options, rsync_extra_args=args)
 
     @property
diff --git a/tests/test_exodus_pulp_hooks.py b/tests/test_exodus_pulp_hooks.py
index 8ff759d..0ba13ab 100644
--- a/tests/test_exodus_pulp_hooks.py
+++ b/tests/test_exodus_pulp_hooks.py
@@ -34,6 +34,7 @@ def test_exodus_pulp_typical(
             rsync_extra_args=[
                 "--existing-arg",
                 "--exodus-publish=497f6eca-6276-4993-bfeb-53cbbbba6f08",
+                "--exodus-commit=phase1",
             ],
         )
 
@@ -55,6 +56,29 @@ def test_exodus_pulp_typical(
         )
 
 
+def test_exodus_pulp_phase1_disabled(
+    successful_gw_task, monkeypatch: pytest.MonkeyPatch
+):
+    monkeypatch.setenv("EXODUS_PULP_HOOK_PHASE1_COMMIT", "0")
+
+    with task_context():
+        hook_rets = pm.hook.pulp_repository_pre_publish(
+            repository=None,
+            options=FakePublishOptions(rsync_extra_args=["--existing-arg"]),
+        )
+        hook_rets = [ret for ret in hook_rets if ret is not None]
+
+        # The pre-publish hook should've returned options with exodus-publish
+        # arg appended to existing rsync_extra_args, but this time it should
+        # NOT have added exodus-commit due to the above env var.
+        assert hook_rets[0] == FakePublishOptions(
+            rsync_extra_args=[
+                "--existing-arg",
+                "--exodus-publish=497f6eca-6276-4993-bfeb-53cbbbba6f08",
+            ],
+        )
+
+
 def test_exodus_pulp_no_publish(patch_env_vars, caplog):
     caplog.set_level(logging.DEBUG, "pubtools-exodus")