Skip to content

Commit

Permalink
Fix diff-file being written unconditionally.
Browse files Browse the repository at this point in the history
Restore original behavior where a diff file is only written if
the 'mirror.diff-file' config option is set to a non-empty value.
  • Loading branch information
flyinghyrax committed Mar 30, 2024
1 parent d6fbfe3 commit 1c833a2
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 35 deletions.
87 changes: 53 additions & 34 deletions src/bandersnatch/mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
import os
import sys
import time
from collections.abc import Awaitable
from collections.abc import Awaitable, Callable
from json import dump
from pathlib import Path, WindowsPath
from threading import RLock
from typing import Any
from typing import Any, TypeVar
from urllib.parse import unquote, urlparse

from filelock import Timeout
Expand All @@ -22,7 +22,7 @@
from .master import Master
from .package import Package
from .simple import SimpleAPI, SimpleFormat, SimpleFormats
from .storage import storage_backend_plugins
from .storage import Storage, storage_backend_plugins

LOG_PLUGINS = True
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -188,7 +188,6 @@ def __init__(
digest_name: str | None = None,
root_uri: str | None = None,
keep_index_versions: int = 0,
diff_file: Path | str | None = None,
diff_append_epoch: bool = False,
diff_full_path: Path | str | None = None,
flock_timeout: int = 1,
Expand Down Expand Up @@ -230,7 +229,6 @@ def __init__(
# PyPI mirror, which requires serving packages from
# https://files.pythonhosted.org
self.root_uri = root_uri or ""
self.diff_file = diff_file
self.diff_append_epoch = diff_append_epoch
self.diff_full_path = diff_full_path
self.keep_index_versions = keep_index_versions
Expand Down Expand Up @@ -913,6 +911,44 @@ async def download_file(
return path


_T = TypeVar("_T")


async def _run_in_storage_executor(
storage_plugin: Storage, func: Callable[..., _T], *args: Any
) -> _T:
return await storage_plugin.loop.run_in_executor(
storage_plugin.executor, func, *args
)


async def _setup_diff_file(
storage_plugin: Storage, configured_path: str, append_epoch: bool
) -> Path:
# use the storage backend to convert an abstract path to a concrete one
concrete_path = storage_plugin.PATH_BACKEND(configured_path)

# create parent directories if needed
await _run_in_storage_executor(
storage_plugin, lambda: concrete_path.parent.mkdir(exist_ok=True, parents=True)
)

# adjust the file/directory name if timestamps are enabled
if append_epoch:
epoch = int(time.time())
concrete_path = concrete_path.with_name(f"{concrete_path.name}-{epoch}")

Check warning on line 939 in src/bandersnatch/mirror.py

View check run for this annotation

Codecov / codecov/patch

src/bandersnatch/mirror.py#L938-L939

Added lines #L938 - L939 were not covered by tests

# if the path is directory, adjust to write to a file inside it
if await _run_in_storage_executor(storage_plugin, concrete_path.is_dir):
concrete_path = concrete_path / "mirrored-files"

Check warning on line 943 in src/bandersnatch/mirror.py

View check run for this annotation

Codecov / codecov/patch

src/bandersnatch/mirror.py#L943

Added line #L943 was not covered by tests

# if there is an existing file there then delete it
if await _run_in_storage_executor(storage_plugin, concrete_path.is_file):
concrete_path.unlink()

Check warning on line 947 in src/bandersnatch/mirror.py

View check run for this annotation

Codecov / codecov/patch

src/bandersnatch/mirror.py#L947

Added line #L947 was not covered by tests

return concrete_path


async def mirror(
config: configparser.ConfigParser,
specific_packages: list[str] | None = None,
Expand All @@ -928,28 +964,13 @@ async def mirror(
)
)

diff_file = storage_plugin.PATH_BACKEND(config_values.diff_file_path)
diff_full_path: Path | str
if diff_file:
diff_file.parent.mkdir(exist_ok=True, parents=True)
if config_values.diff_append_epoch:
diff_full_path = diff_file.with_name(f"{diff_file.name}-{int(time.time())}")
else:
diff_full_path = diff_file
else:
diff_full_path = ""

if diff_full_path:
if isinstance(diff_full_path, str):
diff_full_path = storage_plugin.PATH_BACKEND(diff_full_path)
if await storage_plugin.loop.run_in_executor(
storage_plugin.executor, diff_full_path.is_file
):
diff_full_path.unlink()
elif await storage_plugin.loop.run_in_executor(
storage_plugin.executor, diff_full_path.is_dir
):
diff_full_path = diff_full_path / "mirrored-files"
diff_full_path: Path | None = None
if config_values.diff_file_path:
diff_full_path = await _setup_diff_file(
storage_plugin,
config_values.diff_file_path,
config_values.diff_append_epoch,
)

mirror_url = config.get("mirror", "master")
timeout = config.getfloat("mirror", "timeout")
Expand All @@ -975,9 +996,8 @@ async def mirror(
keep_index_versions=config.getint(
"mirror", "keep_index_versions", fallback=0
),
diff_file=diff_file,
diff_append_epoch=config_values.diff_append_epoch,
diff_full_path=diff_full_path if diff_full_path else None,
diff_full_path=diff_full_path,
cleanup=config_values.cleanup,
release_files_save=config_values.release_files_save,
download_mirror=config_values.download_mirror,
Expand All @@ -997,14 +1017,13 @@ async def mirror(
loggable_changes = [str(chg) for chg in package_changes]
logger.debug(f"{package_name} added: {loggable_changes}")

if mirror.diff_full_path:
logger.info(f"Writing diff file to {mirror.diff_full_path}")
if diff_full_path:
logger.info(f"Writing diff file to '{diff_full_path}'")
diff_text = f"{os.linesep}".join(
[str(chg.absolute()) for chg in mirror.diff_file_list]
)
diff_file = mirror.storage_backend.PATH_BACKEND(mirror.diff_full_path)
await storage_plugin.loop.run_in_executor(
storage_plugin.executor, diff_file.write_text, diff_text
await _run_in_storage_executor(
storage_plugin, diff_full_path.write_text, diff_text
)

return 0
1 change: 0 additions & 1 deletion src/bandersnatch/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ def test_main_reads_config_values(mirror_mock: mock.MagicMock, tmpdir: Path) ->
"keep_index_versions": 0,
"release_files_save": True,
"storage_backend": "filesystem",
"diff_file": diff_file,
"diff_append_epoch": False,
"diff_full_path": diff_file,
"cleanup": False,
Expand Down
66 changes: 66 additions & 0 deletions src/bandersnatch/tests/test_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import unittest.mock as mock
from collections.abc import Iterator
from configparser import ConfigParser
from os import sep
from pathlib import Path
from tempfile import TemporaryDirectory
Expand All @@ -14,6 +15,7 @@
from bandersnatch.configuration import BandersnatchConfig, Singleton
from bandersnatch.master import Master
from bandersnatch.mirror import BandersnatchMirror
from bandersnatch.mirror import mirror as mirror_cmd
from bandersnatch.package import Package
from bandersnatch.simple import SimpleFormats
from bandersnatch.tests.test_simple_fixtures import SIXTYNINE_METADATA
Expand Down Expand Up @@ -206,6 +208,70 @@ def test_mirror_removes_old_status_and_todo_inits_generation(tmpdir: Path) -> No
assert open(str(tmpdir / "generation")).read().strip() == "5"


# This is a test to check that a diff file is NOT created when 'diff-file' isn't set in
# configuration. Obviously we can't exhaustively assert a negative - the regression
# being checked for here is a diff file being created at a 'default' location
# ($directory/mirrored-files) when diff-file isn't configured.
@pytest.mark.asyncio
async def test_mirror_subcommand_only_creates_diff_file_if_configured(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture
) -> None:
# Setup 1 - create a configuration for the 'mirror' subcommand
# (Configuration is passed as a parameter, so we don't need to fiddle with Singleton)
config = ConfigParser()
config.read_dict(
{
"mirror": {
"master": "https://pypi.org",
"directory": (tmp_path / "mirror").as_posix(),
"timeout": 15,
"stop-on-error": False,
"workers": 1,
"hash-index": False,
},
"plugins": {"enabled": ["allowlist_project"]},
"allowlist": {"packages": ["black", "ruff", "autopep8"]},
}
)

# Setup 2 - patch 'Mirror.synchronize' with a stub returning fixed data
# Normally it returns 'self.altered_packages', and those are what get written into
# the diff file. Non-empty fixed content here means a diff-file should exist and
# have content IF it is configured.
async def mock_synchronize(
self: Any,
specific_packages: list[str] | None = None,
sync_simple_index: bool = True,
) -> dict[str, set[str]]:
return {
"ruff": {"fake/file/path/1", "fake/file/path/2"},
"black": {"fake/file/path/3"},
}

monkeypatch.setattr(BandersnatchMirror, "synchronize", mock_synchronize)

# Execute the 'mirror' subcommand. Because 'synchronize' is patched, this is pretty
# much running the body of the 'mirror' function, including creating Master and
# Mirror instances, but doesn't perform any actual package syncing at all.
await mirror_cmd(config=config)

# An 'info'-level message is always logged after 'synchronize' completes. It should
# be consistent with the static data returned from our mock method.
expected_message = "2 packages had changes"
assert any(
m == expected_message for m in caplog.messages
), f"Logged '{expected_message}'"

# Because BandersnatchMirror's 'bootstrap' runs, this directory should exist;
# this is just sanity checking that some disk IO happened...
web_folder = tmp_path / "mirror" / "web"
assert web_folder.is_dir(), f"The folder '{web_folder}' was created"

# But there should NOT be a diff file:
absent_diff_file = tmp_path / "mirror" / "mirrored-files"
assert not absent_diff_file.exists(), f"there is no diff file at {absent_diff_file}"


def test_mirror_with_same_homedir_needs_lock(
mirror: BandersnatchMirror, tmpdir: Path
) -> None:
Expand Down

0 comments on commit 1c833a2

Please sign in to comment.