Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow source files to publish at by-hash paths #1061

Merged
merged 1 commit into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ VARSYAML

cat >> vars/main.yaml << VARSYAML
pulp_env: {}
pulp_settings: {"allowed_content_checksums": ["md5", "sha1", "sha256", "sha512"], "allowed_export_paths": ["/tmp"], "allowed_import_paths": ["/tmp"]}
pulp_settings: {"allowed_content_checksums": ["md5", "sha1", "sha256", "sha512"], "allowed_export_paths": ["/tmp"], "allowed_import_paths": ["/tmp"], "apt_by_hash": true}
pulp_scheme: https
pulp_default_container: ghcr.io/pulp/pulp-ci-centos9:latest
VARSYAML
Expand Down
1 change: 1 addition & 0 deletions CHANGES/1059.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend publishing at by-hash paths to source files.
38 changes: 25 additions & 13 deletions pulp_deb/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,19 +365,9 @@ def finish(self):

# Generating metadata files using checksum
if settings.APT_BY_HASH:
for path, index in (
(package_index_path, package_index),
(gz_package_index_path, gz_package_index),
):
for checksum in settings.ALLOWED_CONTENT_CHECKSUMS:
if checksum in CHECKSUM_TYPE_MAP:
hashed_index_path = _fetch_file_checksum(path, index, checksum)
hashed_index = PublishedMetadata.create_from_file(
publication=self.parent.publication,
file=File(open(path, "rb")),
relative_path=hashed_index_path,
)
hashed_index.save()
self.generate_by_hash(
package_index_path, package_index, gz_package_index_path, gz_package_index
)

self.parent.add_metadata(package_index)
self.parent.add_metadata(gz_package_index)
Expand All @@ -394,9 +384,31 @@ def finish(self):
publication=self.parent.publication, file=File(open(gz_source_index_path, "rb"))
)
gz_source_index.save()

# Generating metadata files using checksum
if settings.APT_BY_HASH:
self.generate_by_hash(
source_index_path, source_index, gz_source_index_path, gz_source_index
)

self.parent.add_metadata(source_index)
self.parent.add_metadata(gz_source_index)

def generate_by_hash(self, index_path, index, gz_index_path, gz_index):
    """Publish by-hash copies of an index file and of its gzip variant.

    For both (path, metadata) pairs, and for every checksum listed in
    ``settings.ALLOWED_CONTENT_CHECKSUMS`` that is also a key of
    ``CHECKSUM_TYPE_MAP``, a ``PublishedMetadata`` is created from the file
    at that path and saved under the relative path returned by
    ``_fetch_file_checksum``.

    Args:
        index_path: Path of the uncompressed index file; opened for reading.
        index: Metadata object belonging to ``index_path``; handed to
            ``_fetch_file_checksum``.
        gz_index_path: Path of the gzip-compressed index file.
        gz_index: Metadata object belonging to ``gz_index_path``.
    """
    index_files = (
        (index_path, index),
        (gz_index_path, gz_index),
    )
    # Dedicated loop names so the ``index`` parameter is not rebound mid-method.
    for file_path, file_index in index_files:
        for checksum in settings.ALLOWED_CONTENT_CHECKSUMS:
            if checksum not in CHECKSUM_TYPE_MAP:
                continue
            hashed_index_path = _fetch_file_checksum(file_path, file_index, checksum)
            # Open the file in a context manager so the handle is closed after
            # every iteration instead of leaking one descriptor per checksum.
            # NOTE(review): assumes create_from_file fully consumes the file
            # before returning - confirm against the pulpcore implementation.
            with open(file_path, "rb") as index_file:
                hashed_index = PublishedMetadata.create_from_file(
                    publication=self.parent.publication,
                    file=File(index_file),
                    relative_path=hashed_index_path,
                )
            hashed_index.save()


class _ReleaseHelper:
def __init__(
Expand Down
42 changes: 42 additions & 0 deletions pulp_deb/tests/functional/api/test_download_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import os
import pytest
import hashlib
import re
from random import choice
from urllib.parse import urljoin

from pulp_deb.tests.functional.constants import (
DEB_FIXTURE_STANDARD_REPOSITORY_NAME,
DEB_FIXTURE_SINGLE_DIST,
DEB_GENERIC_CONTENT_NAME,
DEB_PACKAGE_NAME,
DEB_PACKAGE_RELEASE_COMPONENT_NAME,
Expand Down Expand Up @@ -204,3 +206,43 @@ def test_download_cached_content(
content_hash = hashlib.sha256(content).hexdigest()

assert package_hash == content_hash


@pytest.mark.parallel
def test_apt_by_hash(
    deb_init_and_sync,
    deb_publication_factory,
    deb_distribution_factory,
    deb_get_content_types,
    download_content_unit,
):
    """Verify that deb and deb source content is available in the by-hash path.

    The SHA256 entries for uncompressed ``Packages`` and ``Sources`` indices are
    parsed from the published ``Release`` file. Each index is then downloaded
    from its ``by-hash/SHA256/<digest>`` location, and the downloaded bytes must
    hash to the advertised digest.
    """
    # Create/sync a repo and then do a publish and create a distro
    repo, _ = deb_init_and_sync(remote_args={"sync_sources": True})
    deb_publication_factory(repo, structured=True, simple=True)
    distribution = deb_distribution_factory(repository=repo)

    # Obtain the Release file and parse out the sha256
    release_metadata = deb_get_content_types(
        "apt_release_file_api", DEB_RELEASE_FILE_NAME, repo, repo.latest_version_href
    )
    single_release_metadata = next(
        release for release in release_metadata if release.distribution == DEB_FIXTURE_SINGLE_DIST
    )
    release_file_path = next(
        key for key in single_release_metadata.artifacts if key.endswith("/Release")
    )
    release_file = download_content_unit(distribution.base_path, release_file_path).decode("utf-8")
    sha256_section = release_file.split("SHA256:")[1].split("SHA512:")[0].strip()
    # The section is stripped above, so the last entry has no trailing
    # whitespace; accept end-of-string as a terminator to keep it.
    sha256_pattern = re.compile(
        r"([a-fA-F0-9]{64})\s+\d+\s+([^/\s]+/[^/\s]+)/(Packages|Sources)(?:\s|$)"
    )
    matches = sha256_pattern.findall(sha256_section)
    sha256_dict = {path: sha for sha, path, _ in matches}

    # Guard against a vacuous pass when nothing could be parsed.
    assert sha256_dict, "No Packages/Sources SHA256 entries found in the Release file"

    # Verify that all by-hash files are available and carry the advertised content.
    # Comparing digests is stricter than searching the body for "404": an error
    # page cannot hash to the expected value, and a valid index that happens to
    # contain the text "404" no longer fails the test.
    for path, sha256 in sha256_dict.items():
        content_url = f"dists/{DEB_FIXTURE_SINGLE_DIST}/{path}/by-hash/SHA256/{sha256}"
        content = download_content_unit(distribution.base_path, content_url)
        assert hashlib.sha256(content).hexdigest() == sha256.lower(), content_url
1 change: 1 addition & 0 deletions template_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ pulp_settings:
- /tmp
allowed_import_paths:
- /tmp
apt_by_hash: true
pulp_settings_azure: null
pulp_settings_gcp: null
pulp_settings_s3: null
Expand Down