Skip to content

Commit

Permalink
Allow source files to publish at by-hash paths
Browse files Browse the repository at this point in the history
Closes #1059
  • Loading branch information
Andrew Cheng committed Jul 10, 2024
1 parent 106f148 commit d4d1e43
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGES/1059.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Extend publishing at by-hash paths to source files.
38 changes: 25 additions & 13 deletions pulp_deb/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,19 +355,9 @@ def finish(self):

# Generating metadata files using checksum
if settings.APT_BY_HASH:
for path, index in (
(package_index_path, package_index),
(gz_package_index_path, gz_package_index),
):
for checksum in settings.ALLOWED_CONTENT_CHECKSUMS:
if checksum in CHECKSUM_TYPE_MAP:
hashed_index_path = _fetch_file_checksum(path, index, checksum)
hashed_index = PublishedMetadata.create_from_file(
publication=self.parent.publication,
file=File(open(path, "rb")),
relative_path=hashed_index_path,
)
hashed_index.save()
self.generate_by_hash(
package_index_path, package_index, gz_package_index_path, gz_package_index
)

self.parent.add_metadata(package_index)
self.parent.add_metadata(gz_package_index)
Expand All @@ -384,9 +374,31 @@ def finish(self):
publication=self.parent.publication, file=File(open(gz_source_index_path, "rb"))
)
gz_source_index.save()

# Generating metadata files using checksum
if settings.APT_BY_HASH:
self.generate_by_hash(
source_index_path, source_index, gz_source_index_path, gz_source_index
)

self.parent.add_metadata(source_index)
self.parent.add_metadata(gz_source_index)

def generate_by_hash(self, index_path, index, gz_index_path, gz_index):
    """Publish by-hash copies of an index file and of its gzipped variant.

    For each of the two (path, index) pairs and for every checksum type in
    ``settings.ALLOWED_CONTENT_CHECKSUMS`` that is also present in
    ``CHECKSUM_TYPE_MAP``, a ``PublishedMetadata`` is created from the file at
    that path. Its ``relative_path`` is whatever ``_fetch_file_checksum``
    returns for the (path, index, checksum) triple.

    Args:
        index_path: Path of the uncompressed index file on disk.
        index: Object describing the uncompressed index; it is only forwarded
            to ``_fetch_file_checksum`` (presumably the already published
            metadata for that file -- confirm against the caller).
        gz_index_path: Path of the gzipped index file on disk.
        gz_index: Counterpart of ``index`` for the gzipped file.
    """
    # The set of usable checksum types does not depend on the file, so it is
    # computed once instead of once per path.
    checksums = [
        checksum
        for checksum in settings.ALLOWED_CONTENT_CHECKSUMS
        if checksum in CHECKSUM_TYPE_MAP
    ]

    # The loop variable is deliberately not called ``index`` so that it does
    # not shadow the parameter of the same name.
    for path, metadata in ((index_path, index), (gz_index_path, gz_index)):
        for checksum in checksums:
            hashed_index_path = _fetch_file_checksum(path, metadata, checksum)
            # Open the file in a context manager so the handle is closed once
            # the metadata has been created, instead of leaking one descriptor
            # per (file, checksum) pair.
            # NOTE(review): relies on create_from_file having consumed the
            # file by the time it returns -- confirm against pulpcore.
            with open(path, "rb") as index_file:
                hashed_index = PublishedMetadata.create_from_file(
                    publication=self.parent.publication,
                    file=File(index_file),
                    relative_path=hashed_index_path,
                )
            hashed_index.save()


class _ReleaseHelper:
def __init__(
Expand Down
43 changes: 43 additions & 0 deletions pulp_deb/tests/functional/api/test_download_content.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""Tests that verify download of content served by Pulp."""

import os
import pytest
import hashlib
import re
from random import choice
from urllib.parse import urljoin

from pulp_deb.tests.functional.constants import (
DEB_FIXTURE_STANDARD_REPOSITORY_NAME,
DEB_FIXTURE_SINGLE_DIST,
DEB_GENERIC_CONTENT_NAME,
DEB_PACKAGE_NAME,
DEB_PACKAGE_RELEASE_COMPONENT_NAME,
Expand Down Expand Up @@ -203,3 +206,43 @@ def test_download_cached_content(
content_hash = hashlib.sha256(content).hexdigest()

assert package_hash == content_hash


@pytest.mark.parallel
def test_apt_by_hash(
    deb_init_and_sync,
    deb_publication_factory,
    deb_distribution_factory,
    deb_get_content_types,
    download_content_unit,
):
    """Verify that deb and deb source content is available in the by-hash path."""
    # Create/sync a repo (sources included), then publish it and create a distribution.
    repo, _ = deb_init_and_sync(remote_args={"sync_sources": True})
    publication = deb_publication_factory(repo, structured=True, simple=True)
    distribution = deb_distribution_factory(publication)

    # Find the Release file belonging to the single-distribution fixture and download it.
    release_metadata = deb_get_content_types(
        "apt_release_file_api", DEB_RELEASE_FILE_NAME, repo, repo.latest_version_href
    )
    single_release_metadata = next(
        release for release in release_metadata if release.distribution == DEB_FIXTURE_SINGLE_DIST
    )
    release_file_path = next(
        key for key in single_release_metadata.artifacts.keys() if key.endswith("/Release")
    )
    release_file = download_content_unit(distribution.base_path, release_file_path).decode("utf-8")

    # Extract "<sha256> <size> <dir>/(Packages|Sources)" entries from the SHA256 section.
    # The ``(?!\S)`` lookahead accepts both trailing whitespace and end of string, so the
    # last entry of the (stripped) section is not lost, while compressed variants such as
    # "Packages.gz" are still excluded.
    sha256_section = release_file.split("SHA256:")[1].split("SHA512:")[0].strip()
    sha256_pattern = re.compile(
        r"([a-fA-F0-9]{64})\s+\d+\s+([^/\s]+/[^/\s]+)/(?:Packages|Sources)(?!\S)"
    )
    sha256_dict = {path: sha for sha, path in sha256_pattern.findall(sha256_section)}

    # Without this guard the loop below would pass vacuously if nothing was parsed.
    assert sha256_dict, "No Packages/Sources entries found in the Release file SHA256 section"

    # Verify that all by-hash files are available
    for path, sha256 in sha256_dict.items():
        content_url = f"dists/{DEB_FIXTURE_SINGLE_DIST}/{path}/by-hash/SHA256/{sha256}"
        # NOTE(review): this is a substring check on the body; it would also trip on index
        # content that merely contains "404" -- confirm how download_content_unit reports
        # a missing file before tightening it.
        assert "404" not in download_content_unit(distribution.base_path, content_url).decode(
            "utf-8"
        )
1 change: 1 addition & 0 deletions template_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ pulp_settings:
- /tmp
allowed_import_paths:
- /tmp
apt_by_hash: true
pulp_settings_azure: null
pulp_settings_gcp: null
pulp_settings_s3: null
Expand Down

0 comments on commit d4d1e43

Please sign in to comment.