From e7e9bce2146ae43a1b2db3ea8bd50890cbf872c3 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Sat, 17 Jun 2023 14:39:13 -0400 Subject: [PATCH 01/15] Supply sha256 query parameters using boto3 to avoid hundreds of extra Gigabytes of downloads each day during pipenv and poetry resolution lock cycles. --- s3_management/manage.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 655f7de40..720b707ea 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -11,6 +11,7 @@ from packaging.version import parse import boto3 +from botocore.exceptions import NoCredentialsError S3 = boto3.resource('s3') @@ -212,6 +213,23 @@ def normalize_package_version(self: S3IndexType, obj: str) -> str: def obj_to_package_name(self, obj: str) -> str: return path.basename(obj).split('-', 1)[0] + def fetch_checksum_from_s3(self, s3_key): + s3_key = s3_key.replace("%2B", "+") + try: + response = CLIENT.get_object_attributes( + Bucket=BUCKET, + Key=s3_key, + ObjectAttributes=['Checksum'] + ) + checksum = response['Checksum']['ChecksumSHA256'] + return checksum + except NoCredentialsError: + print("No AWS credentials found") + return None + except Exception as e: + print(f"Unable to retrieve checksum due to {e}") + return None + def to_legacy_html( self, subdir: Optional[str]=None @@ -255,7 +273,8 @@ def to_simple_package_html( out.append(' ') out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): - out.append(f' {path.basename(obj).replace("%2B","+")}
') + checksum = self.fetch_checksum_from_s3(obj) + out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer out.append(' ') out.append('') From 713cfea46781a8f83c19427fada0478896492eab Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Sat, 17 Jun 2023 14:43:42 -0400 Subject: [PATCH 02/15] Safety check for when checksum isn't found. --- s3_management/manage.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 720b707ea..fd8a4e9fb 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -274,7 +274,10 @@ def to_simple_package_html( out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): checksum = self.fetch_checksum_from_s3(obj) - out.append(f' {path.basename(obj).replace("%2B","+")}
') + if checksum: + out.append(f' {path.basename(obj).replace("%2B","+")}
') + else: + out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer out.append(' ') out.append('') From b3799a8eb42b56118a4bc0658902875734888721 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Wed, 5 Jul 2023 20:47:05 -0400 Subject: [PATCH 03/15] PR feedback --- s3_management/manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index fd8a4e9fb..7fdb1e1c0 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -275,7 +275,7 @@ def to_simple_package_html( for obj in sorted(self.gen_file_list(subdir, package_name)): checksum = self.fetch_checksum_from_s3(obj) if checksum: - out.append(f' {path.basename(obj).replace("%2B","+")}
') + out.append(f' {path.basename(obj).replace("%2B","+")}
') else: out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer From 089e93d46206b22f25e4953cce635b51c3994087 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Wed, 12 Jul 2023 22:46:20 -0400 Subject: [PATCH 04/15] Retrieve the checksum if present in the from_S3 method and change objects to be a Dict mapping the key to the checksum, update usage code. --- s3_management/manage.py | 54 +++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 34 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 7fdb1e1c0..1b16d6319 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -11,11 +11,9 @@ from packaging.version import parse import boto3 -from botocore.exceptions import NoCredentialsError S3 = boto3.resource('s3') -CLIENT = boto3.client('s3') BUCKET = S3.Bucket('pytorch') ACCEPTED_FILE_EXTENSIONS = ("whl", "zip", "tar.gz") @@ -122,8 +120,8 @@ def between_bad_dates(package_build_time: datetime): class S3Index: - def __init__(self: S3IndexType, objects: List[str], prefix: str) -> None: - self.objects = objects + def __init__(self: S3IndexType, objects: Dict[str, str], prefix: str) -> None: + self.objects = objects # s3 key to checksum mapping self.prefix = prefix.rstrip("/") self.html_name = PREFIXES_WITH_HTML[self.prefix] # should dynamically grab subdirectories like whl/test/cu101 @@ -147,7 +145,7 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[str]: # also includes versions without GPU specifier (i.e. cu102) for easier # sorting, sorts in reverse to put the most recent versions first all_sorted_packages = sorted( - {self.normalize_package_version(obj) for obj in self.objects}, + {self.normalize_package_version(s3_key) for s3_key in self.objects.keys()}, key=lambda name_ver: parse(name_ver.split('-', 1)[-1]), reverse=True, ) @@ -167,10 +165,12 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[str]: to_hide.add(obj) else: packages[package_name] += 1 - return set(self.objects).difference({ - obj for obj in self.objects - if self.normalize_package_version(obj) in to_hide - }) + nightly_packages = {} + for obj, checksum in self.objects.items(): + normalized_package_version = self.normalize_package_version(obj) + if not normalized_package_version in to_hide: + nightly_packages[normalized_package_version] = checksum + return nightly_packages def is_obj_at_root(self, obj:str) -> bool: return path.dirname(obj) == self.prefix @@ -191,15 +191,15 @@ def gen_file_list( else self.objects ) subdir = self._resolve_subdir(subdir) + '/' - for obj in objects: + for obj, checksum in objects.items(): if package_name is not None: if self.obj_to_package_name(obj) != package_name: continue if self.is_obj_at_root(obj) or obj.startswith(subdir): - yield obj + yield obj, checksum def get_package_names(self, subdir: Optional[str] = None) -> List[str]: - return sorted(set(self.obj_to_package_name(obj) for obj in self.gen_file_list(subdir))) + return sorted(set(self.obj_to_package_name(obj) for obj, _ in self.gen_file_list(subdir))) def normalize_package_version(self: S3IndexType, obj: str) -> str: # removes the GPU specifier from the package name as well as @@ -213,23 +213,6 @@ def normalize_package_version(self: S3IndexType, obj: str) -> str: def obj_to_package_name(self, obj: str) -> str: return path.basename(obj).split('-', 1)[0] - def fetch_checksum_from_s3(self, s3_key): - s3_key = s3_key.replace("%2B", "+") - try: - response = CLIENT.get_object_attributes( - Bucket=BUCKET, - Key=s3_key, - ObjectAttributes=['Checksum'] - ) - checksum = response['Checksum']['ChecksumSHA256'] - return checksum - except NoCredentialsError: - print("No AWS credentials found") - return None - except Exception as e: - print(f"Unable to retrieve checksum due to {e}") - return None - def to_legacy_html( self, subdir: Optional[str]=None @@ -244,7 +227,7 @@ def to_legacy_html( out: List[str] = [] subdir = self._resolve_subdir(subdir) is_root = subdir == self.prefix - for obj in self.gen_file_list(subdir): + for obj, _ in self.gen_file_list(subdir): # Strip our prefix sanitized_obj = obj.replace(subdir, "", 1) if sanitized_obj.startswith('/'): @@ -272,8 +255,7 @@ def to_simple_package_html( out.append('') out.append(' ') out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) - for obj in sorted(self.gen_file_list(subdir, package_name)): - checksum = self.fetch_checksum_from_s3(obj) + for obj, checksum in sorted(self.gen_file_list(subdir, package_name)): if checksum: out.append(f' {path.basename(obj).replace("%2B","+")}
') else: @@ -338,7 +320,6 @@ def upload_pep503_htmls(self) -> None: Body=self.to_simple_package_html(subdir=subdir, package_name=pkg_name) ) - def save_legacy_html(self) -> None: for subdir in self.subdirs: print(f"INFO Saving {subdir}/{self.html_name}") @@ -370,10 +351,13 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: for pattern in ACCEPTED_SUBDIR_PATTERNS ]) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS) if is_acceptable: + response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") + sha256 = response.get("ChecksumSHA256") sanitized_key = obj.key.replace("+", "%2B") - objects.append(sanitized_key) + objects.append((sanitized_key, sha256)) return cls(objects, prefix) + def create_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser("Manage S3 HTML indices for PyTorch") parser.add_argument( @@ -385,6 +369,7 @@ def create_parser() -> argparse.ArgumentParser: parser.add_argument("--generate-pep503", action="store_true") return parser + def main(): parser = create_parser() args = parser.parse_args() @@ -409,5 +394,6 @@ def main(): if args.generate_pep503: idx.upload_pep503_htmls() + if __name__ == "__main__": main() From a6044911dc2b9fb1932868998c32c3c60bffd91c Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Thu, 13 Jul 2023 12:42:53 -0400 Subject: [PATCH 05/15] PR feedback --- s3_management/manage.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 1b16d6319..65f8164aa 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -120,7 +120,7 @@ def between_bad_dates(package_build_time: datetime): class S3Index: - def __init__(self: S3IndexType, objects: Dict[str, str], prefix: str) -> None: + def __init__(self: S3IndexType, objects: Dict[str, str | None], prefix: str) -> None: self.objects = objects # s3 key to checksum mapping self.prefix = prefix.rstrip("/") self.html_name = PREFIXES_WITH_HTML[self.prefix] @@ -130,7 +130,7 @@ def __init__(self: S3IndexType, objects: Dict[str, str], prefix: str) -> None: path.dirname(obj) for obj in objects if path.dirname != prefix } - def nightly_packages_to_show(self: S3IndexType) -> Set[str]: + def nightly_packages_to_show(self: S3IndexType) -> Dict[str, str | None]: """Finding packages to show based on a threshold we specify Basically takes our S3 packages, normalizes the version for easier @@ -145,7 +145,7 @@ def nightly_packages_to_show(self: S3IndexType) -> Set[str]: # also includes versions without GPU specifier (i.e. cu102) for easier # sorting, sorts in reverse to put the most recent versions first all_sorted_packages = sorted( - {self.normalize_package_version(s3_key) for s3_key in self.objects.keys()}, + {self.normalize_package_version(s3_key) for s3_key in self.objects}, key=lambda name_ver: parse(name_ver.split('-', 1)[-1]), reverse=True, ) @@ -185,7 +185,7 @@ def gen_file_list( self, subdir: Optional[str]=None, package_name: Optional[str] = None - ) -> Iterator[str]: + ) -> Iterator[str, str | None]: objects = ( self.nightly_packages_to_show() if self.prefix == 'whl/nightly' else self.objects @@ -351,6 +351,7 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: for pattern in ACCEPTED_SUBDIR_PATTERNS ]) and obj.key.endswith(ACCEPTED_FILE_EXTENSIONS) if is_acceptable: + # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") sha256 = response.get("ChecksumSHA256") sanitized_key = obj.key.replace("+", "%2B") From 984f7dd4cd82152c37c16db58b925364303e2817 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Thu, 13 Jul 2023 21:14:28 -0400 Subject: [PATCH 06/15] Update s3_management/manage.py use dictionary comprehension and maintain consistency of original key. Co-authored-by: Josh Cannon --- s3_management/manage.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 65f8164aa..9744196c9 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -165,12 +165,11 @@ def nightly_packages_to_show(self: S3IndexType) -> Dict[str, str | None]: to_hide.add(obj) else: packages[package_name] += 1 - nightly_packages = {} - for obj, checksum in self.objects.items(): - normalized_package_version = self.normalize_package_version(obj) - if not normalized_package_version in to_hide: - nightly_packages[normalized_package_version] = checksum - return nightly_packages + return { + s3_key: checksum + for s3_key, checksum in self.objects.items() + if self.normalize_package_version(s3_key) not in to_hide + } def is_obj_at_root(self, obj:str) -> bool: return path.dirname(obj) == self.prefix From 9966096694b51bf51181c32e17c4d5b1c572d306 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Thu, 13 Jul 2023 21:15:02 -0400 Subject: [PATCH 07/15] Update s3_management/manage.py Co-authored-by: Josh Cannon --- s3_management/manage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 9744196c9..ba242b4b5 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -255,10 +255,10 @@ def to_simple_package_html( out.append(' ') out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj, checksum in sorted(self.gen_file_list(subdir, package_name)): + maybe_fragment = "" if checksum: - out.append(f' {path.basename(obj).replace("%2B","+")}
') - else: - out.append(f' {path.basename(obj).replace("%2B","+")}
') + maybe_fragment = f"#sha256={checksum}" + out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer out.append(' ') out.append('') From 8312f09b1b24d3620b56d86a8f0fb24714a96f5b Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Thu, 13 Jul 2023 21:18:42 -0400 Subject: [PATCH 08/15] fix missed conversion of objects to dict. --- s3_management/manage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index ba242b4b5..6ee16befd 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -339,7 +339,7 @@ def save_pep503_htmls(self) -> None: @classmethod def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: - objects = [] + objects = {} prefix = prefix.rstrip("/") for obj in BUCKET.objects.filter(Prefix=prefix): is_acceptable = any([path.dirname(obj.key) == prefix] + [ @@ -354,7 +354,7 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") sha256 = response.get("ChecksumSHA256") sanitized_key = obj.key.replace("+", "%2B") - objects.append((sanitized_key, sha256)) + objects[sanitized_key] = sha256 return cls(objects, prefix) From b65a9454567b14d3105303da654d48df54b6d106 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 1 Aug 2023 01:08:45 -0400 Subject: [PATCH 09/15] Update s3_management/manage.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Vít Zikmund <75443448+vit-zikmund@users.noreply.github.com> --- s3_management/manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 6ee16befd..c70e7e4a0 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -352,7 +352,7 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: if is_acceptable: # Add PEP 503-compatible hashes to URLs to allow clients to avoid spurious downloads, if possible. response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") - sha256 = response.get("ChecksumSHA256") + sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() sanitized_key = obj.key.replace("+", "%2B") objects[sanitized_key] = sha256 return cls(objects, prefix) From 1d11042ab74043816beb34062ef71f1229f3abe8 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 1 Aug 2023 01:11:58 -0400 Subject: [PATCH 10/15] fix import --- s3_management/manage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index c70e7e4a0..dcd610f53 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import argparse +import base64 import time from os import path, makedirs From 88959300c893026f48cdb8ec7e89b361907077cf Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 1 Aug 2023 01:35:16 -0400 Subject: [PATCH 11/15] Refactor: PR feedback to use dataclass to organize potentially multiple attributes about the s3 objects. --- s3_management/manage.py | 81 ++++++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index dcd610f53..24f227d91 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -2,12 +2,13 @@ import argparse import base64 +import dataclasses import time from os import path, makedirs from datetime import datetime from collections import defaultdict -from typing import Iterator, List, Type, Dict, Set, TypeVar, Optional +from typing import Iterable, List, Type, Dict, Set, TypeVar, Optional from re import sub, match, search from packaging.version import parse @@ -104,6 +105,31 @@ S3IndexType = TypeVar('S3IndexType', bound='S3Index') + +@dataclasses.dataclass(frozen=True) +class S3Object: + key: str + checksum: str | None + + def __str__(self): + return self.key + + def __cmp__(self, other): + return self.key == other.key + + def __lt__(self, other): + return self.key < other.key + + def __le__(self, other): + return self.key <= other.key + + def __gt__(self, other): + return self.key > other.key + + def __ge__(self, other): + return self.key >= other.key + + def extract_package_build_time(full_package_name: str) -> datetime: result = search(PACKAGE_DATE_REGEX, full_package_name) if result is not None: @@ -121,8 +147,8 @@ def between_bad_dates(package_build_time: datetime): class S3Index: - def __init__(self: S3IndexType, objects: Dict[str, str | None], prefix: str) -> None: - self.objects = objects # s3 key to checksum mapping + def __init__(self: S3IndexType, objects: List[S3Object], prefix: str) -> None: + self.objects = objects self.prefix = prefix.rstrip("/") self.html_name = PREFIXES_WITH_HTML[self.prefix] # should dynamically grab subdirectories like whl/test/cu101 @@ -131,7 +157,7 @@ def __init__(self: S3IndexType, objects: Dict[str, str | None], prefix: str) -> path.dirname(obj) for obj in objects if path.dirname != prefix } - def nightly_packages_to_show(self: S3IndexType) -> Dict[str, str | None]: + def nightly_packages_to_show(self: S3IndexType) -> Set[S3Object]: """Finding packages to show based on a threshold we specify Basically takes our S3 packages, normalizes the version for easier @@ -146,7 +172,7 @@ def nightly_packages_to_show(self: S3IndexType) -> Dict[str, str | None]: # also includes versions without GPU specifier (i.e. cu102) for easier # sorting, sorts in reverse to put the most recent versions first all_sorted_packages = sorted( - {self.normalize_package_version(s3_key) for s3_key in self.objects}, + {self.normalize_package_version(obj) for obj in self.objects}, key=lambda name_ver: parse(name_ver.split('-', 1)[-1]), reverse=True, ) @@ -166,14 +192,13 @@ def nightly_packages_to_show(self: S3IndexType) -> Dict[str, str | None]: to_hide.add(obj) else: packages[package_name] += 1 - return { - s3_key: checksum - for s3_key, checksum in self.objects.items() - if self.normalize_package_version(s3_key) not in to_hide - } + return set(self.objects).difference({ + obj for obj in self.objects + if self.normalize_package_version(obj) in to_hide + }) - def is_obj_at_root(self, obj:str) -> bool: - return path.dirname(obj) == self.prefix + def is_obj_at_root(self, obj: S3Object) -> bool: + return path.dirname(str(obj)) == self.prefix def _resolve_subdir(self, subdir: Optional[str] = None) -> str: if not subdir: @@ -185,33 +210,33 @@ def gen_file_list( self, subdir: Optional[str]=None, package_name: Optional[str] = None - ) -> Iterator[str, str | None]: + ) -> Iterable[S3Object]: objects = ( self.nightly_packages_to_show() if self.prefix == 'whl/nightly' else self.objects ) subdir = self._resolve_subdir(subdir) + '/' - for obj, checksum in objects.items(): + for obj in objects: if package_name is not None: if self.obj_to_package_name(obj) != package_name: continue - if self.is_obj_at_root(obj) or obj.startswith(subdir): - yield obj, checksum + if self.is_obj_at_root(obj) or str(obj).startswith(subdir): + yield obj def get_package_names(self, subdir: Optional[str] = None) -> List[str]: - return sorted(set(self.obj_to_package_name(obj) for obj, _ in self.gen_file_list(subdir))) + return sorted(set(self.obj_to_package_name(obj) for obj in self.gen_file_list(subdir))) - def normalize_package_version(self: S3IndexType, obj: str) -> str: + def normalize_package_version(self: S3IndexType, obj: S3Object) -> str: # removes the GPU specifier from the package name as well as # unnecessary things like the file extension, architecture name, etc. return sub( r"%2B.*", "", - "-".join(path.basename(obj).split("-")[:2]) + "-".join(path.basename(str(obj)).split("-")[:2]) ) - def obj_to_package_name(self, obj: str) -> str: - return path.basename(obj).split('-', 1)[0] + def obj_to_package_name(self, obj: S3Object) -> str: + return path.basename(str(obj)).split('-', 1)[0] def to_legacy_html( self, @@ -255,10 +280,10 @@ def to_simple_package_html( out.append('') out.append(' ') out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) - for obj, checksum in sorted(self.gen_file_list(subdir, package_name)): + for obj in sorted(self.gen_file_list(subdir, package_name)): maybe_fragment = "" - if checksum: - maybe_fragment = f"#sha256={checksum}" + if obj.checksum: + maybe_fragment = f"#sha256={obj.checksum}" out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer out.append(' ') @@ -340,7 +365,7 @@ def save_pep503_htmls(self) -> None: @classmethod def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: - objects = {} + objects = [] prefix = prefix.rstrip("/") for obj in BUCKET.objects.filter(Prefix=prefix): is_acceptable = any([path.dirname(obj.key) == prefix] + [ @@ -355,7 +380,11 @@ def from_S3(cls: Type[S3IndexType], prefix: str) -> S3IndexType: response = obj.meta.client.head_object(Bucket=BUCKET.name, Key=obj.key, ChecksumMode="ENABLED") sha256 = (_b64 := response.get("ChecksumSHA256")) and base64.b64decode(_b64).hex() sanitized_key = obj.key.replace("+", "%2B") - objects[sanitized_key] = sha256 + s3_object = S3Object( + key=sanitized_key, + checksum=sha256, + ) + objects.append(s3_object) return cls(objects, prefix) From 5d132b91bd6c01a49290e31f78558b1628b6a8f6 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 1 Aug 2023 08:41:17 -0400 Subject: [PATCH 12/15] Update s3_management/manage.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Vít Zikmund <75443448+vit-zikmund@users.noreply.github.com> --- s3_management/manage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 24f227d91..7f3e2d31a 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -252,7 +252,7 @@ def to_legacy_html( out: List[str] = [] subdir = self._resolve_subdir(subdir) is_root = subdir == self.prefix - for obj, _ in self.gen_file_list(subdir): + for obj in self.gen_file_list(subdir): # Strip our prefix sanitized_obj = obj.replace(subdir, "", 1) if sanitized_obj.startswith('/'): From 744503fb2318aba3e2ebc91b74f25a94fab94c59 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 1 Aug 2023 08:41:31 -0400 Subject: [PATCH 13/15] Update s3_management/manage.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Vít Zikmund <75443448+vit-zikmund@users.noreply.github.com> --- s3_management/manage.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 7f3e2d31a..31ca3bc89 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -281,9 +281,7 @@ def to_simple_package_html( out.append(' ') out.append('

Links for {}

'.format(package_name.lower().replace("_","-"))) for obj in sorted(self.gen_file_list(subdir, package_name)): - maybe_fragment = "" - if obj.checksum: - maybe_fragment = f"#sha256={obj.checksum}" + maybe_fragment = f"#sha256={obj.checksum}" if obj.checksum else "" out.append(f' {path.basename(obj).replace("%2B","+")}
') # Adding html footer out.append(' ') From f7e6d7f375f207588fabec085c21ab8a1f622bff Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 1 Aug 2023 08:41:57 -0400 Subject: [PATCH 14/15] Update s3_management/manage.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Vít Zikmund <75443448+vit-zikmund@users.noreply.github.com> --- s3_management/manage.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/s3_management/manage.py b/s3_management/manage.py index 31ca3bc89..116470b35 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -107,6 +107,7 @@ @dataclasses.dataclass(frozen=True) +@functools.total_ordering class S3Object: key: str checksum: str | None @@ -114,21 +115,12 @@ class S3Object: def __str__(self): return self.key - def __cmp__(self, other): + def __eq__(self, other): return self.key == other.key def __lt__(self, other): return self.key < other.key - def __le__(self, other): - return self.key <= other.key - - def __gt__(self, other): - return self.key > other.key - - def __ge__(self, other): - return self.key >= other.key - def extract_package_build_time(full_package_name: str) -> datetime: result = search(PACKAGE_DATE_REGEX, full_package_name) From 004fa7034fced036efe33d419a79d5f8f8655587 Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Tue, 1 Aug 2023 08:42:09 -0400 Subject: [PATCH 15/15] Update s3_management/manage.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Vít Zikmund <75443448+vit-zikmund@users.noreply.github.com> --- s3_management/manage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/s3_management/manage.py b/s3_management/manage.py index 116470b35..ff615b9f6 100644 --- a/s3_management/manage.py +++ b/s3_management/manage.py @@ -3,6 +3,7 @@ import argparse import base64 import dataclasses +import functools import time from os import path, makedirs