From 677c26f63951cebd9a30d339738cb496a7cc613d Mon Sep 17 00:00:00 2001 From: "Fon E. Noel NFEBE" Date: Wed, 12 Apr 2023 19:07:58 +0100 Subject: [PATCH] Introduce basics for modification detection As part of this work, we need to verify that files that are uploaded to permanent are downloaded without changes. This modification detection could also help verify completion of uploads and downloads. Signed-off-by: Fon E. Noel NFEBE --- README.md | 11 +++-- verify.py | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 3 deletions(-) create mode 100755 verify.py diff --git a/README.md b/README.md index d344a8e..be635d6 100644 --- a/README.md +++ b/README.md @@ -169,15 +169,20 @@ To test a nest with more levels, simply paste a nested folder structure inside ` Run -`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=nested` - -Check the downloads folder in `test-tree/downloads` and ensure that the `downloads/nested` directory has a structure like the nested directory uploaded in the [nested uploads test](#nested-uploads). +`./test-download.py --remote=prod --archive-path="/archives/rclone QA 1 (0a21-0000)/My Files/" --remote-dir=misc/nested` +To verify that everything in the nested folder was downloaded correctly, run `./verify.py --nested-complete`. ### What file types and scenarios are left out? Anything not included in the section above describing what is currently covered is by implication excluded from these tests. +## Hash verification + +### Modification Detection + +To verify that files that were successfully uploaded and downloaded have remained unchanged, as we would expect, run `./verify.py --succeeded`. + ## Troubleshooting - Remember that the commands are examples and some of the arguments may not apply to your specific environment. 
#!/usr/bin/env python3
"""Check the results of upload/download test runs by comparing file hashes.

Files below ``MISC_DIR`` are hashed as the pre-upload originals and files
below ``DOWNLOAD_MISC_DIR`` as their downloaded counterparts.
"""
import argparse
import hashlib
import os
import sys

DOWNLOAD_MISC_DIR = "test-tree/downloads/misc"
MISC_DIR = "test-tree/misc"
CHUNK_SIZE = 1024

# ANSI colour escape sequences used for terminal output.
OKBLUE = "\033[94m"
OKGREEN = "\033[92m"
FAIL = "\033[91m"
WARNING = "\033[93m"
ENDC = "\033[0m"


def hash_file(file_path):
    """Return the hexadecimal SHA-1 digest of the file at ``file_path``.

    The file is read ``CHUNK_SIZE`` bytes at a time so that it never has to
    be held in memory as a whole.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.sha1()
    with open(file_path, "rb") as file:
        # iter() with a sentinel stops at EOF, where read() returns b"".
        for chunk in iter(lambda: file.read(CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _list_files(root):
    """Return the path of every file found recursively below ``root``."""
    return [
        os.path.join(subdir, name)
        for subdir, _, files in os.walk(root)
        for name in files
    ]


def _hash_paths(paths):
    """Return one ``{"path": ..., "hash": ...}`` record per path."""
    return [{"path": path, "hash": hash_file(path)} for path in paths]


def crawl_upload_and_download_paths():
    """Build the lists of uploaded and downloaded file paths.

    Returns:
        A ``(uploaded_paths, downloaded_paths)`` tuple of lists holding the
        files found below ``MISC_DIR`` and ``DOWNLOAD_MISC_DIR``. A directory
        that does not exist contributes an empty list, because ``os.walk``
        yields nothing for it.
    """
    return _list_files(MISC_DIR), _list_files(DOWNLOAD_MISC_DIR)


def make_file_to_harsh_maps():
    """Hash every uploaded and every downloaded file.

    Returns:
        A ``(pre_upload_hashes, post_upload_hashes)`` tuple. Each element is
        a list of ``{"path": <file path>, "hash": <SHA-1 hex digest>}``
        dictionaries.
    """
    uploaded_paths, downloaded_paths = crawl_upload_and_download_paths()
    return _hash_paths(uploaded_paths), _hash_paths(downloaded_paths)


def parse_cli():
    """Build and return the command-line argument parser."""
    parser = argparse.ArgumentParser(
        prog="verify", description="Check results of upload/download operations"
    )
    parser.add_argument(
        "--misc-complete",
        help="Verify that both the upload and download of the complete misc folder was successful",
        action="store_true",
    )
    parser.add_argument(
        "--nested-complete",
        help="Verify that both the upload and download of the complete nested folder was successful",
        action="store_true",
    )
    parser.add_argument(
        "--succeeded",
        help="Verify that files that were successfully uploaded where downloaded successfully",
        action="store_true",
    )

    return parser


def main(argv=None):
    """Run the verification selected on the command line.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        ``1`` when ``--succeeded`` finds no downloaded files or finds at
        least one downloaded file whose hash matches no pre-upload file;
        ``0`` otherwise.
    """
    parser = parse_cli()
    args = parser.parse_args(argv)

    if args.succeeded:
        # Hash only when a check was requested; this walks and reads both trees.
        pre_upload_hash_data, post_upload_hash_data = make_file_to_harsh_maps()
        if not post_upload_hash_data:
            # Without this guard an empty download folder would be reported
            # as a successful verification.
            print(
                f"{WARNING}No downloaded files found in {DOWNLOAD_MISC_DIR}; nothing to verify.{ENDC}\n"
            )
            return 1

        # A set, not a map() iterator: the iterator would be consumed by the
        # first membership test and make later look-ups fail spuriously.
        pre_upload_hashes = {entry.get("hash") for entry in pre_upload_hash_data}

        failed_once = False
        for file_data in post_upload_hash_data:
            print(f"{OKBLUE}Verifying hash for {file_data.get('path')} ...{ENDC}")
            if file_data.get("hash") not in pre_upload_hashes:
                print(
                    f"{WARNING}The hash to the path {file_data.get('path')} is missing!{ENDC}"
                )
                print(
                    f"{WARNING}File has either been modified (on disk or permanent) or is missing!{ENDC}\n"
                )
                failed_once = True

        if failed_once:
            print(
                f"{FAIL}\nVerification complete but failed! Missing hash(es) detected.\n{ENDC}"
            )
            print(
                f"{FAIL}At least once missing hash detected, check the logs above.\n{ENDC}"
            )
            return 1

        print(f"{OKGREEN}\nVerification complete!{ENDC}\n")
        print(
            f"{OKGREEN}All downloaded files have matching hashes in pre-uploaded file hashes.{ENDC}\n"
        )
        return 0

    if args.misc_complete or args.nested_complete:
        # TODO: implement the completeness checks. Until then, say so
        # instead of exiting silently as if something had been verified.
        print(f"{WARNING}This check is not implemented yet; nothing was verified.{ENDC}")
        return 0

    print("Not sure what to do!\n\n")
    parser.print_help()
    return 0


if __name__ == "__main__":
    sys.exit(main())