From 39ac48025e5d7d2e335f940b2c5bed4574e1fb10 Mon Sep 17 00:00:00 2001 From: Johannes11833 <24914225+Johannes11833@users.noreply.github.com> Date: Tue, 28 Jan 2025 14:38:37 +0100 Subject: [PATCH] Add support for the check command (#58) * Add support for the check command * Set version to 0.1.21 --- README.md | 16 ++++++++ rclone_python/__init__.py | 2 +- rclone_python/rclone.py | 78 +++++++++++++++++++++++++++++++++++++ rclone_python/utils.py | 2 + tests/test_check.py | 81 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 tests/test_check.py diff --git a/README.md b/README.md index 89ce394..90b5dff 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,22 @@ print(rclone.hash(HashTypes.sha1, "box:data") {'video1.webm': '3ef08d895f25e8b7d84d3a1ac58f8f302e33058b', 'video3.webm': '3ef08d895f25e8b7d84d3a1ac58f8f302e33058b', 'video2.webm': '3ef08d895f25e8b7d84d3a1ac58f8f302e33058b'} ``` +### Check +Checks the files in the source and destination match. + - "=" path means path was found in source and destination and was identical + - "-" path means path was missing on the source, so only in the destination + - "+" path means path was missing on the destination, so only in the source + - "*" path means path was present in source and destination but different. + - "!" path means there was an error reading or hashing the source or dest. +```python +from rclone_python import rclone + +print(rclone.check("data", "box:data")) +``` +```console +(False, [('*', 'video1.webm'), ('=', 'video2.webm'), ('=', 'video3.webm')]) +``` + ## Custom Progressbar You can use your own rich progressbar with all transfer operations. This allows you to customize the columns to be displayed. 
diff --git a/rclone_python/__init__.py b/rclone_python/__init__.py
index c635a98..c07af06 100644
--- a/rclone_python/__init__.py
+++ b/rclone_python/__init__.py
@@ -1 +1 @@
-VERSION = "0.1.20"
+VERSION = "0.1.21"
diff --git a/rclone_python/rclone.py b/rclone_python/rclone.py
index 8fe1689..bf8c87f 100644
--- a/rclone_python/rclone.py
+++ b/rclone_python/rclone.py
@@ -1,7 +1,9 @@
 import json
+from pathlib import Path
 import re
 from functools import wraps
 from shutil import which
+import tempfile
 from typing import Optional, Tuple, Union, List, Dict, Callable
 
 from rclone_python import utils
@@ -586,6 +588,82 @@ def hash(
     return hashsums
 
 
+@__check_installed
+def check(
+    source: str,
+    dest: str,
+    combined: Optional[str] = None,
+    size_only: bool = False,
+    download: bool = False,
+    one_way: bool = False,
+    args: Optional[List[str]] = None,
+) -> Tuple[bool, List[Tuple[str, str]]]:
+    """Checks that the files in the source and destination match.
+
+    Args:
+        source (str): The source path.
+        dest (str): The destination path.
+        combined (str, optional): Path of the file the combined report is written to. If not set, a file in a temporary directory is used and removed afterwards. Defaults to None.
+        size_only (bool, optional): Only compare the sizes not the hashes as well. Use this for a quick check. Defaults to False.
+        download (bool, optional): Download the data from both remotes and check them against each other on the fly. This can be useful for remotes that don't support hashes or if you really want to check all the data. Defaults to False.
+        one_way (bool, optional): Only check that files in the source match the files in the destination, not the other way around. This means that extra files in the destination that are not in the source will not be detected. Defaults to False.
+        args (List[str], optional): Optional additional list of flags and arguments. The passed list is not modified. Defaults to None.
+
+    Raises:
+        utils.RcloneException: Raised when the rclone command fails and no combined file was written.
+
+    Returns:
+        Tuple[bool, List[Tuple[str, str]]]: The bool is true if rclone exited with return code 0, i.e. source and dest match.
+            The list contains one (symbol, path) tuple per line of the combined file. The following symbols are used:
+            "=" path means path was found in source and destination and was identical
+            "-" path means path was missing on the source, so only in the destination
+            "+" path means path was missing on the destination, so only in the source
+            "*" path means path was present in source and destination but different.
+            "!" path means there was an error reading or hashing the source or dest.
+
+    """
+    # work on a copy so that the list passed in by the caller is never modified
+    args = list(args) if args else []
+    if size_only:
+        args.append("--size-only")
+    if download:
+        args.append("--download")
+    if one_way:
+        args.append("--one-way")
+
+    tmp = None
+    if not combined:
+        tmp = tempfile.TemporaryDirectory()
+        combined = Path(tmp.name, "combined_file")
+    # even if --combined is also specified by the user through args,
+    # this one will be used as apparently rclone uses the last specification.
+    args.append(f'--combined "{combined}"')
+
+    try:
+        returncode, _, stderr = utils.run_rclone_cmd(
+            f'check "{source}" "{dest}"', args, raise_errors=False
+        )
+        logger.debug(f"Rclone check stderr output:\n{stderr}")
+
+        # read the combined file and extract all elements
+        combined_file = Path(combined)
+        if returncode != 0 and not combined_file.is_file():
+            raise utils.RcloneException(
+                f'check command failed on source: "{source}" dest: "{dest}"',
+                stderr,
+            )
+        # decode explicitly as UTF-8 instead of relying on the platform default
+        report = combined_file.read_text(encoding="utf-8")
+    finally:
+        # always remove the temporary directory, also when an error is raised
+        if tmp:
+            tmp.cleanup()
+
+    # every line holds the symbol followed by a space and then the filepath
+    out = [tuple(line.split(" ", maxsplit=1)) for line in report.splitlines() if line]
+    return returncode == 0, out
+
+
 @__check_installed
 def version(
     check=False,
diff --git a/rclone_python/utils.py b/rclone_python/utils.py
index 782b28f..897306d 100644
--- a/rclone_python/utils.py
+++ b/rclone_python/utils.py
@@ -47,6 +47,8 @@ def run_rclone_cmd(
     args_str = args2string(args)
     full_command = f"rclone {command} {args_str}"
 
+    logger.debug(f"Running command: {full_command}")
+
     process = subprocess.run(
         full_command,
         stdout=subprocess.PIPE,
diff --git a/tests/test_check.py b/tests/test_check.py
new file mode 100644
index 0000000..8b33f28
--- /dev/null
+++ b/tests/test_check.py
@@ -0,0 +1,81 @@
+from pathlib import Path
+import tempfile
+import pytest
+
+from rclone_python import rclone
+
+
+@pytest.fixture(scope="module")
+def check_command_setup(default_test_setup):
+    # creates five local text files and uploads them prior to the execution of the tests; deletes everything afterwards
+
+    # INITIALIZATION
+    # LOCAL
+    local_path = tempfile.TemporaryDirectory()
+    text = default_test_setup.local_test_txt_file.read_text()
+    for i in range(5):
+        Path(local_path.name, f"file {i}").write_text(f"This is file #{i}.\n{text}")
+    print(
+        "\ncreated temporary directory:",
+        local_path.name,
+        "with content:",
+        list(Path(local_path.name).iterdir()),
+    )
+
+    # REMOTE
+    remote_path = f"{default_test_setup.remote_test_data_dir}"
+    rclone.copy(
+        local_path.name,
+        remote_path,
+        show_progress=False,
+    )
+
+    yield Path(local_path.name), remote_path
+
+    # TEARDOWN
+    print(f"\nTeardown: remote {remote_path} and local: {local_path.name}")
+    rclone.delete(remote_path)
+    local_path.cleanup()
+
+
+def test_check_matching(check_command_setup):
+    # here, dest and source match
+    local_path, remote_path = check_command_setup
+    local_files = [x.name for x in local_path.iterdir() if x.is_file()]
+
+    expected = [("=", f) for f in local_files]
+    valid, output = rclone.check(local_path, remote_path)
+
+    print("output: ", output)
+    print("expected:", expected)
+
+    assert valid
+    # order might be different, so check that there is no difference between the 2 sets
+    assert not set(expected) ^ set(output)
+
+
+def test_check_not_matching(check_command_setup):
+    # same as above: here, dest and source match at first
+    local_path, remote_path = check_command_setup
+    local_files = [x.name for x in local_path.iterdir() if x.is_file()]
+
+    # alter the first listed file locally (the same entry that is marked with "*" below)
+    Path(local_path, local_files[0]).write_text(
+        "The content of the first file is now different locally."
+    )
+    # add another file locally that is missing on remote
+    new_file = Path(local_path, "new_file")
+    new_file.write_text("The content of the new local file")
+
+    # create expected list
+    expected = [("=", f) for f in local_files]
+    expected.append(("+", new_file.name))
+    expected[0] = ("*", expected[0][1])  # the first file was altered
+
+    valid, output = rclone.check(local_path, remote_path)
+    print("output: ", output)
+    print("expected:", expected)
+
+    assert not valid
+    # order might be different, so check that there is no difference between the 2 sets
+    assert not set(expected) ^ set(output)