From e389a88ad8716d94675cd41eed7838431f302331 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 13 Dec 2020 18:32:47 +0100 Subject: [PATCH 001/138] Review and standardize some error messages --- archive/archive.py | 14 +++++++------- tests/test_03_verify_errors.py | 4 ++-- tests/test_04_cli_error.py | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index d515cc9..316ff7a 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -95,7 +95,7 @@ def _create(self, path, mode, paths, basedir, excludes, dedup, tags): p = fi.path name = self._arcname(p) if name in md_names: - raise ArchiveCreateError("cannot add %s: " + raise ArchiveCreateError("invalid path '%s': " "this filename is reserved" % p) if fi.is_file(): ti = tarf.gettarinfo(str(p), arcname=name) @@ -140,8 +140,8 @@ def _check_paths(self, paths, basedir, excludes): abspath = None for p in itertools.chain(paths, excludes or ()): if not _is_normalized(p): - raise ArchiveCreateError("invalid path %s: must be normalized" - % p) + raise ArchiveCreateError("invalid path '%s': " + "must be normalized" % p) if abspath is None: abspath = p.is_absolute() else: @@ -166,7 +166,7 @@ def _add_metadata_files(self, tarf): for md in self._metadata: name = str(md.path) if name in md_names: - raise ArchiveCreateError("duplicate metadata %s" % name) + raise ArchiveCreateError("duplicate metadata '%s'" % name) md_names.add(name) ti = tarf.gettarinfo(arcname=name, fileobj=md.fileobj) ti.mode = stat.S_IFREG | stat.S_IMODE(md.mode) @@ -220,7 +220,7 @@ def get_metadata(self, name): ti = self._file.next() path = Path(ti.path) if path.name != name: - raise ArchiveIntegrityError("%s not found" % name) + raise ArchiveIntegrityError("metadata item '%s' not found" % name) fileobj = self._file.extractfile(ti) md = MetadataItem(path=path, tarinfo=ti, fileobj=fileobj) self._metadata.append(md) @@ -257,8 +257,8 @@ def verify(self): for md in self.manifest.metadata: ti = 
next(tarf_it) if ti.name != md: - raise ArchiveIntegrityError("Expected metadata item '%s' " - "not found" % (md)) + raise ArchiveIntegrityError("metadata item '%s' not found" + % md) # Check the content of the archive. for fileinfo in self.manifest: self._verify_item(fileinfo) diff --git a/tests/test_03_verify_errors.py b/tests/test_03_verify_errors.py index 6c07b0c..0180635 100644 --- a/tests/test_03_verify_errors.py +++ b/tests/test_03_verify_errors.py @@ -53,7 +53,7 @@ def test_verify_missing_manifest(test_data, testname): with pytest.raises(ArchiveIntegrityError) as err: with Archive().open(Path(name)) as archive: pass - assert ".manifest.yaml not found" in str(err.value) + assert "metadata item '.manifest.yaml' not found" in str(err.value) def test_verify_missing_metadata_item(test_data, testname): name = archive_name(tags=[testname]) @@ -72,7 +72,7 @@ def test_verify_missing_metadata_item(test_data, testname): with Archive().open(Path(name)) as archive: with pytest.raises(ArchiveIntegrityError) as err: archive.verify() - assert "'base/.msg.txt' not found" in str(err.value) + assert "metadata item 'base/.msg.txt' not found" in str(err.value) def test_verify_missing_file(test_data, testname): name = archive_name(tags=[testname]) diff --git a/tests/test_04_cli_error.py b/tests/test_04_cli_error.py index 9ed8c19..2ed695a 100644 --- a/tests/test_04_cli_error.py +++ b/tests/test_04_cli_error.py @@ -104,7 +104,7 @@ def test_cli_create_normalized_path(test_dir, testname, monkeypatch): callscript("archive-tool.py", args, returncode=1, stderr=f) f.seek(0) line = f.readline() - assert "invalid path base/empty/..: must be normalized" in line + assert "invalid path 'base/empty/..': must be normalized" in line def test_cli_create_rel_start_basedir(test_dir, testname, monkeypatch): monkeypatch.chdir(str(test_dir)) @@ -162,7 +162,7 @@ def test_cli_integrity_no_manifest(test_dir, testname, monkeypatch): callscript("archive-tool.py", args, returncode=3, stderr=f) f.seek(0) 
line = f.readline() - assert ".manifest.yaml not found" in line + assert "metadata item '.manifest.yaml' not found" in line def test_cli_integrity_missing_file(test_dir, testname, monkeypatch): monkeypatch.chdir(str(test_dir)) From 1fc4c32b16097aa713c0bbd9ff0f8cab1dfa87f1 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 13 Dec 2020 18:53:53 +0100 Subject: [PATCH 002/138] Explicitly provide the error message in the case of a relative path not being a subpath of the base directory, rather than taking over the error message from Path.relative_to(). This partly revises a7e8fae. --- archive/archive.py | 6 ++++-- tests/test_04_cli_error.py | 7 ++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 316ff7a..c62443a 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -153,8 +153,10 @@ def _check_paths(self, paths, basedir, excludes): # This will raise ValueError if p does not start # with basedir: p.relative_to(self.basedir) - except ValueError as e: - raise ArchiveCreateError(str(e)) + except ValueError: + raise ArchiveCreateError("invalid path '%s': must be a " + "subpath of base directory %s" + % (p, self.basedir)) if not abspath: if self.basedir.is_symlink() or not self.basedir.is_dir(): raise ArchiveCreateError("basedir must be a directory") diff --git a/tests/test_04_cli_error.py b/tests/test_04_cli_error.py index 2ed695a..8eb47e1 100644 --- a/tests/test_04_cli_error.py +++ b/tests/test_04_cli_error.py @@ -3,7 +3,6 @@ import os from pathlib import Path -import re import stat import tarfile from tempfile import TemporaryFile @@ -114,10 +113,8 @@ def test_cli_create_rel_start_basedir(test_dir, testname, monkeypatch): callscript("archive-tool.py", args, returncode=1, stderr=f) f.seek(0) line = f.readline() - # The actual error message differs between Python versions, so - # lets just assert that the error is something about - # 'base/msg.txt' and 'base/data'.
- assert re.search(r"'base/msg.txt'.*'base/data'", line) + assert ("invalid path 'base/msg.txt': must be a subpath of " + "base directory base/data") in line def test_cli_ls_archive_not_found(test_dir, monkeypatch): monkeypatch.chdir(str(test_dir)) From c7fb624c25e7d2d81e7b7f6d7c88c55b41d9876b Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 13 Dec 2020 19:19:15 +0100 Subject: [PATCH 003/138] Update changelog --- CHANGES.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 769e88a..3a8583c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,17 @@ Changelog ========= +0.6 (not yet released) +~~~~~~~~~~~~~~~~~~~~~~ + +Bug fixes and minor changes +--------------------------- + ++ `#48`_: Review and standardize some error messages. + +.. _#48: https://github.com/RKrahl/archive-tools/pull/48 + + 0.5.1 (2020-12-12) ~~~~~~~~~~~~~~~~~~ From d470de2f9d4a365ee01255d2cffc95caceaa2451 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 25 Dec 2020 16:38:33 +0100 Subject: [PATCH 004/138] Add tests for Issue #50 --- tests/data/mailarchive-legacy-1_0.tar.xz | Bin 0 -> 2036 bytes tests/test_05_mailarchive_create.py | 8 ++++++++ tests/test_05_mailarchive_legacy.py | 17 +++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 tests/data/mailarchive-legacy-1_0.tar.xz create mode 100644 tests/test_05_mailarchive_legacy.py diff --git a/tests/data/mailarchive-legacy-1_0.tar.xz b/tests/data/mailarchive-legacy-1_0.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..bf06c8825239431a7455e290f363a8205b7587d9 GIT binary patch literal 2036 zcmVv(iNpf5;o;jE(tD|2O zrPU3E_g;z0e*~Rrvk*egX&xgJvWW88RZb*yl(h6naqTF z<)0Mj0LU4whxhJUua zfJR{XDN%||TcwFm`Vz6svQ@>~HJ`EhHg)jpwz&yGoF z)>)1LKLM%P0Z&YfEX0Z`rC;MRiLj68l1Dxx83;f^erg7nuf<-thD}ndLlugLtdFFx zVSyDDsWe@k@#2Qj5IPG) zM!T$c9ws%eV`!qa_vq~P(gMel(=473ESR$CnBpT9MXL3xasIBq7 zq`#7ilt(|C62hthGJH#3Dup`krIPi9dW^(^Zz*u6&Sp{KW8WC=D>s$4ytS?kLwCu! 
zNE6blypSQ0h7TK}roJccJTwF@6(%CC#-`^T&6;vVWw%Xt1NrMVLC~ScRp$|OiJP|} zj_CK}XMmZDp_F8esl;B1ukB@|H2esTAB+*KMcy>n09T%*s4^9&WeJGmukyO-5m9sL z+iESRxc!yrEOCB)pwhvFc1SUxECbZ>y7XT@ZE!(`vny{Lw#CGP5^J3;*QubE56+sm zrBu_(jPqj>jKUbs_dH&)R%j3ySvm}?g@zvRLwKSi4~~DkBgWg}yfO=g4hzUY6-7t5 zsiQYEvNKk_+VUQ=&zV%$-T5=!3w~d!w+>%y(4g@edyLE{(baG|IXz(n9@r~bH=w+g zwcyjY9sEUp291NrpE!wYQ;RiAGnld}GM_tKrr*fio^q;~pgRHcBG417Ds~|yg&jUF zhABzm0sEZT0E0-=J5ZgziTnsJ7Ct4(15lhe3)m=jFjG*fM2a)NnBPBndIOis7r*H62A$vOtf2sV@jicJiwG`x5u z!kXJ8IyUnfR@;shCzrYQnQBl~ud5aS#00$Ck1d6@Gq!|j^c4y`GUxZ|{JTmMUTKv! zt9UE@+c*Djdd)oyD=3;SglJ>8pZa=AI_Z+Jx-oPys#4JZ>&QOh zyKG3Yg1_XkS(*Hw*g3gN=^947?E?0#ye1DTL(R9$Dk>L}9`=WEq~xpxtbr+!zSN92 zFGEaqLTaQqg%Bfs2^mIuDq;HQCBHUYzLQ%Ut!DpimddS)oN2+OR0yevWz4EzOaJ?( z`^P>=2S{{BT5O20P6;&UFPMG2nzg?w=_x7adfUMUK^I@SDLNoKzSP-T@br1E3CdvC zRTO+;FJMm*R*yk_?gV)eGih7V0)~tre@ajXQ9Ix@wB4GfYpW53`hTk$S`;0m(+v+s zh?H7CT(w%`rm9&SI_>N@8s(qUV_{LP!o~>bgGH~n=t~E!Ka^9CghGma38}U-TN%o< zk>{Xq>d8qrR+S92@A(OD_vYbjve*%A>`I_4I1NL!gkcpW9wJ$S0Qg2yiO63kSdWz0 zi5qq4*2`gFgmxJK1@^H-8!byQMDQ;8{{47UB9FKZD+=&rBIeZ;3yOb#QAnUPV<_Z3 z=d{x>_d#VDwb<=Q(QN zdOS@n?0cAP$Q}o$b!#`3L-3=@yn0B!;XCichTJ Date: Fri, 25 Dec 2020 17:22:33 +0100 Subject: [PATCH 005/138] Add a header with metadata to the mailindex. Close #50. 
--- archive/mailarchive.py | 59 ++++++++++++++++++++++++----- scripts/imap-to-archive.py | 3 +- tests/test_05_mailarchive_create.py | 4 +- tests/test_05_mailarchive_legacy.py | 1 - 4 files changed, 51 insertions(+), 16 deletions(-) diff --git a/archive/mailarchive.py b/archive/mailarchive.py index 462ecba..7342a12 100644 --- a/archive/mailarchive.py +++ b/archive/mailarchive.py @@ -1,21 +1,65 @@ +from distutils.version import StrictVersion import hashlib from mailbox import Maildir from pathlib import Path from tempfile import TemporaryDirectory, TemporaryFile import yaml from archive import Archive -from archive.tools import tmp_chdir, tmp_umask +from archive.tools import now_str, parse_date, tmp_chdir, tmp_umask + + +class MailIndex(list): + + Version = "1.1" + + def __init__(self, fileobj=None, items=None, server=None): + if fileobj: + docs = yaml.safe_load_all(fileobj) + try: + head = next(docs) + items = next(docs) + except StopIteration: + items = head + head = dict(Version="1.0") + super().__init__(items) + self.head = head + else: + if items: + super().__init__(items) + else: + super().__init__() + self.head = { + "Date": now_str(), + "Version": self.Version, + } + if server: + self.head["Server"] = server + + @property + def version(self): + return StrictVersion(self.head["Version"]) + + @property + def date(self): + return parse_date(self.head["Date"]) + + def write(self, fileobj): + fileobj.write("%YAML 1.1\n".encode("ascii")) + yaml.dump(self.head, stream=fileobj, encoding="ascii", + default_flow_style=False, explicit_start=True) + yaml.dump(list(self), stream=fileobj, encoding="ascii", + default_flow_style=False, explicit_start=True) class MailArchive(Archive): - def create(self, path, mails, compression='xz', comment=None): + def create(self, path, mails, compression='xz', server=None): path = Path.cwd() / path with TemporaryDirectory(prefix="mailarchive-") as tmpdir: with tmp_chdir(tmpdir), tmp_umask(0o077): basedir = Path(path.name.split('.')[0]) 
maildir = Maildir(str(basedir), create=True) - self.mailindex = [] + self.mailindex = MailIndex(server=server) last_folder = None for folder, msgbytes in mails: if folder != last_folder: @@ -36,12 +80,7 @@ def create(self, path, mails, compression='xz', comment=None): } self.mailindex.append(idx_item) with TemporaryFile(dir=tmpdir) as tmpf: - head = "%YAML 1.1\n" - if comment: - head += "# %s\n" % comment - tmpf.write(head.encode("ascii")) - yaml.dump(self.mailindex, stream=tmpf, encoding="ascii", - default_flow_style=False, explicit_start=True) + self.mailindex.write(tmpf) tmpf.seek(0) self.add_metadata(".mailindex.yaml", tmpf) super().create(path, compression, [basedir]) @@ -50,5 +89,5 @@ def create(self, path, mails, compression='xz', comment=None): def open(self, path): super().open(path) md = self.get_metadata(".mailindex.yaml") - self.mailindex = yaml.safe_load(md.fileobj) + self.mailindex = MailIndex(fileobj=md.fileobj) return self diff --git a/scripts/imap-to-archive.py b/scripts/imap-to-archive.py index 3197ae6..ab422ee 100644 --- a/scripts/imap-to-archive.py +++ b/scripts/imap-to-archive.py @@ -125,7 +125,6 @@ def getmsgs(imap, basedir): imap.starttls() imap.login(config['user'], config['pass']) log.debug("Login to %s successful", config['host']) - comment = "Fetched from %s at %s" % (config['host'], now_str()) archive = MailArchive() - archive.create(archive_path, getmsgs(imap, "INBOX"), comment=comment) + archive.create(archive_path, getmsgs(imap, "INBOX"), server=config['host']) diff --git a/tests/test_05_mailarchive_create.py b/tests/test_05_mailarchive_create.py index 6ca0647..21263e9 100644 --- a/tests/test_05_mailarchive_create.py +++ b/tests/test_05_mailarchive_create.py @@ -43,9 +43,8 @@ def test_create_mailarchive(tmpdir, monkeypatch, testcase): else: monkeypatch.chdir(str(tmpdir)) archive_path = "mailarchive-rel.tar.xz" - comment = "Test mail archive created at %s" % (now_str()) archive = MailArchive() - archive.create(archive_path, getmsgs(), 
comment=comment) + archive.create(archive_path, getmsgs(), server="imap.example.org") @pytest.mark.dependency() def test_verify_mailarchive(tmpdir, dep_testcase): @@ -67,7 +66,6 @@ def test_check_mailindex(tmpdir, dep_testcase): assert item['folder'] == folder @pytest.mark.dependency() -@pytest.mark.xfail(reason="Issue #50 not yet implemented") def test_check_mailindex_head(tmpdir, dep_testcase): archive_path = tmpdir / ("mailarchive-%s.tar.xz" % dep_testcase) with MailArchive().open(archive_path) as archive: diff --git a/tests/test_05_mailarchive_legacy.py b/tests/test_05_mailarchive_legacy.py index ce29ba9..bbed231 100644 --- a/tests/test_05_mailarchive_legacy.py +++ b/tests/test_05_mailarchive_legacy.py @@ -9,7 +9,6 @@ def legacy_1_0_archive(): return gettestdata("mailarchive-legacy-1_0.tar.xz") -@pytest.mark.xfail(reason="Issue #50 not yet implemented") def test_1_0_check_mailindex(legacy_1_0_archive): with MailArchive().open(legacy_1_0_archive) as archive: archive.verify() From f1b41a9bf186c050d7bb5ec1193e2c5753925447 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 25 Dec 2020 17:32:23 +0100 Subject: [PATCH 006/138] Add a few more checks on the mailindex metadata --- tests/test_05_mailarchive_create.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_05_mailarchive_create.py b/tests/test_05_mailarchive_create.py index 21263e9..47d6c7c 100644 --- a/tests/test_05_mailarchive_create.py +++ b/tests/test_05_mailarchive_create.py @@ -1,12 +1,13 @@ """Test creating a mail archive and check its content. 
""" +import datetime import email import pytest from pytest_dependency import depends import yaml from archive import Archive -from archive.mailarchive import MailArchive +from archive.mailarchive import MailIndex, MailArchive from archive.tools import now_str from conftest import gettestdata @@ -70,7 +71,11 @@ def test_check_mailindex_head(tmpdir, dep_testcase): archive_path = tmpdir / ("mailarchive-%s.tar.xz" % dep_testcase) with MailArchive().open(archive_path) as archive: assert archive.mailindex.head - assert archive.mailindex.version + assert set(archive.mailindex.head.keys()) == { + "Date", "Server", "Version" + } + assert isinstance(archive.mailindex.date, datetime.datetime) + assert archive.mailindex.version == MailIndex.Version @pytest.mark.dependency() def test_check_mail_messages(tmpdir, dep_testcase): From ecc11715321129811fa02c129f31db394fb8a74a Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 25 Dec 2020 19:23:45 +0100 Subject: [PATCH 007/138] Update changelog --- CHANGES.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 3a8583c..24eb7f2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,12 +5,26 @@ Changelog 0.6 (not yet released) ~~~~~~~~~~~~~~~~~~~~~~ +New features +------------ + ++ `#50`_, `#51`_: Add a header with some metadata to the index in a + mail archive created by :class:`MailArchive`. + +Incompatible changes +-------------------- + ++ The `comment` keyword argument to :class:`MailArchive` has been + dropped, ref. `#51`_. + Bug fixes and minor changes --------------------------- + `#48`_: Review and standardize some error messages. .. _#48: https://github.com/RKrahl/archive-tools/pull/48 +.. _#50: https://github.com/RKrahl/archive-tools/issues/50 +.. 
_#51: https://github.com/RKrahl/archive-tools/pull/51 0.5.1 (2020-12-12) From f9eecd681063fff167e5c2fd4485291c620dea68 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 17 Apr 2021 15:53:44 +0200 Subject: [PATCH 008/138] Move (most parts of) get_config() from imap-to-archive.py into a new separate module --- archive/config.py | 30 ++++++++++++++++++++++++++++++ archive/exception.py | 9 ++++++--- scripts/imap-to-archive.py | 30 ++++-------------------------- 3 files changed, 40 insertions(+), 29 deletions(-) create mode 100644 archive/config.py diff --git a/archive/config.py b/archive/config.py new file mode 100644 index 0000000..421190a --- /dev/null +++ b/archive/config.py @@ -0,0 +1,30 @@ +"""Manage configuration. + +.. note:: + This module is intended as a helper for the internal use in some + command line scripts. It is not considered to be part of the API + of archive-tools. Most users will not need to use it directly or + even care about it. +""" + +from collections import ChainMap +import configparser +from archive.exception import ConfigError + +def get_config(args, defaults): + args_cfg_options = ('host', 'port', 'security', 'user') + args_cfg = { k:vars(args)[k] for k in args_cfg_options if vars(args)[k] } + config = ChainMap({}, args_cfg) + if args.config_section: + cp = configparser.ConfigParser() + if not cp.read(args.config_file): + raise ConfigError("configuration file %s not found" + % args.config_file) + try: + config.maps.append(cp[args.config_section]) + except KeyError: + raise ConfigError("configuration section %s not found" + % args.config_section) + config.maps.append(defaults) + return config + diff --git a/archive/exception.py b/archive/exception.py index 30bfc9c..57fe5b8 100644 --- a/archive/exception.py +++ b/archive/exception.py @@ -19,9 +19,6 @@ def __init__(self, *args): if hasattr(self, '__cause__'): self.__cause__ = None -class ArgError(_BaseException): - pass - class ArchiveError(_BaseException): pass @@ -52,3 +49,9 @@ def 
__init__(self, path, ftype): class ArchiveWarning(Warning): pass + +class ArgError(_BaseException): + pass + +class ConfigError(_BaseException): + pass diff --git a/scripts/imap-to-archive.py b/scripts/imap-to-archive.py index ab422ee..085b070 100644 --- a/scripts/imap-to-archive.py +++ b/scripts/imap-to-archive.py @@ -3,14 +3,14 @@ """ import argparse -from collections import ChainMap -import configparser import getpass import logging import os.path from pathlib import Path import sys from imapclient import IMAPClient +from archive.config import get_config +from archive.exception import ConfigError from archive.mailarchive import MailArchive from archive.tools import now_str @@ -54,25 +54,8 @@ if args.verbose: logging.getLogger().setLevel(logging.DEBUG) -class ConfigError(Exception): - pass - -def get_config(args, defaults): - args_cfg_options = ('host', 'port', 'security', 'user') - args_cfg = { k:vars(args)[k] for k in args_cfg_options if vars(args)[k] } - config = ChainMap({}, args_cfg) - if args.config_section: - cp = configparser.ConfigParser() - if not cp.read(args.config_file): - raise ConfigError("configuration file %s not found" - % args.config_file) - try: - config.maps.append(cp[args.config_section]) - except KeyError: - raise ConfigError("configuration section %s not found" - % args.config_section) - config.maps.append(defaults) - +try: + config = get_config(args, defaults) if config['security'] not in security_methods: raise ConfigError("invalid security method '%s'" % config['security']) if not config['host']: @@ -84,11 +67,6 @@ def get_config(args, defaults): raise ConfigError("IMAP4 user name not specified") if config['pass'] is None: config['pass'] = getpass.getpass() - - return config - -try: - config = get_config(args, defaults) except ConfigError as e: print("%s: configuration error: %s" % (argparser.prog, e), file=sys.stderr) sys.exit(2) From 468951c7536dac03d6353609ebf82b8c264bb9fc Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 18 Apr 
2021 15:30:04 +0200 Subject: [PATCH 009/138] Convert get_config() into a class Config --- archive/config.py | 39 ++++++++++++++----------- scripts/imap-to-archive.py | 58 ++++++++++++++++++++++++-------------- 2 files changed, 60 insertions(+), 37 deletions(-) diff --git a/archive/config.py b/archive/config.py index 421190a..a88f7ae 100644 --- a/archive/config.py +++ b/archive/config.py @@ -11,20 +11,27 @@ import configparser from archive.exception import ConfigError -def get_config(args, defaults): - args_cfg_options = ('host', 'port', 'security', 'user') - args_cfg = { k:vars(args)[k] for k in args_cfg_options if vars(args)[k] } - config = ChainMap({}, args_cfg) - if args.config_section: - cp = configparser.ConfigParser() - if not cp.read(args.config_file): - raise ConfigError("configuration file %s not found" - % args.config_file) - try: - config.maps.append(cp[args.config_section]) - except KeyError: - raise ConfigError("configuration section %s not found" - % args.config_section) - config.maps.append(defaults) - return config +class Config: + defaults = dict() + config_file = None + args_options = () + + def __init__(self, args, config_section=None): + args_cfg = { k:vars(args)[k] + for k in self.args_options + if vars(args)[k] is not None } + self.config = ChainMap({}, args_cfg) + if self.config_file and config_section: + cp = configparser.ConfigParser(comment_prefixes=('#', '!')) + self.config_file = cp.read(self.config_file) + if isinstance(config_section, str): + config_section = (config_section,) + self.config_section = [] + for section in config_section: + try: + self.config.maps.append(cp[section]) + self.config_section.append(section) + except KeyError: + pass + self.config.maps.append(self.defaults) diff --git a/scripts/imap-to-archive.py b/scripts/imap-to-archive.py index 085b070..32a652a 100644 --- a/scripts/imap-to-archive.py +++ b/scripts/imap-to-archive.py @@ -9,7 +9,7 @@ from pathlib import Path import sys from imapclient import IMAPClient -from 
archive.config import get_config +import archive.config from archive.exception import ConfigError from archive.mailarchive import MailArchive from archive.tools import now_str @@ -19,15 +19,42 @@ log = logging.getLogger(__name__) security_methods = {'imaps', 'starttls'} - default_config_file = os.path.expanduser("~/.config/archive/imap.cfg") -defaults = { - 'host': None, - 'port': None, - 'security': 'imaps', - 'user': None, - 'pass': None, -} + +class Config(archive.config.Config): + + defaults = { + 'host': None, + 'port': None, + 'security': 'imaps', + 'user': None, + 'pass': None, + } + args_options = ('host', 'port', 'security', 'user') + + def __init__(self, args): + self.config_file = args.config_file + super().__init__(args, config_section=args.config_section) + if args.config_section: + if not self.config_file: + raise ConfigError("configuration file %s not found" + % args.config_file) + if not self.config_section: + raise ConfigError("configuration section %s not found" + % args.config_section) + if self.config['security'] not in security_methods: + raise ConfigError("invalid security method '%s'" + % self.config['security']) + if not self.config['host']: + raise ConfigError("IMAP4 host name not specified") + if self.config['port'] is not None: + self.config['port'] = int(config['port']) + self.config['ssl'] = self.config['security'] == 'imaps' + if not self.config['user']: + raise ConfigError("IMAP4 user name not specified") + if self.config['pass'] is None: + self.config['pass'] = getpass.getpass() + argparser = argparse.ArgumentParser(add_help=False) argparser.add_argument('--help', @@ -55,18 +82,7 @@ logging.getLogger().setLevel(logging.DEBUG) try: - config = get_config(args, defaults) - if config['security'] not in security_methods: - raise ConfigError("invalid security method '%s'" % config['security']) - if not config['host']: - raise ConfigError("IMAP4 host name not specified") - if config['port'] is not None: - config['port'] = 
int(config['port']) - config['ssl'] = config['security'] == 'imaps' - if not config['user']: - raise ConfigError("IMAP4 user name not specified") - if config['pass'] is None: - config['pass'] = getpass.getpass() + config = Config(args).config except ConfigError as e: print("%s: configuration error: %s" % (argparser.prog, e), file=sys.stderr) sys.exit(2) From afe5a7a48da96d4bd7468fe8173bbcc0686a2c87 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 18 Apr 2021 22:56:23 +0200 Subject: [PATCH 010/138] Fixup d470de2: include missing test data to source distribution --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index c7abdab..bf5a022 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ include README.rst include tests/conftest.py include tests/data/.sha256 include tests/data/legacy-1_0.tar.gz +include tests/data/mailarchive-legacy-1_0.tar.xz include tests/data/mails.tar.gz include tests/data/manifest.yaml include tests/data/msg.txt From 53f2f5e4837c1ed2271f770734df2e9a5ccb6ece Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 18 Apr 2021 18:03:47 +0200 Subject: [PATCH 011/138] Add method Config.get() --- archive/config.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/archive/config.py b/archive/config.py index a88f7ae..365a5c0 100644 --- a/archive/config.py +++ b/archive/config.py @@ -23,7 +23,8 @@ def __init__(self, args, config_section=None): if vars(args)[k] is not None } self.config = ChainMap({}, args_cfg) if self.config_file and config_section: - cp = configparser.ConfigParser(comment_prefixes=('#', '!')) + cp = configparser.ConfigParser(comment_prefixes=('#', '!'), + interpolation=None) self.config_file = cp.read(self.config_file) if isinstance(config_section, str): config_section = (config_section,) @@ -35,3 +36,15 @@ def __init__(self, args, config_section=None): except KeyError: pass self.config.maps.append(self.defaults) + + def get(self, option, required=False, 
subst=True, split=False): + value = self.config[option] + if value is None: + if required: + raise ConfigError("%s not specified" % option) + else: + if subst: + value = value % self.config + if split: + value = value.split() + return value From 98e7fb5869d50bccb5eeea0cc64ab630e0011973 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 18 Apr 2021 22:40:56 +0200 Subject: [PATCH 012/138] Start backup-tool.py script: read configuration --- etc/backup.cfg | 38 +++++++++++++++++++ python-archive-tools.spec | 1 + scripts/backup-tool.py | 79 +++++++++++++++++++++++++++++++++++++++ setup.py | 4 +- 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 etc/backup.cfg create mode 100644 scripts/backup-tool.py diff --git a/etc/backup.cfg b/etc/backup.cfg new file mode 100644 index 0000000..f95ffbb --- /dev/null +++ b/etc/backup.cfg @@ -0,0 +1,38 @@ +# Configuration file for backup-tool. + +# backup-tool tries to read the following configuration sections in +# order: [<host>/<policy>], [<host>], [<policy>], where <host> is the +# hostname of the local machine and <policy> is the argument to the +# --policy command line option. The default policy is "sys". For +# each configuration option, the first occurrence in any of these +# sections will be used. + +# The default policy sys +[sys] +! dirs = +! /etc +! /root +! /usr/local +! excludes = +! /root/.cache +! targetdir = /proj/backup/auto + +# The special policy user is used when the --user command line option is used. +[user] +! name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +! dirs = %(home)s +! excludes = +! %(home)s/.cache +! %(home)s/.thumbnails +! %(home)s/tmp +! targetdir = /proj/backup/auto + +# Override settings on a particular host +! [db-host] +! targetdir = /proj/db +! +! [db-host/sys] +! dirs = +! /etc +! /root +!
/var/lib/mysql diff --git a/python-archive-tools.spec b/python-archive-tools.spec index 7589767..24f4b8e 100644 --- a/python-archive-tools.spec +++ b/python-archive-tools.spec @@ -53,6 +53,7 @@ python3 setup.py test %files %defattr(-,root,root) %doc README.rst +%config %{_sysconfdir}/backup.cfg %{python3_sitelib}/* %{_bindir}/* diff --git a/scripts/backup-tool.py b/scripts/backup-tool.py new file mode 100644 index 0000000..5f9a1e0 --- /dev/null +++ b/scripts/backup-tool.py @@ -0,0 +1,79 @@ +#! /usr/bin/python +"""Create a backup. +""" + +import argparse +import datetime +import logging +import os +from pathlib import Path +import pwd +import socket +import sys +import archive.config +from archive.exception import ConfigError +from archive.tools import now_str + +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") +log = logging.getLogger(__name__) + +schedules = {'full', 'cumu', 'incr'} +def get_config_file(): + try: + return os.environ['BACKUP_CFG'] + except KeyError: + return "/etc/backup.cfg" + +class Config(archive.config.Config): + + defaults = { + 'dirs': None, + 'excludes': "", + 'targetdir': None, + 'backupdir': "%(targetdir)s", + 'name': "%(host)s-%(date)s-%(schedule)s.tar.bz2", + 'tags': "", + } + config_file = get_config_file() + args_options = ('policy', 'user', 'schedule') + + def __init__(self, args): + host = socket.gethostname() + sections = ("%s/%s" % (host, args.policy), host, args.policy) + super().__init__(args, config_section=sections) + if not self.config_file: + raise ConfigError("configuration file %s not found" + % self.config_file) + self.config['host'] = host + self.config['date'] = datetime.date.today().strftime("%y%m%d") + if args.user: + try: + self.config['home'] = pwd.getpwnam(self.config['user']).pw_dir + except KeyError: + pass + self.config['name'] = self.get('name', required=True) + self.config['dirs'] = self.get('dirs', required=True, split=True) + self.config['excludes'] = self.get('excludes', 
split=True) + self.config['targetdir'] = self.get('targetdir', required=True) + self.config['backupdir'] = self.get('backupdir') + self.config['tags'] = self.get('tags', split=True) + +argparser = argparse.ArgumentParser() +clsgrp = argparser.add_mutually_exclusive_group() +clsgrp.add_argument('--policy', default='sys') +clsgrp.add_argument('--user') +argparser.add_argument('--schedule', choices=schedules, default='full') +argparser.add_argument('-v', '--verbose', action='store_true', + help=("verbose diagnostic output")) +args = argparser.parse_args() + +if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) +if args.user: + args.policy = 'user' + +try: + config = Config(args).config +except ConfigError as e: + print("%s: configuration error: %s" % (argparser.prog, e), file=sys.stderr) + sys.exit(2) diff --git a/setup.py b/setup.py index a587a96..3f78763 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,9 @@ def run(self): license = "Apache-2.0", requires = ["PyYAML"], packages = ["archive", "archive.cli"], - scripts = ["scripts/archive-tool.py", "scripts/imap-to-archive.py"], + scripts = ["scripts/archive-tool.py", "scripts/backup-tool.py", + "scripts/imap-to-archive.py"], + data_files = [("/etc", ["etc/backup.cfg"])], classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: System Administrators", From c9a8e324d65e41fd5e449147d88a10375a09660b Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 1 May 2021 13:55:17 +0200 Subject: [PATCH 013/138] Add a test for Issue #53 --- tests/test_03_create_workdir.py | 41 +++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 tests/test_03_create_workdir.py diff --git a/tests/test_03_create_workdir.py b/tests/test_03_create_workdir.py new file mode 100644 index 0000000..1f72667 --- /dev/null +++ b/tests/test_03_create_workdir.py @@ -0,0 +1,41 @@ +"""Tests passing the workdir keyword argument to Archive.create(). 
+""" + +from pathlib import Path +import pytest +from archive import Archive +from conftest import * + + +testdata = [ + DataDir(Path("base"), 0o755), + DataDir(Path("base", "data"), 0o755), + DataFile(Path("base", "data", "rnd.dat"), 0o644), +] + +@pytest.fixture(scope="module") +def test_dir(tmpdir): + setup_testdata(tmpdir / "work", testdata) + return tmpdir + + +@pytest.mark.parametrize("abs_wd", [ + True, + pytest.param(False, marks=pytest.mark.xfail(raises=FileNotFoundError, + reason="Issue #53")) +], ids=absflag) +def test_create_workdir(test_dir, monkeypatch, abs_wd): + """Pass an absolute or relative workdir to Archive.create(). + (Issue #53) + """ + monkeypatch.chdir(str(test_dir)) + if abs_wd: + workdir = test_dir / "work" + else: + workdir = Path("work") + archive_path = Path(archive_name(tags=[absflag(abs_wd)])) + Archive().create(archive_path, "", [Path("base")], workdir=workdir) + with Archive().open(workdir / archive_path) as archive: + assert archive.basedir == Path("base") + check_manifest(archive.manifest, testdata) + archive.verify() From ad7c3784ad1296adef1b15496d6f6c343af75a8a Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 1 May 2021 14:06:00 +0200 Subject: [PATCH 014/138] Fix #53 --- archive/archive.py | 2 +- tests/test_03_create_workdir.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index c62443a..3806590 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -70,7 +70,7 @@ def create(self, path, compression, paths, mode = 'x:' + compression if workdir: with tmp_chdir(workdir): - self._create(workdir / path, mode, paths, + self._create(path, mode, paths, basedir, excludes, dedup, tags) else: self._create(path, mode, paths, basedir, excludes, dedup, tags) diff --git a/tests/test_03_create_workdir.py b/tests/test_03_create_workdir.py index 1f72667..7e48f74 100644 --- a/tests/test_03_create_workdir.py +++ b/tests/test_03_create_workdir.py @@ -19,11 +19,7 @@ def 
test_dir(tmpdir): return tmpdir -@pytest.mark.parametrize("abs_wd", [ - True, - pytest.param(False, marks=pytest.mark.xfail(raises=FileNotFoundError, - reason="Issue #53")) -], ids=absflag) +@pytest.mark.parametrize("abs_wd", [ True, False ], ids=absflag) def test_create_workdir(test_dir, monkeypatch, abs_wd): """Pass an absolute or relative workdir to Archive.create(). (Issue #53) From 6952518957d9f48c2c0df05bf3b61f5c5dddc110 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 1 May 2021 14:20:46 +0200 Subject: [PATCH 015/138] Fix outdated doc string: Archive._check_paths() does not accept strings any more. Ref. #36. --- archive/archive.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 3806590..14bd618 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -115,8 +115,7 @@ def _create(self, path, mode, paths, basedir, excludes, dedup, tags): def _check_paths(self, paths, basedir, excludes): """Check the paths to be added to an archive for several error - conditions. Accept a list of either strings or path-like - objects. Convert them to a list of Path objects. Also sets + conditions. Accept a list of path-like objects. Also sets self.basedir. 
""" if not paths: From 86f52d504cdba6c53379e338f683f1b447d2bb92 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 1 May 2021 14:46:52 +0200 Subject: [PATCH 016/138] Minor refactor of handling workdir in Archive.create() --- archive/archive.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 14bd618..9ecc29f 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -3,6 +3,7 @@ from enum import Enum import itertools +import os from pathlib import Path import stat import sys @@ -10,7 +11,7 @@ import tempfile from archive.manifest import Manifest from archive.exception import * -from archive.tools import tmp_chdir, checksum +from archive.tools import checksum def _is_normalized(p): """Check if the path is normalized. @@ -68,12 +69,15 @@ def create(self, path, compression, paths, mode = 'w:' + compression else: mode = 'x:' + compression - if workdir: - with tmp_chdir(workdir): - self._create(path, mode, paths, - basedir, excludes, dedup, tags) - else: + save_wd = None + try: + if workdir: + save_wd = os.getcwd() + os.chdir(str(workdir)) self._create(path, mode, paths, basedir, excludes, dedup, tags) + finally: + if save_wd: + os.chdir(save_wd) return self def _create(self, path, mode, paths, basedir, excludes, dedup, tags): From 012c1534eac8a48b4c8969e974282e2026c00e35 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 1 May 2021 15:24:50 +0200 Subject: [PATCH 017/138] Minor refactor of Archive._check_paths() --- archive/archive.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 9ecc29f..28a4212 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -124,12 +124,12 @@ def _check_paths(self, paths, basedir, excludes): """ if not paths: raise ArchiveCreateError("refusing to create an empty archive") + abspath = paths[0].is_absolute() if not basedir: - p = paths[0] - if p.is_absolute(): + if abspath: 
self.basedir = Path(self.path.name.split('.')[0]) else: - self.basedir = Path(p.parts[0]) + self.basedir = Path(paths[0].parts[0]) else: self.basedir = basedir if self.basedir.is_absolute(): @@ -140,17 +140,13 @@ def _check_paths(self, paths, basedir, excludes): # The same rules for paths also apply to excludes, if # provided. So we may just iterate over the chain of both # lists. - abspath = None for p in itertools.chain(paths, excludes or ()): if not _is_normalized(p): raise ArchiveCreateError("invalid path '%s': " "must be normalized" % p) - if abspath is None: - abspath = p.is_absolute() - else: - if abspath != p.is_absolute(): - raise ArchiveCreateError("mixing of absolute and relative " - "paths is not allowed") + if abspath != p.is_absolute(): + raise ArchiveCreateError("mixing of absolute and relative " + "paths is not allowed") if not p.is_absolute(): try: # This will raise ValueError if p does not start From 99a82368c18efac529b982d4a092d69ab006db78 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 1 May 2021 15:59:43 +0200 Subject: [PATCH 018/138] Add a command line argument --directory to archive-tool create --- archive/cli/create.py | 5 ++++- tests/test_04_cli_create_misc.py | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/archive/cli/create.py b/archive/cli/create.py index 44a4d39..bf5fc4d 100644 --- a/archive/cli/create.py +++ b/archive/cli/create.py @@ -23,13 +23,16 @@ def create(args): if args.compression == 'none': args.compression = '' archive = Archive().create(args.archive, args.compression, args.files, - basedir=args.basedir, excludes=args.exclude, + basedir=args.basedir, workdir=args.directory, + excludes=args.exclude, dedup=DedupMode(args.deduplicate), tags=args.tag) return 0 def add_parser(subparsers): parser = subparsers.add_parser('create', help="create the archive") + parser.add_argument('--directory', type=Path, + help=("change directory prior creating the archive")) parser.add_argument('--tag', 
action='append', help=("user defined tags to mark the archive")) parser.add_argument('--compression', diff --git a/tests/test_04_cli_create_misc.py b/tests/test_04_cli_create_misc.py index f361dd5..c429a65 100644 --- a/tests/test_04_cli_create_misc.py +++ b/tests/test_04_cli_create_misc.py @@ -42,3 +42,12 @@ def test_cli_create_tags(test_dir, monkeypatch, tags, expected): with Archive().open(archive_path) as archive: assert archive.manifest.tags == expected check_manifest(archive.manifest, testdata) + +def test_cli_create_directory(test_dir): + """Change the working directory using the --directory argument. + """ + archive_path = archive_name(tags=["dir"]) + args = ["create", "--directory", str(test_dir), archive_path, "base"] + callscript("archive-tool.py", args) + with Archive().open(test_dir / archive_path) as archive: + check_manifest(archive.manifest, testdata) From f8cb279ab253225fb7d798b24480d594cae04144 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 1 May 2021 20:26:07 +0200 Subject: [PATCH 019/138] Move checking the paths and the creation of the manifest out of Archive._create() into the caller Archive.create() --- archive/archive.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 28a4212..a752cd2 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -74,20 +74,20 @@ def create(self, path, compression, paths, if workdir: save_wd = os.getcwd() os.chdir(str(workdir)) - self._create(path, mode, paths, basedir, excludes, dedup, tags) + self.path = path + self._check_paths(paths, basedir, excludes) + self.manifest = Manifest(paths=paths, excludes=excludes, tags=tags) + self.manifest.add_metadata(self.basedir / ".manifest.yaml") + for md in self._metadata: + md.set_path(self.basedir) + self.manifest.add_metadata(md.path) + self._create(mode, dedup) finally: if save_wd: os.chdir(save_wd) return self - def _create(self, path, mode, paths, basedir, excludes, dedup, tags): 
- self.path = path - self._check_paths(paths, basedir, excludes) - self.manifest = Manifest(paths=paths, excludes=excludes, tags=tags) - self.manifest.add_metadata(self.basedir / ".manifest.yaml") - for md in self._metadata: - md.set_path(self.basedir) - self.manifest.add_metadata(md.path) + def _create(self, mode, dedup): with tarfile.open(str(self.path), mode) as tarf: with tempfile.TemporaryFile() as tmpf: self.manifest.write(tmpf) From d4ae402f929d707739fe6d259ad1a81f43e798df Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 2 May 2021 17:41:08 +0200 Subject: [PATCH 020/138] Add a new keyword argument fileinfos that Manifest() and Archive.create() accept in place of paths and excludes --- archive/archive.py | 18 ++++++-- archive/manifest.py | 14 ++++-- tests/test_01_manifest.py | 44 ++++++++++++------ tests/test_03_create_fileinfos.py | 77 +++++++++++++++++++++++++++++++ 4 files changed, 130 insertions(+), 23 deletions(-) create mode 100644 tests/test_03_create_fileinfos.py diff --git a/archive/archive.py b/archive/archive.py index a752cd2..01042ad 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -1,6 +1,7 @@ """Provide the Archive class. 
""" +from collections.abc import Sequence from enum import Enum import itertools import os @@ -60,8 +61,8 @@ def __init__(self): self._file = None self._metadata = [] - def create(self, path, compression, paths, - basedir=None, workdir=None, excludes=None, + def create(self, path, compression, paths=None, fileinfos=None, + basedir=None, workdir=None, excludes=None, dedup=DedupMode.LINK, tags=None): if sys.version_info < (3, 5): # The 'x' (exclusive creation) mode was added to tarfile @@ -75,8 +76,15 @@ def create(self, path, compression, paths, save_wd = os.getcwd() os.chdir(str(workdir)) self.path = path - self._check_paths(paths, basedir, excludes) - self.manifest = Manifest(paths=paths, excludes=excludes, tags=tags) + if fileinfos is not None: + if not isinstance(fileinfos, Sequence): + fileinfos = list(fileinfos) + self._check_paths([fi.path for fi in fileinfos], basedir) + self.manifest = Manifest(fileinfos=fileinfos, tags=tags) + else: + self._check_paths(paths, basedir, excludes) + self.manifest = Manifest(paths=paths, excludes=excludes, + tags=tags) self.manifest.add_metadata(self.basedir / ".manifest.yaml") for md in self._metadata: md.set_path(self.basedir) @@ -117,7 +125,7 @@ def _create(self, mode, dedup): else: tarf.add(str(p), arcname=name, recursive=False) - def _check_paths(self, paths, basedir, excludes): + def _check_paths(self, paths, basedir, excludes=None): """Check the paths to be added to an archive for several error conditions. Accept a list of path-like objects. Also sets self.basedir. 
diff --git a/archive/manifest.py b/archive/manifest.py index 89cd7ef..8ea134f 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -144,14 +144,15 @@ class Manifest(Sequence): Version = "1.1" - def __init__(self, fileobj=None, paths=None, excludes=None, tags=None): + def __init__(self, fileobj=None, paths=None, excludes=None, + fileinfos=None, tags=None): if fileobj is not None: docs = yaml.safe_load_all(fileobj) self.head = next(docs) # Legacy: version 1.0 head did not have Metadata: self.head.setdefault("Metadata", []) self.fileinfos = [ FileInfo(data=d) for d in next(docs) ] - elif paths is not None: + elif paths is not None or fileinfos is not None: self.head = { "Checksums": FileInfo.Checksums, "Date": now_str(), @@ -161,10 +162,13 @@ def __init__(self, fileobj=None, paths=None, excludes=None, tags=None): } if tags is not None: self.head["Tags"] = tags - fileinfos = FileInfo.iterpaths(paths, set(excludes or ())) - self.fileinfos = sorted(fileinfos, key=lambda fi: fi.path) + if fileinfos is None: + fileinfos = FileInfo.iterpaths(paths, set(excludes or ())) + self.fileinfos = list(fileinfos) + self.sort() else: - raise TypeError("Either fileobj or paths must be provided") + raise TypeError("Either fileobj or paths or fileinfos " + "must be provided") def __len__(self): return len(self.fileinfos) diff --git a/tests/test_01_manifest.py b/tests/test_01_manifest.py index 82ccea8..6f31110 100644 --- a/tests/test_01_manifest.py +++ b/tests/test_01_manifest.py @@ -26,34 +26,34 @@ def test_dir(tmpdir): return tmpdir -def test_manifest_from_paths(test_dir, monkeypatch): - """Create a manifest reading the files in test_dir. +def test_manifest_from_fileobj(): + """Read a manifest from a YAML file. 
""" - monkeypatch.chdir(str(test_dir)) - manifest = Manifest(paths=[Path("base")]) + with gettestdata("manifest.yaml").open("rt") as f: + manifest = Manifest(fileobj=f) head = manifest.head assert set(head.keys()) == { "Checksums", "Date", "Generator", "Metadata", "Version" } - assert manifest.version == Manifest.Version + assert manifest.version == "1.1" assert isinstance(manifest.date, datetime.datetime) - assert manifest.checksums == tuple(FileInfo.Checksums) + assert manifest.checksums == ("sha256",) assert manifest.tags == () check_manifest(manifest, testdata) -def test_manifest_from_fileobj(): - """Read a manifest from a YAML file. +def test_manifest_from_paths(test_dir, monkeypatch): + """Create a manifest reading the files in test_dir. """ - with gettestdata("manifest.yaml").open("rt") as f: - manifest = Manifest(fileobj=f) + monkeypatch.chdir(str(test_dir)) + manifest = Manifest(paths=[Path("base")]) head = manifest.head assert set(head.keys()) == { "Checksums", "Date", "Generator", "Metadata", "Version" } - assert manifest.version == "1.1" + assert manifest.version == Manifest.Version assert isinstance(manifest.date, datetime.datetime) - assert manifest.checksums == ("sha256",) + assert manifest.checksums == tuple(FileInfo.Checksums) assert manifest.tags == () check_manifest(manifest, testdata) @@ -115,7 +115,25 @@ def test_manifest_exclude_explicit_include(test_dir, monkeypatch): data = sub_testdata(testdata, excludes[0], paths[1]) check_manifest(manifest, data) -def test_mnifest_sort(test_dir, monkeypatch): + +def test_manifest_from_fileinfos(test_dir, monkeypatch): + """Create a manifest providing an iterable of fileinfos. 
+ """ + monkeypatch.chdir(str(test_dir)) + fileinfos = FileInfo.iterpaths([Path("base")], set()) + manifest = Manifest(fileinfos=fileinfos) + head = manifest.head + assert set(head.keys()) == { + "Checksums", "Date", "Generator", "Metadata", "Version" + } + assert manifest.version == Manifest.Version + assert isinstance(manifest.date, datetime.datetime) + assert manifest.checksums == tuple(FileInfo.Checksums) + assert manifest.tags == () + check_manifest(manifest, testdata) + + +def test_manifest_sort(test_dir, monkeypatch): """Test the Manifest.sort() method. """ monkeypatch.chdir(str(test_dir)) diff --git a/tests/test_03_create_fileinfos.py b/tests/test_03_create_fileinfos.py new file mode 100644 index 0000000..bc3d87b --- /dev/null +++ b/tests/test_03_create_fileinfos.py @@ -0,0 +1,77 @@ +"""Test creating an archive from an iterable of FileInfo objects. +""" + +from pathlib import Path +import pytest +from archive import Archive +from archive.manifest import FileInfo, Manifest +from conftest import * + + +# Setup a directory with some test data to be put into an archive. +# Make sure that we have all kind of different things in there. +testdata = [ + DataDir(Path("base"), 0o755, mtime=1565100853), + DataDir(Path("base", "data"), 0o750, mtime=1555271302), + DataDir(Path("base", "empty"), 0o755, mtime=1547911753), + DataFile(Path("base", "msg.txt"), 0o644, mtime=1547911753), + DataFile(Path("base", "data", "rnd.dat"), 0o600, mtime=1563112510), + DataSymLink(Path("base", "s.dat"), Path("data", "rnd.dat"), + mtime=1565100853), +] +sha256sum = "sha256sum" + +@pytest.fixture(scope="module") +def test_dir(tmpdir): + setup_testdata(tmpdir, testdata) + return tmpdir + + +def test_create_fileinfos_list(test_dir, monkeypatch): + """Create the archive from a list of FileInfo objects. 
+ """ + monkeypatch.chdir(str(test_dir)) + fileinfos = list(FileInfo.iterpaths([Path("base")], set())) + archive_path = Path("archive-fi-list.tar") + Archive().create(archive_path, "", fileinfos=fileinfos) + with Archive().open(archive_path) as archive: + check_manifest(archive.manifest, testdata) + archive.verify() + +def test_create_fileinfos_generator(test_dir, monkeypatch): + """Create the archive from FileInfo.iterpaths() which returns a generator. + """ + monkeypatch.chdir(str(test_dir)) + fileinfos = FileInfo.iterpaths([Path("base")], set()) + archive_path = Path("archive-fi-generator.tar") + Archive().create(archive_path, "", fileinfos=fileinfos) + with Archive().open(archive_path) as archive: + check_manifest(archive.manifest, testdata) + archive.verify() + +def test_create_fileinfos_manifest(test_dir, monkeypatch): + """Create the archive from a Manifest. + A Manifest is an iterable of FileInfo objects. + """ + monkeypatch.chdir(str(test_dir)) + manifest = Manifest(paths=[Path("base")]) + archive_path = Path("archive-fi-manifest.tar") + Archive().create(archive_path, "", fileinfos=manifest) + with Archive().open(archive_path) as archive: + check_manifest(archive.manifest, testdata) + archive.verify() + +def test_create_fileinfos_subset(test_dir, monkeypatch): + """Do not include the content of a directory. + This test verifies that creating an archive from fileinfos does + not implicitly descend subdirectories. 
+ """ + monkeypatch.chdir(str(test_dir)) + excludes = [Path("base", "data", "rnd.dat")] + fileinfos = FileInfo.iterpaths([Path("base")], set(excludes)) + data = sub_testdata(testdata, excludes[0]) + archive_path = Path("archive-fi-subset.tar") + Archive().create(archive_path, "", fileinfos=fileinfos) + with Archive().open(archive_path) as archive: + check_manifest(archive.manifest, data) + archive.verify() From b6ed9c919a491f0fb0227438ba06ecb016fae6dd Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 2 May 2021 20:29:05 +0200 Subject: [PATCH 021/138] Update changelog --- CHANGES.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 24eb7f2..84b2ffa 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,13 @@ Changelog New features ------------ ++ `#54`_: Add a command line flags `--directory ` to + `archive-tool create`. The script will change into this directory + prior creating the archive if provided. + ++ `#54`_: Add a new keyword argument `fileinfos` that + :class:`Manifest` and :meth:`Archive.create` accept. + + `#50`_, `#51`_: Add a header with some metadata to the index in a mail archive created by :class:`MailArchive`. @@ -20,11 +27,17 @@ Incompatible changes Bug fixes and minor changes --------------------------- ++ `#53`_, `#54`_: Spurious :exc:`FileNotFoundError` from + :meth:`Archive.create` when passing a relative path as `workdir` + argument. + + `#48`_: Review and standardize some error messages. .. _#48: https://github.com/RKrahl/archive-tools/pull/48 .. _#50: https://github.com/RKrahl/archive-tools/issues/50 .. _#51: https://github.com/RKrahl/archive-tools/pull/51 +.. _#53: https://github.com/RKrahl/archive-tools/issues/53 +.. 
_#54: https://github.com/RKrahl/archive-tools/pull/54 0.5.1 (2020-12-12) From 3e3664fd11f80e111ae8a91b171148e507f5ebea Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 3 May 2021 09:10:57 +0200 Subject: [PATCH 022/138] Fix: should verify that fileinfos passed to Manifest() actually contain the checksums advertised in the manifest header. --- archive/manifest.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/archive/manifest.py b/archive/manifest.py index 8ea134f..6b39ef7 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -163,8 +163,15 @@ def __init__(self, fileobj=None, paths=None, excludes=None, if tags is not None: self.head["Tags"] = tags if fileinfos is None: - fileinfos = FileInfo.iterpaths(paths, set(excludes or ())) - self.fileinfos = list(fileinfos) + fileinfos = list(FileInfo.iterpaths(paths, set(excludes or ()))) + else: + fileinfos = list(fileinfos) + cs = set(FileInfo.Checksums) + for fi in fileinfos: + if fi.is_file() and not cs.issubset(fi.checksum.keys()): + raise ValueError("Missing checksum on item %s" + % fi.path) + self.fileinfos = fileinfos self.sort() else: raise TypeError("Either fileobj or paths or fileinfos " From 39e80f94c7c9bd00beba042ae8876ef94ce290ad Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 3 May 2021 21:50:02 +0200 Subject: [PATCH 023/138] Raise an ArchiveCreateError if the fileinfos passed to Archive.create() do not contain the proper checksums. 
--- archive/archive.py | 5 ++++- tests/test_03_create_errors.py | 39 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/archive/archive.py b/archive/archive.py index 01042ad..7ccb9ee 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -80,7 +80,10 @@ def create(self, path, compression, paths=None, fileinfos=None, if not isinstance(fileinfos, Sequence): fileinfos = list(fileinfos) self._check_paths([fi.path for fi in fileinfos], basedir) - self.manifest = Manifest(fileinfos=fileinfos, tags=tags) + try: + self.manifest = Manifest(fileinfos=fileinfos, tags=tags) + except ValueError as e: + raise ArchiveCreateError("invalid fileinfos: %s" % e) else: self._check_paths(paths, basedir, excludes) self.manifest = Manifest(paths=paths, excludes=excludes, diff --git a/tests/test_03_create_errors.py b/tests/test_03_create_errors.py index 04564f2..4fd3bb0 100644 --- a/tests/test_03_create_errors.py +++ b/tests/test_03_create_errors.py @@ -6,6 +6,7 @@ import pytest from archive import Archive from archive.exception import ArchiveCreateError +from archive.manifest import FileInfo, Manifest from conftest import * @@ -134,3 +135,41 @@ def test_create_metadata_vs_content(test_dir, testname, monkeypatch): archive.add_metadata("msg.txt", tmpf) archive.create(Path(name), "", [p]) assert "filename is reserved" in str(err.value) + +def test_create_fileinfos_missing_checksum(test_dir, testname, monkeypatch): + """When an archive is created from precompiled fileinfos, + they must already contain suitable checksums. + """ + monkeypatch.chdir(str(test_dir)) + name = archive_name(tags=[testname]) + with monkeypatch.context() as m: + m.setattr(FileInfo, "Checksums", ['md5']) + fileinfos = list(FileInfo.iterpaths([Path("base")], set())) + # Checksums are calculated lazily, we must explicitely access + # the attribute while monkeypatching FileInfo.Checksums is + # active. 
+ for fi in fileinfos: + if fi.is_file(): + assert set(fi.checksum.keys()) == {'md5'} + with pytest.raises(ArchiveCreateError) as err: + Archive().create(Path(name), "", fileinfos=fileinfos) + assert "Missing checksum" in str(err.value) + +def test_create_manifest_missing_checksum(test_dir, testname, monkeypatch): + """Same as last test, but now creating the archive from a precompiled + manifest. + """ + monkeypatch.chdir(str(test_dir)) + name = archive_name(tags=[testname]) + with monkeypatch.context() as m: + m.setattr(FileInfo, "Checksums", ['md5']) + manifest = Manifest(paths=[Path("base")]) + # Checksums are calculated lazily, we must explicitely access + # the attribute while monkeypatching FileInfo.Checksums is + # active. + for fi in manifest: + if fi.is_file(): + assert set(fi.checksum.keys()) == {'md5'} + with pytest.raises(ArchiveCreateError) as err: + Archive().create(Path(name), "", fileinfos=manifest) + assert "Missing checksum" in str(err.value) From facb2bcb6d4a64743a7702774b7e80fc221976f1 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 9 May 2021 21:02:13 +0200 Subject: [PATCH 024/138] Add a function diff_manifest() --- archive/manifest.py | 92 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/archive/manifest.py b/archive/manifest.py index 89cd7ef..ecabbc1 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -4,6 +4,7 @@ from collections.abc import Sequence import datetime from distutils.version import StrictVersion +from enum import Enum import grp import os from pathlib import Path @@ -16,6 +17,18 @@ from archive.tools import now_str, parse_date, checksum, mode_ft, ft_mode +class DiffStatus(Enum): + """Status of an item as the result of comparing two iterables of FileInfo. + + See :func:`diff_manifest` for the semantic of the values. 
+ """ + MATCH = 0 + META = 1 + CONTENT = 2 + MISSING_A = 3 + MISSING_B = 4 + + class FileInfo: Checksums = ['sha256'] @@ -214,3 +227,82 @@ def sort(self, *, key=None, reverse=False): if key is None: key = lambda fi: fi.path self.fileinfos.sort(key=key, reverse=reverse) + +def diff_manifest(manifest_a, manifest_b, checksum=FileInfo.Checksums[0]): + """Compare two iterables of :class:`~archive.manifest.FileInfo` objects. + + Items are matched by the :attr:`~archive.manifest.FileInfo.path`. + For each pair `fi_a` and `fi_b` of FileInfo objects with matching + path from `manifest_a` and `manifest_b` respectively, yield a + tuple (`status`, `fi_a`, `fi_b`), where `status` is a + :class:`~archive.manifest.DiffStatus`. The value of `status` will + be :const:`~archive.manifest.DiffStatus.CONTENT` if + :attr:`~archive.manifest.FileInfo.type` differ, or if both `fi_a` + and `fi_b` represent regular files and checksum differ, or if + `fi_a` and `fi_b` represent symbolic links and target differ. If + `fi_a` and `fi_b` represent regular files and there are mismatches + in any other metadata, `status` will be + :const:`~archive.manifest.DiffStatus.META`. It will be + :const:`~archive.manifest.DiffStatus.MATCH` if `fi_a` and `fi_b` + fully coincide. If an item `fi_a` from `manifest_a` has no match + in `manifest_b`, yield + (:const:`~archive.manifest.DiffStatus.MISSING_B`, `fi_a`, :const:`None`). + Accordingly, yield + (:const:`~archive.manifest.DiffStatus.MISSING_A`, :const:`None`, `fi_b`), + if there is no match for `fi_b`. + + It is assumed that `manifest_a` and `manifest_b` are sorted by + path. Spurious mismatches will be reported if this is not the + case. 
+ """ + def _next(it): + try: + return next(it) + except StopIteration: + return None + + def _match(fi_a, fi_b, algorithm): + assert fi_a.path == fi_b.path + if fi_a.type != fi_b.type: + return DiffStatus.CONTENT + elif fi_a.type == "l": + if fi_a.target != fi_b.target: + return DiffStatus.CONTENT + elif fi_a.type == "f": + # Note: we don't need to compare the size, because if + # the size differs, it's mostly certain that also the + # checksum do. + if fi_a.checksum[algorithm] != fi_b.checksum[algorithm]: + return DiffStatus.CONTENT + elif (fi_a.uid != fi_b.uid or + fi_a.uname != fi_b.uname or + fi_a.gid != fi_b.gid or + fi_a.gname != fi_b.gname or + fi_a.mode != fi_b.mode or + int(fi_a.mtime) != int(fi_b.mtime)): + return DiffStatus.META + return DiffStatus.MATCH + + it_a = iter(manifest_a) + it_b = iter(manifest_b) + fi_a = _next(it_a) + fi_b = _next(it_b) + while True: + if fi_a is None and fi_b is None: + break + elif fi_a is None: + yield (DiffStatus.MISSING_A, None, fi_b) + fi_b = _next(it_b) + elif fi_b is None: + yield (DiffStatus.MISSING_B, fi_a, None) + fi_a = _next(it_a) + elif fi_a.path > fi_b.path: + yield (DiffStatus.MISSING_A, None, fi_b) + fi_b = _next(it_b) + elif fi_b.path > fi_a.path: + yield (DiffStatus.MISSING_B, fi_a, None) + fi_a = _next(it_a) + else: + yield (_match(fi_a, fi_b, checksum), fi_a, fi_b) + fi_a = _next(it_a) + fi_b = _next(it_b) From b31c5d2ce977716cbe535ef03ab9c0b61998c8ce Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 9 May 2021 21:44:02 +0200 Subject: [PATCH 025/138] Add a test for Issue #55 --- tests/test_04_cli_diff.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_04_cli_diff.py b/tests/test_04_cli_diff.py index 8e4d400..a614531 100644 --- a/tests/test_04_cli_diff.py +++ b/tests/test_04_cli_diff.py @@ -299,3 +299,28 @@ def test_diff_dircontent(test_data, testname, monkeypatch, abspath): out = list(get_output(f)) assert len(out) == 1 assert out[0] == "Only in %s: %s" % 
(archive_ref_path, pd) + +@pytest.mark.xfail(reason="Issue #55") +@pytest.mark.parametrize("abspath", [False, True]) +def test_diff_extrafile_end(test_data, testname, monkeypatch, abspath): + """The first archives has an extra entry as last item. Ref. #55 + """ + monkeypatch.chdir(str(test_data)) + if abspath: + archive_ref_path = Path("archive-abs.tar") + base_dir = test_data / "base" + else: + archive_ref_path = Path("archive-rel.tar") + base_dir = Path("base") + p = base_dir / "zzz.dat" + shutil.copy(str(gettestdata("rnd2.dat")), str(p)) + flag = absflag(abspath) + archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) + Archive().create(archive_path, "bz2", [base_dir]) + with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + args = ["diff", str(archive_path), str(archive_ref_path)] + callscript("archive-tool.py", args, returncode=102, stdout=f) + f.seek(0) + out = list(get_output(f)) + assert len(out) == 1 + assert out[0] == "Only in %s: %s" % (archive_path, p) From ba668e5f532741e18365e933cc43a362e051fe92 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 11 May 2021 21:15:38 +0200 Subject: [PATCH 026/138] Add tests for the function diff_manifest() --- tests/test_01_diff_manifest.py | 230 +++++++++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 tests/test_01_diff_manifest.py diff --git a/tests/test_01_diff_manifest.py b/tests/test_01_diff_manifest.py new file mode 100644 index 0000000..82c8136 --- /dev/null +++ b/tests/test_01_diff_manifest.py @@ -0,0 +1,230 @@ +"""Test diff_manifest() function in archive.manifest. +""" + +from pathlib import Path +import shutil +from tempfile import TemporaryFile +from archive.archive import Archive +from archive.manifest import DiffStatus, FileInfo, diff_manifest +import pytest +from conftest import * + + +# Setup a directory with some test data to be put into an archive. +# Make sure that we have all kind of different things in there. 
+testdata = [ + DataDir(Path("base"), 0o755), + DataDir(Path("base", "data"), 0o750), + DataDir(Path("base", "empty"), 0o755), + DataFile(Path("base", "msg.txt"), 0o644), + DataFile(Path("base", "data", "rnd.dat"), 0o600), + DataFile(Path("base", "rnd.dat"), 0o600), + DataSymLink(Path("base", "s.dat"), Path("data", "rnd.dat")), +] + +def get_fileinfos(base): + fileinfos = FileInfo.iterpaths([base], set()) + return sorted(fileinfos, key = lambda fi: fi.path) + +def non_match(t): + return t[0] != DiffStatus.MATCH + +@pytest.fixture(scope="module") +def test_dir(tmpdir): + setup_testdata(tmpdir, testdata) + Archive().create(Path("archive.tar"), "", [Path("base")], workdir=tmpdir) + return tmpdir + +@pytest.fixture(scope="function") +def test_data(request, test_dir): + shutil.rmtree(str(test_dir / "base"), ignore_errors=True) + with Archive().open(test_dir / "archive.tar") as archive: + archive.extract(test_dir) + return test_dir + +def test_diff_manifest_equal(test_data, testname, monkeypatch): + """Diff two fileinfo lists having equal content. + """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert diff == [] + +def test_diff_manifest_metadata(test_data, testname, monkeypatch): + """Diff two fileinfo lists having one file's metadata modified. 
+ """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + p = base_dir / "rnd.dat" + p.chmod(0o0444) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 1 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.META + assert fi_a.type == fi_b.type == 'f' + assert fi_a.path == fi_b.path == p + +def test_diff_manifest_modified_file(test_data, testname, monkeypatch): + """Diff two fileinfo lists having one file's content modified. + """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + p = base_dir / "rnd.dat" + shutil.copy(str(gettestdata("rnd2.dat")), str(p)) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 1 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.CONTENT + assert fi_a.type == fi_b.type == 'f' + assert fi_a.path == fi_b.path == p + +def test_diff_manifest_symlink_target(test_data, testname, monkeypatch): + """Diff two fileinfo lists having one symlink's target modified. + """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + p = base_dir / "s.dat" + p.unlink() + p.symlink_to(Path("msg.txt")) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 1 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.CONTENT + assert fi_a.type == fi_b.type == 'l' + assert fi_a.path == fi_b.path == p + +def test_diff_manifest_wrong_type(test_data, testname, monkeypatch): + """Diff two fileinfo lists with one entry having a wrong type. 
+ """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + p = base_dir / "rnd.dat" + p.unlink() + p.symlink_to(Path("data", "rnd.dat")) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 1 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.CONTENT + assert fi_a.type == 'l' + assert fi_b.type == 'f' + assert fi_a.path == fi_b.path == p + +def test_diff_manifest_missing_files(test_data, testname, monkeypatch): + """Diff two fileinfo lists having one file's name changed. + """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + p1 = base_dir / "rnd.dat" + p2 = base_dir / "a.dat" + p1.rename(p2) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 2 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.MISSING_B + assert fi_a.type == 'f' + assert fi_a.path == p2 + assert fi_b is None + status, fi_a, fi_b = diff[1] + assert status == DiffStatus.MISSING_A + assert fi_b.type == 'f' + assert fi_b.path == p1 + assert fi_a is None + +def test_diff_manifest_mult(test_data, testname, monkeypatch): + """Diff two fileinfo lists having multiple differences. 
+ """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + pm = base_dir / "data" / "rnd.dat" + shutil.copy(str(gettestdata("rnd2.dat")), str(pm)) + p1 = base_dir / "msg.txt" + p2 = base_dir / "o.txt" + p1.rename(p2) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 3 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.CONTENT + assert fi_a.type == fi_b.type == 'f' + assert fi_a.path == fi_b.path == pm + status, fi_a, fi_b = diff[1] + assert status == DiffStatus.MISSING_A + assert fi_b.type == 'f' + assert fi_b.path == p1 + assert fi_a is None + status, fi_a, fi_b = diff[2] + assert status == DiffStatus.MISSING_B + assert fi_a.type == 'f' + assert fi_a.path == p2 + assert fi_b is None + +def test_diff_manifest_dircontent(test_data, testname, monkeypatch): + """Diff two fileinfo lists with one subdirectory missing. + """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + pd = base_dir / "data" + shutil.rmtree(str(pd)) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 2 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.MISSING_A + assert fi_b.type == 'd' + assert fi_b.path == pd + assert fi_a is None + status, fi_a, fi_b = diff[1] + assert status == DiffStatus.MISSING_A + assert fi_b.type == 'f' + assert fi_b.path == pd / "rnd.dat" + assert fi_a is None + +def test_diff_manifest_add_file_last(test_data, testname, monkeypatch): + """Diff two fileinfo lists, one having an additional file as last item. + + The implementation of the corresponding command line tool used to + have a flaw in this particular case, ref. #55. 
+ """ + monkeypatch.chdir(str(test_data)) + with Archive().open(Path("archive.tar")) as archive: + manifest_ref = archive.manifest + base_dir = Path("base") + p = base_dir / "zzz.dat" + shutil.copy(str(gettestdata("rnd2.dat")), str(p)) + fileinfos = get_fileinfos(base_dir) + diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) + assert len(diff) == 1 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.MISSING_B + assert fi_a.type == 'f' + assert fi_a.path == p + assert fi_b is None + diff = list(filter(non_match, diff_manifest(manifest_ref, fileinfos))) + assert len(diff) == 1 + status, fi_a, fi_b = diff[0] + assert status == DiffStatus.MISSING_A + assert fi_b.type == 'f' + assert fi_b.path == p + assert fi_a is None From b5fcfd6a85530c6884bc9d055c659e0a67166235 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 11 May 2021 22:56:33 +0200 Subject: [PATCH 027/138] Distinguish - different file type (DiffStatus.TYPE) and - different symbol link target (DiffStatus.SYMLNK_TARGET) from - different file content (DiffStatus.CONTENT) --- archive/manifest.py | 10 ++++++---- tests/test_01_diff_manifest.py | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/archive/manifest.py b/archive/manifest.py index ecabbc1..f576158 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -25,8 +25,10 @@ class DiffStatus(Enum): MATCH = 0 META = 1 CONTENT = 2 - MISSING_A = 3 - MISSING_B = 4 + SYMLNK_TARGET = 3 + TYPE = 4 + MISSING_A = 5 + MISSING_B = 6 class FileInfo: @@ -264,10 +266,10 @@ def _next(it): def _match(fi_a, fi_b, algorithm): assert fi_a.path == fi_b.path if fi_a.type != fi_b.type: - return DiffStatus.CONTENT + return DiffStatus.TYPE elif fi_a.type == "l": if fi_a.target != fi_b.target: - return DiffStatus.CONTENT + return DiffStatus.SYMLNK_TARGET elif fi_a.type == "f": # Note: we don't need to compare the size, because if # the size differs, it's mostly certain that also the diff --git a/tests/test_01_diff_manifest.py 
b/tests/test_01_diff_manifest.py index 82c8136..422afc3 100644 --- a/tests/test_01_diff_manifest.py +++ b/tests/test_01_diff_manifest.py @@ -101,7 +101,7 @@ def test_diff_manifest_symlink_target(test_data, testname, monkeypatch): diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 status, fi_a, fi_b = diff[0] - assert status == DiffStatus.CONTENT + assert status == DiffStatus.SYMLNK_TARGET assert fi_a.type == fi_b.type == 'l' assert fi_a.path == fi_b.path == p @@ -119,7 +119,7 @@ def test_diff_manifest_wrong_type(test_data, testname, monkeypatch): diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 status, fi_a, fi_b = diff[0] - assert status == DiffStatus.CONTENT + assert status == DiffStatus.TYPE assert fi_a.type == 'l' assert fi_b.type == 'f' assert fi_a.path == fi_b.path == p From d4642014f0bb3744add86dbb4edf58d4e3fe4069 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 11 May 2021 22:01:18 +0200 Subject: [PATCH 028/138] Renounce sorting the manifests in the archive-tool diff command --- archive/cli/diff.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/archive/cli/diff.py b/archive/cli/diff.py index e3a12eb..e69d5a2 100644 --- a/archive/cli/diff.py +++ b/archive/cli/diff.py @@ -44,11 +44,6 @@ def diff(args): archive2 = Archive().open(args.archive2) archive2.close() algorithm = _common_checksum(archive1.manifest, archive2.manifest) - # In principle, we might rely on the fact that the manifest of an - # archive is always sorted at creation time. On the other hand, - # as we depend on this, we sort them again to be on the safe side. 
- archive1.manifest.sort() - archive2.manifest.sort() it1 = iter(archive1.manifest) it2 = iter(archive2.manifest) fi1 = _next(it1) From 70be967a62a3d2e1bc44045f057acd1860d99f57 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 11 May 2021 22:05:41 +0200 Subject: [PATCH 029/138] Do not take the paths relative to the base directory in the archive-tool diff command --- archive/cli/diff.py | 19 ++++------------- tests/test_04_cli_diff.py | 45 --------------------------------------- 2 files changed, 4 insertions(+), 60 deletions(-) diff --git a/archive/cli/diff.py b/archive/cli/diff.py index e69d5a2..00da2fe 100644 --- a/archive/cli/diff.py +++ b/archive/cli/diff.py @@ -29,15 +29,6 @@ def _next(it, skip=None): except StopIteration: return None -def _relpath(fi, basedir): - if fi is not None: - if fi.path.is_absolute(): - return fi.path - else: - return fi.path.relative_to(basedir) - else: - return None - def diff(args): archive1 = Archive().open(args.archive1) archive1.close() @@ -50,18 +41,16 @@ def diff(args): fi2 = _next(it2) status = 0 while True: - path1 = _relpath(fi1, archive1.basedir) - path2 = _relpath(fi2, archive2.basedir) - if path1 is None and path2 is None: + if fi1 is None and fi2 is None: break - elif path1 is None or path1 > path2: + elif fi1 is None or fi1.path > fi2.path: print("Only in %s: %s" % (archive2.path, fi2.path)) if args.skip_dir_content and fi2.is_dir(): fi2 = _next(it2, skip=fi2.path) else: fi2 = _next(it2) status = max(status, 102) - elif path2 is None or path2 > path1: + elif fi2 is None or fi2.path > fi1.path: print("Only in %s: %s" % (archive1.path, fi1.path)) if args.skip_dir_content and fi1.is_dir(): fi1 = _next(it1, skip=fi1.path) @@ -69,7 +58,7 @@ def diff(args): fi1 = _next(it1) status = max(status, 102) else: - assert path1 == path2 + assert fi1.path == fi2.path if fi1.type != fi2.type: print("Entries %s:%s and %s:%s have different type" % (archive1.path, fi1.path, archive2.path, fi2.path)) diff --git 
a/tests/test_04_cli_diff.py b/tests/test_04_cli_diff.py index a614531..45de5ab 100644 --- a/tests/test_04_cli_diff.py +++ b/tests/test_04_cli_diff.py @@ -222,51 +222,6 @@ def test_diff_metadata(test_data, testname, monkeypatch, abspath): assert out[0] == ("File system metadata for %s:%s and %s:%s differ" % (archive_ref_path, p, archive_path, p)) -def test_diff_basedir_equal(test_data, testname, monkeypatch): - """Diff two archives with different base directories having equal - content. - - This test makes only sense for archives with relative path names. - """ - monkeypatch.chdir(str(test_data)) - newbase = Path("newbase") - shutil.rmtree(str(newbase), ignore_errors=True) - Path("base").rename(newbase) - archive_ref_path = Path("archive-rel.tar") - archive_path = Path(archive_name(ext="bz2", tags=[testname, "rel"])) - Archive().create(archive_path, "bz2", [newbase]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: - args = ["diff", str(archive_ref_path), str(archive_path)] - callscript("archive-tool.py", args, stdout=f) - f.seek(0) - assert list(get_output(f)) == [] - -def test_diff_basedir_mod_file(test_data, testname, monkeypatch): - """Diff two archives with different base directories having one file's - content modified. - - This test makes only sense for archives with relative path names. 
- """ - monkeypatch.chdir(str(test_data)) - base = Path("base") - newbase = Path("newbase") - shutil.rmtree(str(newbase), ignore_errors=True) - base.rename(newbase) - archive_ref_path = Path("archive-rel.tar") - p = base / "rnd.dat" - pn = newbase / "rnd.dat" - shutil.copy(str(gettestdata("rnd2.dat")), str(pn)) - archive_path = Path(archive_name(ext="bz2", tags=[testname, "rel"])) - Archive().create(archive_path, "bz2", [newbase]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: - args = ["diff", str(archive_ref_path), str(archive_path)] - callscript("archive-tool.py", args, returncode=101, stdout=f) - f.seek(0) - out = list(get_output(f)) - assert len(out) == 1 - assert out[0] == ("Files %s:%s and %s:%s differ" - % (archive_ref_path, p, archive_path, pn)) - @pytest.mark.parametrize("abspath", [False, True]) def test_diff_dircontent(test_data, testname, monkeypatch, abspath): """Diff two archives with one subdirectory missing. From 8f443d40161e9b5aaa993493d060a94e8f575ae9 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 11 May 2021 23:09:36 +0200 Subject: [PATCH 030/138] Rewrite archive-tool diff based on function diff_manifest() --- archive/cli/diff.py | 118 ++++++++++++++++++-------------------- tests/test_04_cli_diff.py | 1 - 2 files changed, 56 insertions(+), 63 deletions(-) diff --git a/archive/cli/diff.py b/archive/cli/diff.py index 00da2fe..9246179 100644 --- a/archive/cli/diff.py +++ b/archive/cli/diff.py @@ -4,6 +4,7 @@ from pathlib import Path from archive.archive import Archive from archive.exception import ArchiveReadError +from archive.manifest import DiffStatus, diff_manifest def _common_checksum(manifest1, manifest2): @@ -14,79 +15,72 @@ def _common_checksum(manifest1, manifest2): raise ArchiveReadError("No common checksum algorithm, " "cannot compare archive content.") -def _next(it, skip=None): - try: - while True: - fi = next(it) - if skip: - try: - fi.path.relative_to(skip) - except ValueError: - pass - else: - continue - return fi 
- except StopIteration: - return None +def _skip_dir_filter(diff): + skip_stat = None + skip_path = None + for t in diff: + diff_stat, fi1, fi2 = t + if diff_stat == skip_stat == DiffStatus.MISSING_A: + try: + fi2.path.relative_to(skip_path) + except ValueError: + pass + else: + continue + elif diff_stat == skip_stat == DiffStatus.MISSING_B: + try: + fi1.path.relative_to(skip_path) + except ValueError: + pass + else: + continue + yield t + if diff_stat == DiffStatus.MISSING_A and fi2.type == 'd': + skip_stat = diff_stat + skip_path = fi2.path + elif diff_stat == DiffStatus.MISSING_B and fi1.type == 'd': + skip_stat = diff_stat + skip_path = fi1.path + else: + skip_stat = None + skip_path = None + def diff(args): archive1 = Archive().open(args.archive1) + manifest1 = archive1.manifest archive1.close() archive2 = Archive().open(args.archive2) + manifest2 = archive2.manifest archive2.close() - algorithm = _common_checksum(archive1.manifest, archive2.manifest) - it1 = iter(archive1.manifest) - it2 = iter(archive2.manifest) - fi1 = _next(it1) - fi2 = _next(it2) + algorithm = _common_checksum(manifest1, manifest2) + diff = diff_manifest(manifest1, manifest2, algorithm) + if args.skip_dir_content: + diff = _skip_dir_filter(diff) status = 0 - while True: - if fi1 is None and fi2 is None: - break - elif fi1 is None or fi1.path > fi2.path: + for diff_stat, fi1, fi2 in diff: + if diff_stat == DiffStatus.MISSING_A: print("Only in %s: %s" % (archive2.path, fi2.path)) - if args.skip_dir_content and fi2.is_dir(): - fi2 = _next(it2, skip=fi2.path) - else: - fi2 = _next(it2) status = max(status, 102) - elif fi2 is None or fi2.path > fi1.path: + elif diff_stat == DiffStatus.MISSING_B: print("Only in %s: %s" % (archive1.path, fi1.path)) - if args.skip_dir_content and fi1.is_dir(): - fi1 = _next(it1, skip=fi1.path) - else: - fi1 = _next(it1) status = max(status, 102) - else: - assert fi1.path == fi2.path - if fi1.type != fi2.type: - print("Entries %s:%s and %s:%s have different type" 
- % (archive1.path, fi1.path, archive2.path, fi2.path)) - status = max(status, 102) - elif fi1.type == "l": - if fi1.target != fi2.target: - print("Symbol links %s:%s and %s:%s have different target" - % (archive1.path, fi1.path, archive2.path, fi2.path)) - status = max(status, 101) - elif fi1.type == "f": - # Note: we don't need to compare the size, because if - # the size differs, it's mostly certain that also the - # checksums do. - if fi1.checksum[algorithm] != fi2.checksum[algorithm]: - print("Files %s:%s and %s:%s differ" - % (archive1.path, fi1.path, archive2.path, fi2.path)) - status = max(status, 101) - elif args.report_meta and (fi1.uid != fi2.uid or - fi1.uname != fi2.uname or - fi1.gid != fi2.gid or - fi1.gname != fi2.gname or - fi1.mode != fi2.mode or - int(fi1.mtime) != int(fi2.mtime)): - print("File system metadata for %s:%s and %s:%s differ" - % (archive1.path, fi1.path, archive2.path, fi2.path)) - status = max(status, 100) - fi1 = _next(it1) - fi2 = _next(it2) + elif diff_stat == DiffStatus.TYPE: + print("Entries %s:%s and %s:%s have different type" + % (archive1.path, fi1.path, archive2.path, fi2.path)) + status = max(status, 102) + elif diff_stat == DiffStatus.SYMLNK_TARGET: + print("Symbol links %s:%s and %s:%s have different target" + % (archive1.path, fi1.path, archive2.path, fi2.path)) + status = max(status, 101) + elif diff_stat == DiffStatus.CONTENT: + print("Files %s:%s and %s:%s differ" + % (archive1.path, fi1.path, archive2.path, fi2.path)) + status = max(status, 101) + elif diff_stat == DiffStatus.META and args.report_meta: + print("File system metadata for %s:%s and %s:%s differ" + % (archive1.path, fi1.path, archive2.path, fi2.path)) + status = max(status, 100) return status def add_parser(subparsers): diff --git a/tests/test_04_cli_diff.py b/tests/test_04_cli_diff.py index 45de5ab..87d75f1 100644 --- a/tests/test_04_cli_diff.py +++ b/tests/test_04_cli_diff.py @@ -255,7 +255,6 @@ def test_diff_dircontent(test_data, testname, 
monkeypatch, abspath): assert len(out) == 1 assert out[0] == "Only in %s: %s" % (archive_ref_path, pd) -@pytest.mark.xfail(reason="Issue #55") @pytest.mark.parametrize("abspath", [False, True]) def test_diff_extrafile_end(test_data, testname, monkeypatch, abspath): """The first archives has an extra entry as last item. Ref. #55 From 040b1c98c055316b602346a8b8add8d2f383cc52 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 11 May 2021 23:19:16 +0200 Subject: [PATCH 031/138] Rename test module --- tests/{test_01_diff_manifest.py => test_02_diff_manifest.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_01_diff_manifest.py => test_02_diff_manifest.py} (100%) diff --git a/tests/test_01_diff_manifest.py b/tests/test_02_diff_manifest.py similarity index 100% rename from tests/test_01_diff_manifest.py rename to tests/test_02_diff_manifest.py From f7ca3cee3bc708240bf8d64caa1b0d3d9ee016c3 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 12 May 2021 08:05:09 +0200 Subject: [PATCH 032/138] User itertools to replace internal helper _next() --- archive/manifest.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/archive/manifest.py b/archive/manifest.py index f576158..c746254 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -6,6 +6,7 @@ from distutils.version import StrictVersion from enum import Enum import grp +import itertools import os from pathlib import Path import pwd @@ -257,12 +258,6 @@ def diff_manifest(manifest_a, manifest_b, checksum=FileInfo.Checksums[0]): path. Spurious mismatches will be reported if this is not the case. 
""" - def _next(it): - try: - return next(it) - except StopIteration: - return None - def _match(fi_a, fi_b, algorithm): assert fi_a.path == fi_b.path if fi_a.type != fi_b.type: @@ -285,26 +280,26 @@ def _match(fi_a, fi_b, algorithm): return DiffStatus.META return DiffStatus.MATCH - it_a = iter(manifest_a) - it_b = iter(manifest_b) - fi_a = _next(it_a) - fi_b = _next(it_b) + it_a = iter(itertools.chain(manifest_a, itertools.repeat(None))) + it_b = iter(itertools.chain(manifest_b, itertools.repeat(None))) + fi_a = next(it_a) + fi_b = next(it_b) while True: if fi_a is None and fi_b is None: break elif fi_a is None: yield (DiffStatus.MISSING_A, None, fi_b) - fi_b = _next(it_b) + fi_b = next(it_b) elif fi_b is None: yield (DiffStatus.MISSING_B, fi_a, None) - fi_a = _next(it_a) + fi_a = next(it_a) elif fi_a.path > fi_b.path: yield (DiffStatus.MISSING_A, None, fi_b) - fi_b = _next(it_b) + fi_b = next(it_b) elif fi_b.path > fi_a.path: yield (DiffStatus.MISSING_B, fi_a, None) - fi_a = _next(it_a) + fi_a = next(it_a) else: yield (_match(fi_a, fi_b, checksum), fi_a, fi_b) - fi_a = _next(it_a) - fi_b = _next(it_b) + fi_a = next(it_a) + fi_b = next(it_b) From 2b1a568fe9f3e227e2a80f5dea915834738d81a8 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 14 May 2021 18:02:07 +0200 Subject: [PATCH 033/138] Add a test for Issue #56 --- tests/test_04_cli_diff.py | 53 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/tests/test_04_cli_diff.py b/tests/test_04_cli_diff.py index 8e4d400..7a45dbf 100644 --- a/tests/test_04_cli_diff.py +++ b/tests/test_04_cli_diff.py @@ -15,9 +15,14 @@ testdata = [ DataDir(Path("base"), 0o755), DataDir(Path("base", "data"), 0o750), + DataDir(Path("base", "data", "aa"), 0o750), + DataDir(Path("base", "data", "zz"), 0o750), DataDir(Path("base", "empty"), 0o755), DataFile(Path("base", "msg.txt"), 0o644), DataFile(Path("base", "data", "rnd.dat"), 0o600), + DataFile(Path("base", "data", "rnd2.dat"), 0o600), + 
DataRandomFile(Path("base", "data", "aa", "rnd_a.dat"), 0o600), + DataRandomFile(Path("base", "data", "zz", "rnd_z.dat"), 0o600), DataFile(Path("base", "rnd.dat"), 0o600), DataSymLink(Path("base", "s.dat"), Path("data", "rnd.dat")), ] @@ -268,7 +273,7 @@ def test_diff_basedir_mod_file(test_data, testname, monkeypatch): % (archive_ref_path, p, archive_path, pn)) @pytest.mark.parametrize("abspath", [False, True]) -def test_diff_dircontent(test_data, testname, monkeypatch, abspath): +def test_diff_missing_dir(test_data, testname, monkeypatch, abspath): """Diff two archives with one subdirectory missing. """ monkeypatch.chdir(str(test_data)) @@ -278,7 +283,7 @@ def test_diff_dircontent(test_data, testname, monkeypatch, abspath): else: archive_ref_path = Path("archive-rel.tar") base_dir = Path("base") - pd = base_dir / "data" + pd = base_dir / "data" / "zz" shutil.rmtree(str(pd)) flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) @@ -290,7 +295,7 @@ def test_diff_dircontent(test_data, testname, monkeypatch, abspath): out = list(get_output(f)) assert len(out) == 2 assert out[0] == "Only in %s: %s" % (archive_ref_path, pd) - assert out[1] == "Only in %s: %s" % (archive_ref_path, pd / "rnd.dat") + assert out[1] == "Only in %s: %s" % (archive_ref_path, pd / "rnd_z.dat") with TemporaryFile(mode="w+t", dir=str(test_data)) as f: args = ["diff", "--skip-dir-content", str(archive_ref_path), str(archive_path)] @@ -299,3 +304,45 @@ def test_diff_dircontent(test_data, testname, monkeypatch, abspath): out = list(get_output(f)) assert len(out) == 1 assert out[0] == "Only in %s: %s" % (archive_ref_path, pd) + +@pytest.mark.xfail(reason="Issue #56") +@pytest.mark.parametrize("abspath", [False, True]) +def test_diff_orphan_dir_content(test_data, testname, monkeypatch, abspath): + """Diff archives having content in a missing directory. Ref. 
#56 + """ + monkeypatch.chdir(str(test_data)) + if abspath: + base_dir = test_data / "base" + else: + base_dir = Path("base") + pd = base_dir / "data" + excl_a = [ pd / "zz" ] + flag = absflag(abspath) + archive_a = Path(archive_name(ext="bz2", tags=[testname, "a", flag])) + Archive().create(archive_a, "bz2", [base_dir], excludes=excl_a) + pm = pd / "rnd2.dat" + shutil.copy(str(gettestdata("rnd.dat")), str(pm)) + incl_b = [ base_dir, pd / "aa", pd / "rnd2.dat", pd / "zz" ] + excl_b = [ pd, pd / "rnd.dat" ] + flag = absflag(abspath) + archive_b = Path(archive_name(ext="bz2", tags=[testname, "b", flag])) + Archive().create(archive_b, "bz2", incl_b, excludes=excl_b) + with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + args = ["diff", str(archive_a), str(archive_b)] + callscript("archive-tool.py", args, returncode=102, stdout=f) + f.seek(0) + out = list(get_output(f)) + assert len(out) == 5 + assert out[0] == "Only in %s: %s" % (archive_a, pd) + assert out[1] == "Only in %s: %s" % (archive_a, pd / "rnd.dat") + assert out[2] == ("Files %s:%s and %s:%s differ" + % (archive_a, pm, archive_b, pm)) + assert out[3] == "Only in %s: %s" % (archive_b, pd / "zz") + assert out[4] == "Only in %s: %s" % (archive_b, pd / "zz" / "rnd_z.dat") + with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + args = ["diff", "--skip-dir-content", str(archive_a), str(archive_b)] + callscript("archive-tool.py", args, returncode=102, stdout=f) + f.seek(0) + out = list(get_output(f)) + assert len(out) == 1 + assert out[0] == "Only in %s: %s" % (archive_a, pd) From cd37a2f67a6f41b0a2605ed54cf3f767f266be1b Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 14 May 2021 18:28:00 +0200 Subject: [PATCH 034/138] Fix #56 --- archive/cli/diff.py | 16 +++------------- tests/test_04_cli_diff.py | 1 - 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/archive/cli/diff.py b/archive/cli/diff.py index 9246179..a470be7 100644 --- a/archive/cli/diff.py +++ b/archive/cli/diff.py @@ -16,33 
+16,23 @@ def _common_checksum(manifest1, manifest2): "cannot compare archive content.") def _skip_dir_filter(diff): - skip_stat = None skip_path = None for t in diff: diff_stat, fi1, fi2 = t - if diff_stat == skip_stat == DiffStatus.MISSING_A: + if skip_path: + p = (fi1 or fi2).path try: - fi2.path.relative_to(skip_path) - except ValueError: - pass - else: - continue - elif diff_stat == skip_stat == DiffStatus.MISSING_B: - try: - fi1.path.relative_to(skip_path) + p.relative_to(skip_path) except ValueError: pass else: continue yield t if diff_stat == DiffStatus.MISSING_A and fi2.type == 'd': - skip_stat = diff_stat skip_path = fi2.path elif diff_stat == DiffStatus.MISSING_B and fi1.type == 'd': - skip_stat = diff_stat skip_path = fi1.path else: - skip_stat = None skip_path = None diff --git a/tests/test_04_cli_diff.py b/tests/test_04_cli_diff.py index 70df36c..e6a620a 100644 --- a/tests/test_04_cli_diff.py +++ b/tests/test_04_cli_diff.py @@ -260,7 +260,6 @@ def test_diff_missing_dir(test_data, testname, monkeypatch, abspath): assert len(out) == 1 assert out[0] == "Only in %s: %s" % (archive_ref_path, pd) -@pytest.mark.xfail(reason="Issue #56") @pytest.mark.parametrize("abspath", [False, True]) def test_diff_orphan_dir_content(test_data, testname, monkeypatch, abspath): """Diff archives having content in a missing directory. Ref. #56 From f9932dee62ace7de2d095e3f73a0cb8dee0256ac Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 14 May 2021 21:01:01 +0200 Subject: [PATCH 035/138] Update Changelog --- CHANGES.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 24eb7f2..fb053ee 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,8 @@ Changelog New features ------------ ++ `#57`_: Add a function :func:`diff_manifest`. + + `#50`_, `#51`_: Add a header with some metadata to the index in a mail archive created by :class:`MailArchive`. 
@@ -20,11 +22,22 @@ Incompatible changes Bug fixes and minor changes --------------------------- ++ `#57`_: Do not take the paths relative to the base directory in the + `archive-tool diff` command. + ++ `#55`_, `#57`_: `archive-tool diff` fails with :exc:`TypeError`. + ++ `#56`_, `#57`_: Inconsistent result from `archive-tool diff` with + option `--skip-dir-content`. + + `#48`_: Review and standardize some error messages. .. _#48: https://github.com/RKrahl/archive-tools/pull/48 .. _#50: https://github.com/RKrahl/archive-tools/issues/50 .. _#51: https://github.com/RKrahl/archive-tools/pull/51 +.. _#55: https://github.com/RKrahl/archive-tools/issues/55 +.. _#56: https://github.com/RKrahl/archive-tools/issues/56 +.. _#57: https://github.com/RKrahl/archive-tools/pull/57 0.5.1 (2020-12-12) From cf4b16c5741dce6769bf84be79bf9d5773b2d1aa Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 15 May 2021 18:18:56 +0200 Subject: [PATCH 036/138] Move _common_checksum() to module archive.manifest --- archive/cli/diff.py | 10 +--------- archive/manifest.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/archive/cli/diff.py b/archive/cli/diff.py index a470be7..efa754a 100644 --- a/archive/cli/diff.py +++ b/archive/cli/diff.py @@ -4,17 +4,9 @@ from pathlib import Path from archive.archive import Archive from archive.exception import ArchiveReadError -from archive.manifest import DiffStatus, diff_manifest +from archive.manifest import DiffStatus, _common_checksum, diff_manifest -def _common_checksum(manifest1, manifest2): - for algorithm in manifest1.checksums: - if algorithm in manifest2.checksums: - return algorithm - else: - raise ArchiveReadError("No common checksum algorithm, " - "cannot compare archive content.") - def _skip_dir_filter(diff): skip_path = None for t in diff: diff --git a/archive/manifest.py b/archive/manifest.py index c746254..1296fda 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -231,6 +231,18 @@ def sort(self, 
*, key=None, reverse=False): key = lambda fi: fi.path self.fileinfos.sort(key=key, reverse=reverse) + +def _common_checksum(manifest_a, manifest_b): + """Return a checksum algorithm that is present in both manifest objects. + """ + for algorithm in manifest_a.checksums: + if algorithm in manifest_b.checksums: + return algorithm + else: + raise ArchiveReadError("No common checksum algorithm, " + "cannot compare archive content.") + + def diff_manifest(manifest_a, manifest_b, checksum=FileInfo.Checksums[0]): """Compare two iterables of :class:`~archive.manifest.FileInfo` objects. From acf9bc9af78bc95747d588b5695d4fe40c5f3da2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 15 May 2021 15:37:46 +0200 Subject: [PATCH 037/138] Move adding an item to the tarfile from Archive._create() into a separate helper method Archive._add_item() --- archive/archive.py | 63 +++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 7ccb9ee..2a5a251 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -60,6 +60,8 @@ def __init__(self): self.manifest = None self._file = None self._metadata = [] + self._dedup = None + self._dupindex = None def create(self, path, compression, paths=None, fileinfos=None, basedir=None, workdir=None, excludes=None, @@ -76,6 +78,8 @@ def create(self, path, compression, paths=None, fileinfos=None, save_wd = os.getcwd() os.chdir(str(workdir)) self.path = path + self._dedup = dedup + self._dupindex = {} if fileinfos is not None: if not isinstance(fileinfos, Sequence): fileinfos = list(fileinfos) @@ -92,41 +96,42 @@ def create(self, path, compression, paths=None, fileinfos=None, for md in self._metadata: md.set_path(self.basedir) self.manifest.add_metadata(md.path) - self._create(mode, dedup) + self._create(mode) finally: if save_wd: os.chdir(save_wd) return self - def _create(self, mode, dedup): + def _create(self, mode): with tarfile.open(str(self.path), 
mode) as tarf: with tempfile.TemporaryFile() as tmpf: self.manifest.write(tmpf) tmpf.seek(0) self.add_metadata(".manifest.yaml", tmpf) md_names = self._add_metadata_files(tarf) - dupindex = {} for fi in self.manifest: - p = fi.path - name = self._arcname(p) - if name in md_names: - raise ArchiveCreateError("invalid path '%s': " - "this filename is reserved" % p) - if fi.is_file(): - ti = tarf.gettarinfo(str(p), arcname=name) - dup = self._check_duplicate(fi, name, dedup, dupindex) - if dup: - ti.type = tarfile.LNKTYPE - ti.linkname = dup - tarf.addfile(ti) - else: - ti.size = fi.size - ti.type = tarfile.REGTYPE - ti.linkname = '' - with p.open("rb") as f: - tarf.addfile(ti, fileobj=f) - else: - tarf.add(str(p), arcname=name, recursive=False) + arcname = self._arcname(fi.path) + if arcname in md_names: + raise ArchiveCreateError("invalid path '%s': this " + "filename is reserved" % fi.path) + self._add_item(tarf, fi, arcname) + + def _add_item(self, tarf, fi, arcname): + ti = tarf.gettarinfo(str(fi.path), arcname=arcname) + if fi.is_file(): + dup = self._check_duplicate(fi, arcname) + if dup: + ti.type = tarfile.LNKTYPE + ti.linkname = dup + tarf.addfile(ti) + else: + ti.size = fi.size + ti.type = tarfile.REGTYPE + ti.linkname = '' + with fi.path.open("rb") as f: + tarf.addfile(ti, fileobj=f) + else: + tarf.addfile(ti) def _check_paths(self, paths, basedir, excludes=None): """Check the paths to be added to an archive for several error @@ -185,17 +190,17 @@ def _add_metadata_files(self, tarf): tarf.addfile(ti, md.fileobj) return md_names - def _check_duplicate(self, fileinfo, name, dedup, dupindex): + def _check_duplicate(self, fileinfo, name): """Check if the archive item fileinfo should be linked to another item already added to the archive. 
""" assert fileinfo.is_file() - if dedup == DedupMode.LINK: + if self._dedup == DedupMode.LINK: st = fileinfo.path.stat() if st.st_nlink == 1: return None idxkey = (st.st_dev, st.st_ino) - elif dedup == DedupMode.CONTENT: + elif self._dedup == DedupMode.CONTENT: try: hashalg = fileinfo.Checksums[0] except IndexError: @@ -203,10 +208,10 @@ def _check_duplicate(self, fileinfo, name, dedup, dupindex): idxkey = fileinfo.checksum[hashalg] else: return None - if idxkey in dupindex: - return dupindex[idxkey] + if idxkey in self._dupindex: + return self._dupindex[idxkey] else: - dupindex[idxkey] = name + self._dupindex[idxkey] = name return None def add_metadata(self, name, fileobj, mode=0o444): From c04d103cb48f95e455a237645ce7516a5a924d46 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 16 May 2021 13:26:09 +0200 Subject: [PATCH 038/138] Weaken the condition that basedir must be a directory --- archive/archive.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 2a5a251..a1edcb5 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -92,6 +92,10 @@ def create(self, path, compression, paths=None, fileinfos=None, self._check_paths(paths, basedir, excludes) self.manifest = Manifest(paths=paths, excludes=excludes, tags=tags) + bd_fi = self.manifest.find(self.basedir) + if bd_fi and not bd_fi.is_dir(): + raise ArchiveCreateError("base directory %s must " + "be a directory" % self.basedir) self.manifest.add_metadata(self.basedir / ".manifest.yaml") for md in self._metadata: md.set_path(self.basedir) @@ -172,9 +176,6 @@ def _check_paths(self, paths, basedir, excludes=None): raise ArchiveCreateError("invalid path '%s': must be a " "subpath of base directory %s" % (p, self.basedir)) - if not abspath: - if self.basedir.is_symlink() or not self.basedir.is_dir(): - raise ArchiveCreateError("basedir must be a directory") def _add_metadata_files(self, tarf): """Add the metadata files to the tar file. 
From 3ad31014a8aa44fe17948c02a647e664b36e7332 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 16 May 2021 13:47:55 +0200 Subject: [PATCH 039/138] Update changelog --- CHANGES.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9d53b3b..9e7da1a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -32,6 +32,9 @@ Bug fixes and minor changes + `#57`_: Do not take the paths relative to the base directory in the `archive-tool diff` command. ++ `#58`_: Weaken the condition introduced in `#9`_ that basedir must + be a directory. + + `#53`_, `#54`_: Spurious :exc:`FileNotFoundError` from :meth:`Archive.create` when passing a relative path as `workdir` argument. @@ -51,6 +54,7 @@ Bug fixes and minor changes .. _#55: https://github.com/RKrahl/archive-tools/issues/55 .. _#56: https://github.com/RKrahl/archive-tools/issues/56 .. _#57: https://github.com/RKrahl/archive-tools/pull/57 +.. _#58: https://github.com/RKrahl/archive-tools/pull/58 0.5.1 (2020-12-12) @@ -221,3 +225,5 @@ Bug fixes and minor changes ~~~~~~~~~~~~~~~~ + Initial release. + +.. _#9: https://github.com/RKrahl/archive-tools/issues/9 From 8b4d708a2e0ea55aa03065b1e38e7da1b3a46ce8 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 16 May 2021 15:05:34 +0200 Subject: [PATCH 040/138] Add the extra directory --- extra/README.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 extra/README.rst diff --git a/extra/README.rst b/extra/README.rst new file mode 100644 index 0000000..a43c3df --- /dev/null +++ b/extra/README.rst @@ -0,0 +1,17 @@ +Extra Stuff +=========== + +This directory is destined to be a place for various stuff. It may +contain experimental code to explore some ideas or proof of concept +implementations for new features. Or it may be broken fragments of +something that didn't turn out to be useful. + +*Note*: the stuff in this directory unmaintained. 
It is not supposed +to be part of the `archive-tools` package and is not included in the +distribution. Nothing is stable here. There is no documentation. It +may be incompatible with the current version of the package. It may +or may not work at all. + +Obviously, some bits and pieces may get resurrected later on and moved +out of this directory to become a part of a future version of the +package. From d85e208715e1489f444428043c5149cd5b71a9e2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 15 May 2021 18:46:13 +0200 Subject: [PATCH 041/138] Add makeincr.py --- extra/makeincr.py | 100 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100755 extra/makeincr.py diff --git a/extra/makeincr.py b/extra/makeincr.py new file mode 100755 index 0000000..2adfe3b --- /dev/null +++ b/extra/makeincr.py @@ -0,0 +1,100 @@ +#! /usr/bin/python3 +"""Create an incremental archive. + +The script takes one or more basis archives and an input archive as +input. It creates an output archive that contains all items from the +input archive that are not present in any of the basis archives. +""" + +import argparse +from pathlib import Path +import tarfile +from archive.archive import Archive +from archive.manifest import DiffStatus, _common_checksum, diff_manifest + + +suffix_map = { + '.tar': '', + '.tar.gz': 'gz', + '.tar.bz2': 'bz2', + '.tar.xz': 'xz', +} +"""Map path suffix to compression mode.""" + + +class CopyArchive(Archive): + """Read items from a TarFile. + + An Archive that copies all items from another Archive rather then + reading them from the file system on create(). 
+ """ + + def __init__(self, inp_arch): + self.inp_arch = inp_arch + super().__init__() + + def _add_item(self, tarf, fi, arcname): + inp_tarf = self.inp_arch._file + inp_arcname = self.inp_arch._arcname(fi.path) + ti = inp_tarf.getmember(inp_arcname) + if fi.is_file(): + dup = self._check_duplicate(ti, arcname) + if dup: + ti.type = tarfile.LNKTYPE + ti.linkname = dup + ti.name = arcname + tarf.addfile(ti) + else: + with inp_tarf.extractfile(ti) as f: + ti.type = tarfile.REGTYPE + ti.linkname = '' + ti.name = arcname + tarf.addfile(ti, fileobj=f) + else: + ti.name = arcname + tarf.addfile(ti) + + def _check_duplicate(self, ti, name): + if ti.islnk() and ti.linkname in self._dupindex: + return self._dupindex[ti.linkname] + else: + if ti.isreg(): + self._dupindex[ti.name] = name + elif ti.islnk(): + self._dupindex[ti.linkname] = name + return None + +def filter_fileinfos(base, fileinfos, algorithm): + for stat, fi1, fi2 in diff_manifest(base, fileinfos, algorithm): + if stat == DiffStatus.MISSING_B or stat == DiffStatus.MATCH: + continue + yield fi2 + + +def main(): + argparser = argparse.ArgumentParser() + argparser.add_argument('base', type=Path, nargs='+', + help=("basis archives")) + argparser.add_argument('input', type=Path, + help=("input archive")) + argparser.add_argument('output', type=Path, + help=("input archive")) + args = argparser.parse_args() + + inp_archive = Archive().open(args.input) + fileinfos = inp_archive.manifest + algorithm = fileinfos.checksums[0] + for p in args.base: + with Archive().open(p) as base: + fileinfos = filter_fileinfos(base.manifest, fileinfos, algorithm) + + try: + compression = suffix_map["".join(args.output.suffixes)] + except KeyError: + compression = 'gz' + archive = CopyArchive(inp_archive).create(args.output, compression, + fileinfos=fileinfos) + + +if __name__ == "__main__": + main() From 3b4cba4c2fd3054a7c2092378b987cc649f45561 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 15 May 2021 21:16:21 +0200 Subject: 
[PATCH 042/138] Call undocumented method TarFile._getmember() rather then TarFile.getmember(): apparently, member names in the tarfile may occasionally not be normalized. --- extra/makeincr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/makeincr.py b/extra/makeincr.py index 2adfe3b..3f06a5e 100755 --- a/extra/makeincr.py +++ b/extra/makeincr.py @@ -36,7 +36,7 @@ def __init__(self, inp_arch): def _add_item(self, tarf, fi, arcname): inp_tarf = self.inp_arch._file inp_arcname = self.inp_arch._arcname(fi.path) - ti = inp_tarf.getmember(inp_arcname) + ti = inp_tarf._getmember(inp_arcname, normalize=True) if fi.is_file(): dup = self._check_duplicate(ti, arcname) if dup: From 36f06230b9b5973e8e98c599cfa05e2a8d2c8875 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 16:23:23 +0200 Subject: [PATCH 043/138] Switch from Travis CI to GitHub Actions --- .travis_require => .github/requirements.txt | 0 .github/workflows/run-tests.yaml | 40 +++++++++++++++++++++ .travis.yml | 14 -------- 3 files changed, 40 insertions(+), 14 deletions(-) rename .travis_require => .github/requirements.txt (100%) create mode 100644 .github/workflows/run-tests.yaml delete mode 100644 .travis.yml diff --git a/.travis_require b/.github/requirements.txt similarity index 100% rename from .travis_require rename to .github/requirements.txt diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml new file mode 100644 index 0000000..cde52f6 --- /dev/null +++ b/.github/workflows/run-tests.yaml @@ -0,0 +1,40 @@ +name: Run Test +on: [push, pull_request] +jobs: + Test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.5, 3.6, 3.7, 3.8, 3.9, 3.10] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip 
install -r .github/requirements.txt + - name: Test with pytest + run: | + python setup.py test + Test_3_4: + runs-on: ubuntu-18.04 + steps: + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: 3.4 + - name: Install dependencies + run: | + pip install -r .github/requirements.txt + - name: Test with pytest + run: | + python setup.py test diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 2a281f0..0000000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -language: python -python: - - "3.4" - - "3.5" - - "3.6" - - "3.7" - - "3.8" - - "3.9" -install: pip install -r .travis_require -script: make test - -# Local Variables: -# mode: yaml -# End: From d58ea27f1168c72a3c5b0f197ce5a7cc06465fac Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 16:33:46 +0200 Subject: [PATCH 044/138] Fix Python version number notation --- .github/workflows/run-tests.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index cde52f6..1f7c9b8 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -5,7 +5,13 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8, 3.9, 3.10] + python-version: + - '3.5' + - '3.6' + - '3.7' + - '3.8' + - '3.9' + - '3.10.0-beta - 3.10.0' steps: - name: Check out repository code uses: actions/checkout@v2 From ba51196d5de31900912cafc7a3572c38ae7bb8e4 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 16:43:45 +0200 Subject: [PATCH 045/138] - Require PyYAML >=5.4 with Python > 3.4 - Require setuptools_scm - Require typing with Python 3.4 --- .github/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/requirements.txt b/.github/requirements.txt index 08665bc..9e82fe0 100644 --- 
a/.github/requirements.txt +++ b/.github/requirements.txt @@ -1,6 +1,8 @@ PyYAML <=5.2 ; python_version == '3.4' -PyYAML ; python_version > '3.4' +PyYAML >=5.4 ; python_version > '3.4' distutils-pytest pytest >=3.6.0 pytest-dependency >=0.2 python-dateutil +setuptools_scm +typing ; python_version == '3.4' From 41703c2ca3028177409a468ccb19302e5cc84f4a Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 16:48:00 +0200 Subject: [PATCH 046/138] PyYAML 5.4 not available for Python 3.5 --- .github/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/requirements.txt b/.github/requirements.txt index 9e82fe0..6a7943f 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -1,4 +1,5 @@ PyYAML <=5.2 ; python_version == '3.4' +PyYAML >=5.3.1 ; python_version == '3.5' PyYAML >=5.4 ; python_version > '3.4' distutils-pytest pytest >=3.6.0 From 32baba5b7ed8fce89bcaae2330dbd6b263f6a8ad Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 16:50:46 +0200 Subject: [PATCH 047/138] Fixpup 41703c2 --- .github/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/requirements.txt b/.github/requirements.txt index 6a7943f..20ae149 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -1,6 +1,6 @@ PyYAML <=5.2 ; python_version == '3.4' PyYAML >=5.3.1 ; python_version == '3.5' -PyYAML >=5.4 ; python_version > '3.4' +PyYAML >=5.4 ; python_version > '3.5' distutils-pytest pytest >=3.6.0 pytest-dependency >=0.2 From 0a99f6fd977917204a32c36c3e4574a4ed2d2274 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 17:05:44 +0200 Subject: [PATCH 048/138] Update Build status badge in README --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index f9e57c5..6d914d3 100644 --- a/README.rst +++ b/README.rst @@ -1,9 +1,9 @@ -|travis| |pypi| - -.. 
|travis| image:: https://img.shields.io/travis/com/RKrahl/archive-tools - :target: https://travis-ci.com/RKrahl/archive-tools - :alt: Travis build status +|gh-test| |pypi| +.. |gh-test| image:: https://img.shields.io/github/workflow/status/RKrahl/archive-tools/Run%20Test + :target: https://github.com/RKrahl/archive-tools/actions/workflows/run-tests.yaml + :alt: GitHub Workflow Status + .. |pypi| image:: https://img.shields.io/pypi/v/archive-tools :target: https://pypi.org/project/archive-tools/ :alt: PyPI version From 78fa33457f49888e4098489008ebd964f207c569 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 17:15:44 +0200 Subject: [PATCH 049/138] Display the package version and path in the test --- tests/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 2420a2b..a8c01db 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,6 +10,7 @@ import sys import tempfile import pytest +import archive from archive.tools import ft_mode @@ -292,3 +293,10 @@ def get_output(fileobj): line = line.strip() print("< %s" % line) yield line + +def pytest_report_header(config): + """Add information on the package version used in the tests. 
+ """ + modpath = Path(archive.__file__).resolve().parent + return [ "archive-tools: %s" % (archive.__version__), + " %s" % (modpath)] From 7aec7860c564ad9a49ba1f8d38192df92aa08d96 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 17:49:59 +0200 Subject: [PATCH 050/138] Reformulate workflow configuration --- .github/workflows/run-tests.yaml | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 1f7c9b8..d9fd79d 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -12,6 +12,9 @@ jobs: - '3.8' - '3.9' - '3.10.0-beta - 3.10.0' + include: + - python-version: '3.4' + os: ubuntu-18.04 steps: - name: Check out repository code uses: actions/checkout@v2 @@ -27,20 +30,3 @@ jobs: - name: Test with pytest run: | python setup.py test - Test_3_4: - runs-on: ubuntu-18.04 - steps: - - name: Check out repository code - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: 3.4 - - name: Install dependencies - run: | - pip install -r .github/requirements.txt - - name: Test with pytest - run: | - python setup.py test From e5771ee538bc4ba71db222eee753b15379568588 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 17:54:07 +0200 Subject: [PATCH 051/138] Fixup 7aec786 --- .github/workflows/run-tests.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index d9fd79d..9b8e27d 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -2,9 +2,10 @@ name: Run Test on: [push, pull_request] jobs: Test: - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: matrix: + os: ubuntu-latest python-version: - '3.5' - '3.6' From 8661d14f4cf288d365988b6a94c304263ed33530 Mon Sep 17 00:00:00 2001 From: Rolf 
Krahl Date: Mon, 17 May 2021 17:56:02 +0200 Subject: [PATCH 052/138] Fixup e5771ee --- .github/workflows/run-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 9b8e27d..04bb33e 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -5,7 +5,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ubuntu-latest + os: [ubuntu-latest] python-version: - '3.5' - '3.6' From 678decf399b7cf4ee0ef9d64ca62e28251ecacc9 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 17 May 2021 21:00:15 +0200 Subject: [PATCH 053/138] Minor change in workflow config --- .github/workflows/run-tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index 04bb33e..c92c8b4 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -5,7 +5,6 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest] python-version: - '3.5' - '3.6' @@ -13,6 +12,7 @@ jobs: - '3.8' - '3.9' - '3.10.0-beta - 3.10.0' + os: [ubuntu-latest] include: - python-version: '3.4' os: ubuntu-18.04 From 454907b0f5a05ab3c66ff2f59208408a79371258 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 15:56:20 +0200 Subject: [PATCH 054/138] Fix many tests still passing str instead of Path to Archive.create() and Archive.open() --- tests/test_03_create_filetype.py | 12 ++++++------ tests/test_03_create_misc.py | 6 +++--- tests/test_04_cli.py | 4 ++-- tests/test_04_cli_create_misc.py | 2 +- tests/test_04_cli_warn.py | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/test_03_create_filetype.py b/tests/test_03_create_filetype.py index 03b4fd8..d39191d 100644 --- a/tests/test_03_create_filetype.py +++ b/tests/test_03_create_filetype.py @@ -54,13 +54,13 @@ def test_create_invalid_file_socket(test_dir, testname, monkeypatch): """Create 
an archive from a directory containing a socket. """ monkeypatch.chdir(str(test_dir)) - name = archive_name(tags=[testname]) + archive_path = Path(archive_name(tags=[testname])) p = Path("base") fp = p / "socket" with tmp_socket(fp): with pytest.warns(ArchiveWarning, match="%s: socket ignored" % fp): - Archive().create(name, "", [p]) - with Archive().open(name) as archive: + Archive().create(archive_path, "", [p]) + with Archive().open(archive_path) as archive: assert archive.basedir == Path("base") check_manifest(archive.manifest, testdata) archive.verify() @@ -69,13 +69,13 @@ def test_create_invalid_file_fifo(test_dir, testname, monkeypatch): """Create an archive from a directory containing a FIFO. """ monkeypatch.chdir(str(test_dir)) - name = archive_name(tags=[testname]) + archive_path = Path(archive_name(tags=[testname])) p = Path("base") fp = p / "fifo" with tmp_fifo(fp): with pytest.warns(ArchiveWarning, match="%s: FIFO ignored" % fp): - Archive().create(name, "", [p]) - with Archive().open(name) as archive: + Archive().create(archive_path, "", [p]) + with Archive().open(archive_path) as archive: assert archive.basedir == Path("base") check_manifest(archive.manifest, testdata) archive.verify() diff --git a/tests/test_03_create_misc.py b/tests/test_03_create_misc.py index 4e154ef..c0187a8 100644 --- a/tests/test_03_create_misc.py +++ b/tests/test_03_create_misc.py @@ -28,7 +28,7 @@ def test_create_default_basedir_rel(test_dir, monkeypatch): """Check the default basedir with relative paths. (Issue #8) """ monkeypatch.chdir(str(test_dir)) - archive_path = "archive-rel.tar" + archive_path = Path("archive-rel.tar") p = Path("base", "data") Archive().create(archive_path, "", [p]) with Archive().open(archive_path) as archive: @@ -91,7 +91,7 @@ def test_create_add_symlink(test_dir, monkeypatch): """Check adding explicitly adding a symbolic link. 
(Issue #37) """ monkeypatch.chdir(str(test_dir)) - archive_path = "archive-symlink.tar" + archive_path = Path("archive-symlink.tar") paths = [Path("base", "data", "misc"), Path("base", "data", "s.dat")] data = [ testdata[i] for i in (1,3,4) ] Archive().create(archive_path, "", paths) @@ -109,7 +109,7 @@ def test_create_tags(test_dir, monkeypatch, tags, expected): """Test setting tags. """ monkeypatch.chdir(str(test_dir)) - archive_path = archive_name(tags=["tags"], counter="create_tags") + archive_path = Path(archive_name(tags=["tags"], counter="create_tags")) Archive().create(archive_path, "", [Path("base")], tags=tags) with Archive().open(archive_path) as archive: assert archive.manifest.tags == expected diff --git a/tests/test_04_cli.py b/tests/test_04_cli.py index 0c3d4c4..acef2ae 100644 --- a/tests/test_04_cli.py +++ b/tests/test_04_cli.py @@ -54,7 +54,7 @@ def test_cli_create(test_dir, monkeypatch, testcase): compression, abspath = testcase require_compression(compression) monkeypatch.chdir(str(test_dir)) - archive_path = archive_name(ext=compression, tags=[absflag(abspath)]) + archive_path = Path(archive_name(ext=compression, tags=[absflag(abspath)])) if abspath: paths = str(test_dir / "base") basedir = "archive" @@ -64,7 +64,7 @@ def test_cli_create(test_dir, monkeypatch, testcase): if compression is None: compression = "none" args = ["create", "--compression", compression, "--basedir", basedir, - archive_path, paths] + str(archive_path), paths] callscript("archive-tool.py", args) with Archive().open(archive_path) as archive: assert str(archive.basedir) == basedir diff --git a/tests/test_04_cli_create_misc.py b/tests/test_04_cli_create_misc.py index c429a65..e250a70 100644 --- a/tests/test_04_cli_create_misc.py +++ b/tests/test_04_cli_create_misc.py @@ -39,7 +39,7 @@ def test_cli_create_tags(test_dir, monkeypatch, tags, expected): args += ("--tag", t) args += (archive_path, "base") callscript("archive-tool.py", args) - with Archive().open(archive_path) as 
archive: + with Archive().open(Path(archive_path)) as archive: assert archive.manifest.tags == expected check_manifest(archive.manifest, testdata) diff --git a/tests/test_04_cli_warn.py b/tests/test_04_cli_warn.py index 3f48c1b..cb4d812 100644 --- a/tests/test_04_cli_warn.py +++ b/tests/test_04_cli_warn.py @@ -55,7 +55,7 @@ def test_cli_warn_ignore_socket(test_dir, testname, monkeypatch): f.seek(0) line = f.readline().strip() assert line == ("archive-tool.py: %s: socket ignored" % fp) - with Archive().open(name) as archive: + with Archive().open(Path(name)) as archive: assert archive.basedir == basedir check_manifest(archive.manifest, testdata) archive.verify() From 0fa4dcf76ba851cd6ed3bf2816b9adb1b0c590f7 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 16:21:58 +0200 Subject: [PATCH 055/138] Make Archive.path attribute absolute, e.g. resolved --- archive/archive.py | 6 +++--- archive/cli/diff.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index a1edcb5..1da0463 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -77,7 +77,7 @@ def create(self, path, compression, paths=None, fileinfos=None, if workdir: save_wd = os.getcwd() os.chdir(str(workdir)) - self.path = path + self.path = path.resolve() self._dedup = dedup self._dupindex = {} if fileinfos is not None: @@ -221,11 +221,11 @@ def add_metadata(self, name, fileobj, mode=0o444): self._metadata.insert(0, md) def open(self, path): - self.path = path try: - self._file = tarfile.open(str(self.path), 'r') + self._file = tarfile.open(str(path), 'r') except OSError as e: raise ArchiveReadError(str(e)) + self.path = path.resolve() md = self.get_metadata(".manifest.yaml") self.basedir = md.path.parent self.manifest = Manifest(fileobj=md.fileobj) diff --git a/archive/cli/diff.py b/archive/cli/diff.py index efa754a..72fec7c 100644 --- a/archive/cli/diff.py +++ b/archive/cli/diff.py @@ -42,26 +42,26 @@ def diff(args): status = 
0 for diff_stat, fi1, fi2 in diff: if diff_stat == DiffStatus.MISSING_A: - print("Only in %s: %s" % (archive2.path, fi2.path)) + print("Only in %s: %s" % (args.archive2, fi2.path)) status = max(status, 102) elif diff_stat == DiffStatus.MISSING_B: - print("Only in %s: %s" % (archive1.path, fi1.path)) + print("Only in %s: %s" % (args.archive1, fi1.path)) status = max(status, 102) elif diff_stat == DiffStatus.TYPE: print("Entries %s:%s and %s:%s have different type" - % (archive1.path, fi1.path, archive2.path, fi2.path)) + % (args.archive1, fi1.path, args.archive2, fi2.path)) status = max(status, 102) elif diff_stat == DiffStatus.SYMLNK_TARGET: print("Symbol links %s:%s and %s:%s have different target" - % (archive1.path, fi1.path, archive2.path, fi2.path)) + % (args.archive1, fi1.path, args.archive2, fi2.path)) status = max(status, 101) elif diff_stat == DiffStatus.CONTENT: print("Files %s:%s and %s:%s differ" - % (archive1.path, fi1.path, archive2.path, fi2.path)) + % (args.archive1, fi1.path, args.archive2, fi2.path)) status = max(status, 101) elif diff_stat == DiffStatus.META and args.report_meta: print("File system metadata for %s:%s and %s:%s differ" - % (archive1.path, fi1.path, archive2.path, fi2.path)) + % (args.archive1, fi1.path, args.archive2, fi2.path)) status = max(status, 100) return status From 4ed3276969cd91a99696f086aa726db14645d244 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 16:59:39 +0200 Subject: [PATCH 056/138] Formally drop support for Python 3.4 and Python 3.5 --- .github/requirements.txt | 5 +---- .github/workflows/run-tests.yaml | 4 ---- README.rst | 2 +- python-archive-tools.spec | 2 +- setup.py | 3 --- 5 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/requirements.txt b/.github/requirements.txt index 20ae149..1911bb1 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -1,9 +1,6 @@ -PyYAML <=5.2 ; python_version == '3.4' -PyYAML >=5.3.1 ; python_version == '3.5' -PyYAML >=5.4 ; 
python_version > '3.5' +PyYAML >=5.4 distutils-pytest pytest >=3.6.0 pytest-dependency >=0.2 python-dateutil setuptools_scm -typing ; python_version == '3.4' diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index c92c8b4..d9e7509 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -6,16 +6,12 @@ jobs: strategy: matrix: python-version: - - '3.5' - '3.6' - '3.7' - '3.8' - '3.9' - '3.10.0-beta - 3.10.0' os: [ubuntu-latest] - include: - - python-version: '3.4' - os: ubuntu-18.04 steps: - name: Check out repository code uses: actions/checkout@v2 diff --git a/README.rst b/README.rst index 6d914d3..f3fc22c 100644 --- a/README.rst +++ b/README.rst @@ -50,7 +50,7 @@ System requirements Python: -+ Python 3.4 or newer. ++ Python 3.6 or newer. Required library packages: diff --git a/python-archive-tools.spec b/python-archive-tools.spec index 7589767..5afa5e7 100644 --- a/python-archive-tools.spec +++ b/python-archive-tools.spec @@ -10,7 +10,7 @@ License: Apache-2.0 Group: Development/Libraries/Python Source: %{distname}-%{version}.tar.gz BuildRequires: fdupes -BuildRequires: python3-base >= 3.4 +BuildRequires: python3-base >= 3.6 %if %{with tests} BuildRequires: python3-PyYAML BuildRequires: python3-distutils-pytest diff --git a/setup.py b/setup.py index a587a96..0ff04a0 100644 --- a/setup.py +++ b/setup.py @@ -126,8 +126,6 @@ def run(self): "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", @@ -136,4 +134,3 @@ def run(self): ], cmdclass = {'build_py': build_py, 'sdist': sdist, 'init_py': init_py}, ) - From 7fe1ccf31d332f156f90f2e1b76c5fefaa1f2a51 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 17:09:37 +0200 
Subject: [PATCH 057/138] Remove code explicitely considering compatibility with Python 3.5 --- archive/archive.py | 7 +------ tests/conftest.py | 15 ++++----------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index a1edcb5..0b52e6a 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -66,12 +66,7 @@ def __init__(self): def create(self, path, compression, paths=None, fileinfos=None, basedir=None, workdir=None, excludes=None, dedup=DedupMode.LINK, tags=None): - if sys.version_info < (3, 5): - # The 'x' (exclusive creation) mode was added to tarfile - # in Python 3.5. - mode = 'w:' + compression - else: - mode = 'x:' + compression + mode = 'x:' + compression save_wd = None try: if workdir: diff --git a/tests/conftest.py b/tests/conftest.py index a8c01db..ab83c0d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -112,13 +112,6 @@ def _get_checksums(): checksums[fp] = cs return checksums -def _mk_dir(path): - # path.mkdir(parents=True, exist_ok=True) requires Python 3.5. 
- try: - path.mkdir(parents=True) - except FileExistsError: - pass - def _set_fs_attrs(path, mode, mtime): if mode is not None: path.chmod(mode) @@ -164,7 +157,7 @@ def type(self): def create(self, main_dir): path = main_dir / self.path - _mk_dir(path) + path.mkdir(parents=True, exist_ok=True) _set_fs_attrs(path, self.mode, self.mtime) class DataFile(DataFileOrDir): @@ -185,7 +178,7 @@ def checksum(self): def create(self, main_dir): path = main_dir / self.path - _mk_dir(path.parent) + path.parent.mkdir(parents=True, exist_ok=True) shutil.copy(str(gettestdata(self.path.name)), str(path)) _set_fs_attrs(path, self.mode, self.mtime) @@ -209,7 +202,7 @@ def create(self, main_dir): data = bytearray(getrandbits(8) for _ in range(self._size)) h.update(data) self._checksum = h.hexdigest() - _mk_dir(path.parent) + path.parent.mkdir(parents=True, exist_ok=True) with path.open("wb") as f: f.write(data) _set_fs_attrs(path, self.mode, self.mtime) @@ -230,7 +223,7 @@ def mode(self): def create(self, main_dir): path = main_dir / self.path - _mk_dir(path.parent) + path.parent.mkdir(parents=True, exist_ok=True) path.symlink_to(self.target) _set_fs_attrs(path, None, self.mtime) From 6442a7787862771684e53c47b8201d2354fe0fea Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 19:43:41 +0200 Subject: [PATCH 058/138] Remove conversion from Path to str in many cases --- archive/archive.py | 2 +- archive/cli/check.py | 4 +- archive/exception.py | 2 +- archive/mailarchive.py | 2 +- archive/manifest.py | 2 +- archive/tools.py | 2 +- tests/conftest.py | 6 +-- tests/test_01_fileinfo.py | 2 +- tests/test_01_manifest.py | 18 ++++----- tests/test_02_create.py | 6 +-- tests/test_02_diff_manifest.py | 28 ++++++------- tests/test_03_create_dedup.py | 10 ++--- tests/test_03_create_errors.py | 22 +++++----- tests/test_03_create_exclude.py | 8 ++-- tests/test_03_create_fileinfos.py | 8 ++-- tests/test_03_create_filetype.py | 6 +-- tests/test_03_create_misc.py | 12 +++--- 
tests/test_03_create_workdir.py | 2 +- tests/test_03_verify_errors.py | 22 +++++----- tests/test_04_cli.py | 4 +- tests/test_04_cli_check.py | 63 ++++++++++++++--------------- tests/test_04_cli_create_dedup.py | 6 +-- tests/test_04_cli_create_exclude.py | 6 +-- tests/test_04_cli_create_misc.py | 2 +- tests/test_04_cli_diff.py | 32 +++++++-------- tests/test_04_cli_error.py | 30 +++++++------- tests/test_04_cli_find.py | 2 +- tests/test_04_cli_warn.py | 2 +- tests/test_05_mailarchive_create.py | 2 +- 29 files changed, 156 insertions(+), 157 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 0b52e6a..c375365 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -71,7 +71,7 @@ def create(self, path, compression, paths=None, fileinfos=None, try: if workdir: save_wd = os.getcwd() - os.chdir(str(workdir)) + os.chdir(workdir) self.path = path self._dedup = dedup self._dupindex = {} diff --git a/archive/cli/check.py b/archive/cli/check.py index 7fcbeaf..4e79220 100644 --- a/archive/cli/check.py +++ b/archive/cli/check.py @@ -47,10 +47,10 @@ def check(args): if (args.prefix / fi.path in metadata or entry and _matches(args.prefix, fi, entry)): if args.present and not fi.is_dir(): - print(str(fi.path)) + print(fi.path) else: if not args.present: - print(str(fi.path)) + print(fi.path) if fi.is_dir(): skip = True return 0 diff --git a/archive/exception.py b/archive/exception.py index 30bfc9c..0207501 100644 --- a/archive/exception.py +++ b/archive/exception.py @@ -48,7 +48,7 @@ def __init__(self, path, ftype): tstr = "socket" else: tstr = "unsuported type %x" % ftype - super().__init__("%s: %s" % (str(path), tstr)) + super().__init__("%s: %s" % (path, tstr)) class ArchiveWarning(Warning): pass diff --git a/archive/mailarchive.py b/archive/mailarchive.py index 7342a12..11a08c0 100644 --- a/archive/mailarchive.py +++ b/archive/mailarchive.py @@ -58,7 +58,7 @@ def create(self, path, mails, compression='xz', server=None): with 
TemporaryDirectory(prefix="mailarchive-") as tmpdir: with tmp_chdir(tmpdir), tmp_umask(0o077): basedir = Path(path.name.split('.')[0]) - maildir = Maildir(str(basedir), create=True) + maildir = Maildir(basedir, create=True) self.mailindex = MailIndex(server=server) last_folder = None for folder, msgbytes in mails: diff --git a/archive/manifest.py b/archive/manifest.py index d5612bd..505b147 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -71,7 +71,7 @@ def __init__(self, data=None, path=None): elif stat.S_ISDIR(fstat.st_mode): pass elif stat.S_ISLNK(fstat.st_mode): - self.target = Path(os.readlink(str(self.path))) + self.target = Path(os.readlink(self.path)) else: ftype = stat.S_IFMT(fstat.st_mode) raise ArchiveInvalidTypeError(self.path, ftype) diff --git a/archive/tools.py b/archive/tools.py index f5e818e..bf7af65 100644 --- a/archive/tools.py +++ b/archive/tools.py @@ -25,7 +25,7 @@ class tmp_chdir(): """ def __init__(self, dir): self.save_dir = None - self.dir = str(dir) + self.dir = dir def __enter__(self): self.save_dir = os.getcwd() os.chdir(self.dir) diff --git a/tests/conftest.py b/tests/conftest.py index ab83c0d..5778fc5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,7 +62,7 @@ def __init__(self): self.dir = Path(tempfile.mkdtemp(prefix="archive-tools-test-")) def cleanup(self): if self.dir and _cleanup: - shutil.rmtree(str(self.dir)) + shutil.rmtree(self.dir) self.dir = None def __enter__(self): return self.dir @@ -116,7 +116,7 @@ def _set_fs_attrs(path, mode, mtime): if mode is not None: path.chmod(mode) if mtime is not None: - os.utime(str(path), (mtime, mtime), follow_symlinks=False) + os.utime(path, (mtime, mtime), follow_symlinks=False) class DataItem: @@ -179,7 +179,7 @@ def checksum(self): def create(self, main_dir): path = main_dir / self.path path.parent.mkdir(parents=True, exist_ok=True) - shutil.copy(str(gettestdata(self.path.name)), str(path)) + shutil.copy(gettestdata(self.path.name), path) _set_fs_attrs(path, 
self.mode, self.mtime) class DataRandomFile(DataFileOrDir): diff --git a/tests/test_01_fileinfo.py b/tests/test_01_fileinfo.py index 5bcde76..e8a1a7c 100644 --- a/tests/test_01_fileinfo.py +++ b/tests/test_01_fileinfo.py @@ -31,7 +31,7 @@ def test_dir(tmpdir): def test_fileinfo_lazy_checksum(test_dir, monkeypatch): """Check that checksums are calculated lazily. Ref. #35. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) checksum_count = ChecksumCounter() entry = next(filter(lambda i: i.type == 'f', testdata)) monkeypatch.setattr(archive.manifest, "checksum", checksum_count.checksum) diff --git a/tests/test_01_manifest.py b/tests/test_01_manifest.py index 6f31110..692b555 100644 --- a/tests/test_01_manifest.py +++ b/tests/test_01_manifest.py @@ -45,7 +45,7 @@ def test_manifest_from_fileobj(): def test_manifest_from_paths(test_dir, monkeypatch): """Create a manifest reading the files in test_dir. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) manifest = Manifest(paths=[Path("base")]) head = manifest.head assert set(head.keys()) == { @@ -63,7 +63,7 @@ def test_manifest_exclude_nonexistent(test_dir, monkeypatch): This is legal, but should have no effect. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) paths = [Path("base")] excludes = [Path("base", "non-existent.dat")] manifest = Manifest(paths=paths, excludes=excludes) @@ -74,7 +74,7 @@ def test_manifest_exclude_nonexistent(test_dir, monkeypatch): def test_manifest_exclude_file(test_dir, monkeypatch): """Test excludes: excluding one single file. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) paths = [Path("base")] excludes = [Path("base", "msg.txt")] manifest = Manifest(paths=paths, excludes=excludes) @@ -85,7 +85,7 @@ def test_manifest_exclude_file(test_dir, monkeypatch): def test_manifest_exclude_subdir(test_dir, monkeypatch): """Test excludes: excluding a subdirectory. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) paths = [Path("base")] excludes = [Path("base", "data")] manifest = Manifest(paths=paths, excludes=excludes) @@ -96,7 +96,7 @@ def test_manifest_exclude_subdir(test_dir, monkeypatch): def test_manifest_exclude_samelevel(test_dir, monkeypatch): """Test excludes: exclude things explictely named in paths. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) paths = [Path("base", "data"), Path("base", "empty")] excludes = [paths[1]] manifest = Manifest(paths=paths, excludes=excludes) @@ -108,7 +108,7 @@ def test_manifest_exclude_explicit_include(test_dir, monkeypatch): """Test excludes: it is possible to explicitely include files, even if their parent directory is excluded. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) paths = [Path("base"), Path("base", "data", "rnd.dat")] excludes = [Path("base", "data")] manifest = Manifest(paths=paths, excludes=excludes) @@ -119,7 +119,7 @@ def test_manifest_exclude_explicit_include(test_dir, monkeypatch): def test_manifest_from_fileinfos(test_dir, monkeypatch): """Create a manifest providing an iterable of fileinfos. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) fileinfos = FileInfo.iterpaths([Path("base")], set()) manifest = Manifest(fileinfos=fileinfos) head = manifest.head @@ -136,7 +136,7 @@ def test_manifest_from_fileinfos(test_dir, monkeypatch): def test_manifest_sort(test_dir, monkeypatch): """Test the Manifest.sort() method. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) manifest = Manifest(paths=[Path("base")]) check_manifest(manifest, testdata) fileinfos = set(manifest) @@ -175,6 +175,6 @@ def test_manifest_sort(test_dir, monkeypatch): def test_manifest_tags(test_dir, monkeypatch, tags, expected): """Set tags in a manifest reading the files in test_dir. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) manifest = Manifest(paths=[Path("base")], tags=tags) assert manifest.tags == expected diff --git a/tests/test_02_create.py b/tests/test_02_create.py index cbc788e..b07f9e9 100644 --- a/tests/test_02_create.py +++ b/tests/test_02_create.py @@ -54,7 +54,7 @@ def dep_testcase(request, testcase): def test_create(test_dir, monkeypatch, testcase): compression, abspath = testcase require_compression(compression) - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = Path(archive_name(ext=compression, tags=[absflag(abspath)])) if abspath: paths = [test_dir / "base"] @@ -89,7 +89,7 @@ def test_check_content(test_dir, dep_testcase, inclmeta): flag = absflag(abspath) archive_path = test_dir / archive_name(ext=compression, tags=[flag]) outdir = test_dir / "out" - shutil.rmtree(str(outdir), ignore_errors=True) + shutil.rmtree(outdir, ignore_errors=True) outdir.mkdir() if abspath: cwd = outdir / "archive" / test_dir.relative_to(test_dir.anchor) @@ -102,7 +102,7 @@ def test_check_content(test_dir, dep_testcase, inclmeta): assert (outdir / f).is_file() == inclmeta try: sha256 = subprocess.Popen([sha256sum, "--check"], - cwd=str(cwd), stdin=subprocess.PIPE) + cwd=cwd, stdin=subprocess.PIPE) except FileNotFoundError: pytest.skip("%s program not found" % sha256sum) for f in testdata: diff --git a/tests/test_02_diff_manifest.py b/tests/test_02_diff_manifest.py index 422afc3..5e284a6 100644 --- a/tests/test_02_diff_manifest.py +++ b/tests/test_02_diff_manifest.py @@ -37,7 +37,7 @@ def test_dir(tmpdir): @pytest.fixture(scope="function") def test_data(request, test_dir): - shutil.rmtree(str(test_dir / "base"), ignore_errors=True) + shutil.rmtree(test_dir / "base", ignore_errors=True) with Archive().open(test_dir / "archive.tar") as archive: archive.extract(test_dir) return test_dir @@ -45,7 +45,7 @@ def test_data(request, test_dir): def test_diff_manifest_equal(test_data, testname, monkeypatch): 
"""Diff two fileinfo lists having equal content. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") @@ -56,7 +56,7 @@ def test_diff_manifest_equal(test_data, testname, monkeypatch): def test_diff_manifest_metadata(test_data, testname, monkeypatch): """Diff two fileinfo lists having one file's metadata modified. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") @@ -73,12 +73,12 @@ def test_diff_manifest_metadata(test_data, testname, monkeypatch): def test_diff_manifest_modified_file(test_data, testname, monkeypatch): """Diff two fileinfo lists having one file's content modified. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") p = base_dir / "rnd.dat" - shutil.copy(str(gettestdata("rnd2.dat")), str(p)) + shutil.copy(gettestdata("rnd2.dat"), p) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 @@ -90,7 +90,7 @@ def test_diff_manifest_modified_file(test_data, testname, monkeypatch): def test_diff_manifest_symlink_target(test_data, testname, monkeypatch): """Diff two fileinfo lists having one symlink's target modified. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") @@ -108,7 +108,7 @@ def test_diff_manifest_symlink_target(test_data, testname, monkeypatch): def test_diff_manifest_wrong_type(test_data, testname, monkeypatch): """Diff two fileinfo lists with one entry having a wrong type. 
""" - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") @@ -127,7 +127,7 @@ def test_diff_manifest_wrong_type(test_data, testname, monkeypatch): def test_diff_manifest_missing_files(test_data, testname, monkeypatch): """Diff two fileinfo lists having one file's name changed. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") @@ -151,12 +151,12 @@ def test_diff_manifest_missing_files(test_data, testname, monkeypatch): def test_diff_manifest_mult(test_data, testname, monkeypatch): """Diff two fileinfo lists having multiple differences. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") pm = base_dir / "data" / "rnd.dat" - shutil.copy(str(gettestdata("rnd2.dat")), str(pm)) + shutil.copy(gettestdata("rnd2.dat"), pm) p1 = base_dir / "msg.txt" p2 = base_dir / "o.txt" p1.rename(p2) @@ -181,12 +181,12 @@ def test_diff_manifest_mult(test_data, testname, monkeypatch): def test_diff_manifest_dircontent(test_data, testname, monkeypatch): """Diff two fileinfo lists with one subdirectory missing. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") pd = base_dir / "data" - shutil.rmtree(str(pd)) + shutil.rmtree(pd) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 2 @@ -207,12 +207,12 @@ def test_diff_manifest_add_file_last(test_data, testname, monkeypatch): The implementation of the corresponding command line tool used to have a flaw in this particular case, ref. #55. 
""" - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") p = base_dir / "zzz.dat" - shutil.copy(str(gettestdata("rnd2.dat")), str(p)) + shutil.copy(gettestdata("rnd2.dat"), p) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 diff --git a/tests/test_03_create_dedup.py b/tests/test_03_create_dedup.py index cae60ad..73461af 100644 --- a/tests/test_03_create_dedup.py +++ b/tests/test_03_create_dedup.py @@ -31,9 +31,9 @@ def test_dir(tmpdir): setup_testdata(tmpdir, testdata) sf = next(filter(lambda f: f.path == src, testdata)) - os.link(str(tmpdir / src), str(tmpdir / dest_lnk)) + os.link(tmpdir / src, tmpdir / dest_lnk) testdata.append(DataFile(dest_lnk, sf.mode, checksum=sf.checksum)) - shutil.copy(str(tmpdir / src), str(tmpdir / dest_cp)) + shutil.copy(tmpdir / src, tmpdir / dest_cp) testdata.append(DataFile(dest_cp, sf.mode, checksum=sf.checksum)) return tmpdir @@ -55,7 +55,7 @@ def dep_testcase(request, testcase): @pytest.mark.dependency() def test_create(test_dir, monkeypatch, testcase): dedup = testcase - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = Path(archive_name(tags=[dedup.value])) paths = [Path("base")] Archive().create(archive_path, '', paths, dedup=dedup) @@ -72,13 +72,13 @@ def test_check_content(test_dir, dep_testcase): dedup = dep_testcase archive_path = test_dir / archive_name(tags=[dedup.value]) outdir = test_dir / "out" - shutil.rmtree(str(outdir), ignore_errors=True) + shutil.rmtree(outdir, ignore_errors=True) outdir.mkdir() with tarfile.open(str(archive_path), "r") as tarf: tarf.extractall(path=str(outdir)) try: sha256 = subprocess.Popen([sha256sum, "--check"], - cwd=str(outdir), stdin=subprocess.PIPE) + cwd=outdir, stdin=subprocess.PIPE) except FileNotFoundError: pytest.skip("%s program not found" % sha256sum) for 
f in testdata: diff --git a/tests/test_03_create_errors.py b/tests/test_03_create_errors.py index 4fd3bb0..a264dc8 100644 --- a/tests/test_03_create_errors.py +++ b/tests/test_03_create_errors.py @@ -30,7 +30,7 @@ def test_dir(tmpdir): def test_create_empty(test_dir, testname, monkeypatch): """Creating an empty archive will be refused. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) with pytest.raises(ArchiveCreateError): Archive().create(Path(name), "", [], basedir=Path("base")) @@ -38,7 +38,7 @@ def test_create_empty(test_dir, testname, monkeypatch): def test_create_abs_basedir(test_dir, testname, monkeypatch): """Base dir must be a a relative path. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [Path("base")] basedir = test_dir / "base" @@ -48,7 +48,7 @@ def test_create_abs_basedir(test_dir, testname, monkeypatch): def test_create_mixing_abs_rel(test_dir, testname, monkeypatch): """Mixing absolute and relative paths is not allowed. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [ Path("base", "msg.txt"), test_dir / "base" / "data" ] with pytest.raises(ArchiveCreateError): @@ -57,7 +57,7 @@ def test_create_mixing_abs_rel(test_dir, testname, monkeypatch): def test_create_rel_not_in_base(test_dir, testname, monkeypatch): """Relative paths must be in the base directory. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [ Path("other", "rnd.dat") ] with pytest.raises(ArchiveCreateError): @@ -66,7 +66,7 @@ def test_create_rel_not_in_base(test_dir, testname, monkeypatch): def test_create_norm_path(test_dir, testname, monkeypatch): """Items in paths must be normalized. 
(Issue #6) """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [ Path("base"), Path("base/../../../etc/passwd") ] with pytest.raises(ArchiveCreateError): @@ -75,7 +75,7 @@ def test_create_norm_path(test_dir, testname, monkeypatch): def test_create_rel_check_basedir(test_dir, testname, monkeypatch): """Base directory must be a directory. (Issue #9) """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) p = Path("msg.txt") with pytest.raises(ArchiveCreateError): @@ -92,7 +92,7 @@ def test_create_rel_no_manifest_file(test_dir, testname, monkeypatch): /.manifest.yaml to the archive. Obviously, we cannot create such a file for the test. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) base = Path("base") manifest = base / ".manifest.yaml" @@ -108,7 +108,7 @@ def test_create_duplicate_metadata(test_dir, testname, monkeypatch): """Add additional custom metadata to the archive, using a name that is already taken. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) p = Path("base") with TemporaryFile(dir=str(test_dir)) as tmpf: @@ -124,7 +124,7 @@ def test_create_metadata_vs_content(test_dir, testname, monkeypatch): """Add additional custom metadata to the archive, using a name that conflicts with a content file. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) p = Path("base") with TemporaryFile(dir=str(test_dir)) as tmpf: @@ -140,7 +140,7 @@ def test_create_fileinfos_missing_checksum(test_dir, testname, monkeypatch): """When an archive is created from precompiled fileinfos, they must already contain suitable checksums. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) with monkeypatch.context() as m: m.setattr(FileInfo, "Checksums", ['md5']) @@ -159,7 +159,7 @@ def test_create_manifest_missing_checksum(test_dir, testname, monkeypatch): """Same as last test, but now creating the archive from a precompiled manifest. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) with monkeypatch.context() as m: m.setattr(FileInfo, "Checksums", ['md5']) diff --git a/tests/test_03_create_exclude.py b/tests/test_03_create_exclude.py index c0f0fd2..dfd344b 100644 --- a/tests/test_03_create_exclude.py +++ b/tests/test_03_create_exclude.py @@ -31,7 +31,7 @@ def test_dir(tmpdir): def test_create_exclude_file(test_dir, testname, monkeypatch): """Exclude one single file. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [Path("base")] excludes = [Path("base", "msg.txt")] @@ -45,7 +45,7 @@ def test_create_exclude_file(test_dir, testname, monkeypatch): def test_create_exclude_subdir(test_dir, testname, monkeypatch): """Exclude a subdirectory. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [Path("base")] excludes = [Path("base", "data")] @@ -59,7 +59,7 @@ def test_create_exclude_subdir(test_dir, testname, monkeypatch): def test_create_exclude_samelevel(test_dir, testname, monkeypatch): """Exclude a directory explictely named in paths. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [Path("base", "data"), Path("base", "empty")] excludes = [paths[1]] @@ -74,7 +74,7 @@ def test_create_exclude_explicit_include(test_dir, testname, monkeypatch): """Exclude a directory, but explicitely include an item in that directory. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = [Path("base"), Path("base", "data", "rnd1.dat")] excludes = [Path("base", "data")] diff --git a/tests/test_03_create_fileinfos.py b/tests/test_03_create_fileinfos.py index bc3d87b..1da7f1c 100644 --- a/tests/test_03_create_fileinfos.py +++ b/tests/test_03_create_fileinfos.py @@ -30,7 +30,7 @@ def test_dir(tmpdir): def test_create_fileinfos_list(test_dir, monkeypatch): """Create the archive from a list of FileInfo objects. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) fileinfos = list(FileInfo.iterpaths([Path("base")], set())) archive_path = Path("archive-fi-list.tar") Archive().create(archive_path, "", fileinfos=fileinfos) @@ -41,7 +41,7 @@ def test_create_fileinfos_list(test_dir, monkeypatch): def test_create_fileinfos_generator(test_dir, monkeypatch): """Create the archive from FileInfo.iterpaths() which returns a generator. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) fileinfos = FileInfo.iterpaths([Path("base")], set()) archive_path = Path("archive-fi-generator.tar") Archive().create(archive_path, "", fileinfos=fileinfos) @@ -53,7 +53,7 @@ def test_create_fileinfos_manifest(test_dir, monkeypatch): """Create the archive from a Manifest. A Manifest is an iterable of FileInfo objects. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) manifest = Manifest(paths=[Path("base")]) archive_path = Path("archive-fi-manifest.tar") Archive().create(archive_path, "", fileinfos=manifest) @@ -66,7 +66,7 @@ def test_create_fileinfos_subset(test_dir, monkeypatch): This test verifies that creating an archive from fileinfos does not implicitly descend subdirectories. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) excludes = [Path("base", "data", "rnd.dat")] fileinfos = FileInfo.iterpaths([Path("base")], set(excludes)) data = sub_testdata(testdata, excludes[0]) diff --git a/tests/test_03_create_filetype.py b/tests/test_03_create_filetype.py index 03b4fd8..cb9b8be 100644 --- a/tests/test_03_create_filetype.py +++ b/tests/test_03_create_filetype.py @@ -44,7 +44,7 @@ class tmp_fifo(): """ def __init__(self, path): self.path = path - os.mkfifo(str(self.path)) + os.mkfifo(self.path) def __enter__(self): return self.path def __exit__(self, type, value, tb): @@ -53,7 +53,7 @@ def __exit__(self, type, value, tb): def test_create_invalid_file_socket(test_dir, testname, monkeypatch): """Create an archive from a directory containing a socket. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) p = Path("base") fp = p / "socket" @@ -68,7 +68,7 @@ def test_create_invalid_file_socket(test_dir, testname, monkeypatch): def test_create_invalid_file_fifo(test_dir, testname, monkeypatch): """Create an archive from a directory containing a FIFO. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) p = Path("base") fp = p / "fifo" diff --git a/tests/test_03_create_misc.py b/tests/test_03_create_misc.py index 4e154ef..e1a7acb 100644 --- a/tests/test_03_create_misc.py +++ b/tests/test_03_create_misc.py @@ -27,7 +27,7 @@ def test_dir(tmpdir): def test_create_default_basedir_rel(test_dir, monkeypatch): """Check the default basedir with relative paths. (Issue #8) """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = "archive-rel.tar" p = Path("base", "data") Archive().create(archive_path, "", [p]) @@ -39,7 +39,7 @@ def test_create_default_basedir_rel(test_dir, monkeypatch): def test_create_default_basedir_abs(test_dir, monkeypatch): """Check the default basedir with absolute paths. 
(Issue #8) """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = Path("archive-abs.tar") p = test_dir / Path("base", "data") Archive().create(archive_path, "", [p]) @@ -51,7 +51,7 @@ def test_create_default_basedir_abs(test_dir, monkeypatch): def test_create_sorted(test_dir, monkeypatch): """The entries in the manifest should be sorted. (Issue #11) """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = Path("archive-sort.tar") files = [ Path("base", fn) for fn in ("c", "a", "d", "b") ] for p in files: @@ -69,7 +69,7 @@ def test_create_sorted(test_dir, monkeypatch): def test_create_custom_metadata(test_dir, monkeypatch): """Add additional custom metadata to the archive. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = Path("archive-custom-md.tar") p = Path("base", "data") with TemporaryFile(dir=str(test_dir)) as tmpf: @@ -90,7 +90,7 @@ def test_create_custom_metadata(test_dir, monkeypatch): def test_create_add_symlink(test_dir, monkeypatch): """Check adding explicitly adding a symbolic link. (Issue #37) """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = "archive-symlink.tar" paths = [Path("base", "data", "misc"), Path("base", "data", "s.dat")] data = [ testdata[i] for i in (1,3,4) ] @@ -108,7 +108,7 @@ def test_create_add_symlink(test_dir, monkeypatch): def test_create_tags(test_dir, monkeypatch, tags, expected): """Test setting tags. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = archive_name(tags=["tags"], counter="create_tags") Archive().create(archive_path, "", [Path("base")], tags=tags) with Archive().open(archive_path) as archive: diff --git a/tests/test_03_create_workdir.py b/tests/test_03_create_workdir.py index 7e48f74..07a0245 100644 --- a/tests/test_03_create_workdir.py +++ b/tests/test_03_create_workdir.py @@ -24,7 +24,7 @@ def test_create_workdir(test_dir, monkeypatch, abs_wd): """Pass an absolute or relative workdir to Archive.create(). (Issue #53) """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) if abs_wd: workdir = test_dir / "work" else: diff --git a/tests/test_03_verify_errors.py b/tests/test_03_verify_errors.py index 0180635..1106f66 100644 --- a/tests/test_03_verify_errors.py +++ b/tests/test_03_verify_errors.py @@ -28,7 +28,7 @@ @pytest.fixture(scope="function") def test_data(tmpdir, monkeypatch): - monkeypatch.chdir(str(tmpdir)) + monkeypatch.chdir(tmpdir) shutil.rmtree("base", ignore_errors=True) setup_testdata(tmpdir, testdata) manifest = Manifest(paths=[Path("base")]) @@ -77,9 +77,9 @@ def test_verify_missing_metadata_item(test_data, testname): def test_verify_missing_file(test_data, testname): name = archive_name(tags=[testname]) path = Path("base", "msg.txt") - mtime_parent = os.stat(str(path.parent)).st_mtime + mtime_parent = os.stat(path.parent).st_mtime path.unlink() - os.utime(str(path.parent), times=(mtime_parent, mtime_parent)) + os.utime(path.parent, times=(mtime_parent, mtime_parent)) create_archive(name) with Archive().open(Path(name)) as archive: with pytest.raises(ArchiveIntegrityError) as err: @@ -110,7 +110,7 @@ def test_verify_wrong_mtime(test_data, testname): name = archive_name(tags=[testname]) path = Path("base", "msg.txt") hour_ago = time.time() - 3600 - os.utime(str(path), times=(hour_ago, hour_ago)) + os.utime(path, times=(hour_ago, hour_ago)) create_archive(name) with 
Archive().open(Path(name)) as archive: with pytest.raises(ArchiveIntegrityError) as err: @@ -120,14 +120,14 @@ def test_verify_wrong_mtime(test_data, testname): def test_verify_wrong_type(test_data, testname): name = archive_name(tags=[testname]) path = Path("base", "msg.txt") - mode = os.stat(str(path)).st_mode - mtime = os.stat(str(path)).st_mtime - mtime_parent = os.stat(str(path.parent)).st_mtime + mode = os.stat(path).st_mode + mtime = os.stat(path).st_mtime + mtime_parent = os.stat(path.parent).st_mtime path.unlink() path.mkdir() path.chmod(mode) - os.utime(str(path), times=(mtime, mtime)) - os.utime(str(path.parent), times=(mtime_parent, mtime_parent)) + os.utime(path, times=(mtime, mtime)) + os.utime(path.parent, times=(mtime_parent, mtime_parent)) create_archive(name) with Archive().open(Path(name)) as archive: with pytest.raises(ArchiveIntegrityError) as err: @@ -137,14 +137,14 @@ def test_verify_wrong_type(test_data, testname): def test_verify_wrong_checksum(test_data, testname): name = archive_name(tags=[testname]) path = Path("base", "data", "rnd.dat") - stat = os.stat(str(path)) + stat = os.stat(path) mode = stat.st_mode mtime = stat.st_mtime size = stat.st_size with path.open("wb") as f: f.write(b'0' * size) path.chmod(mode) - os.utime(str(path), times=(mtime, mtime)) + os.utime(path, times=(mtime, mtime)) create_archive(name) with Archive().open(Path(name)) as archive: with pytest.raises(ArchiveIntegrityError) as err: diff --git a/tests/test_04_cli.py b/tests/test_04_cli.py index 0c3d4c4..01650c0 100644 --- a/tests/test_04_cli.py +++ b/tests/test_04_cli.py @@ -53,7 +53,7 @@ def dep_testcase(request, testcase): def test_cli_create(test_dir, monkeypatch, testcase): compression, abspath = testcase require_compression(compression) - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = archive_name(ext=compression, tags=[absflag(abspath)]) if abspath: paths = str(test_dir / "base") @@ -110,7 +110,7 @@ def 
test_cli_checksums(test_dir, dep_testcase): args = ["ls", "--format=checksum", str(archive_path)] callscript("archive-tool.py", args, stdout=f) f.seek(0) - cwd = None if abspath else str(test_dir) + cwd = None if abspath else test_dir try: sha256 = subprocess.Popen([sha256sum, "--check"], cwd=cwd, stdin=subprocess.PIPE) diff --git a/tests/test_04_cli_check.py b/tests/test_04_cli_check.py index 4e0d2c1..b6eb1d2 100644 --- a/tests/test_04_cli_check.py +++ b/tests/test_04_cli_check.py @@ -31,8 +31,7 @@ def test_dir(tmpdir): @pytest.fixture(scope="function") def copy_data(testname, test_dir): copy_dir = test_dir / testname - shutil.copytree(str(test_dir / "base"), str(copy_dir / "base"), - symlinks=True) + shutil.copytree(test_dir / "base", copy_dir / "base", symlinks=True) return copy_dir @pytest.fixture(scope="function") @@ -45,7 +44,7 @@ def extract_archive(testname, test_dir): return check_dir def test_check_allmatch(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) @@ -57,7 +56,7 @@ def test_check_allmatch_default_files(test_dir, copy_data, monkeypatch): Rely on the fact that the file argument defaults to the archives's basedir. Ref. #45. 
""" - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", str(test_dir / "archive.tar")] callscript("archive-tool.py", args, stdout=f) @@ -65,7 +64,7 @@ def test_check_allmatch_default_files(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == set() def test_check_add_file(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "new_msg.txt") with fp.open("wt") as f: print("Greeting!", file=f) @@ -76,10 +75,10 @@ def test_check_add_file(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == {str(fp)} def test_check_change_type(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "s.dat") fp.unlink() - shutil.copy2(str(Path("base", "data", "rnd.dat")), str(fp)) + shutil.copy2(Path("base", "data", "rnd.dat"), fp) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) @@ -87,7 +86,7 @@ def test_check_change_type(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == {str(fp)} def test_check_touch_file(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "data", "rnd.dat") fp.touch() with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: @@ -97,12 +96,12 @@ def test_check_touch_file(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == {str(fp)} def test_check_modify_file(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "data", "rnd.dat") st = fp.stat() with fp.open("wb") as f: f.write(b" " * st.st_size) - os.utime(str(fp), (st.st_mtime, st.st_mtime)) + os.utime(fp, (st.st_mtime, st.st_mtime)) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", str(test_dir / 
"archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) @@ -110,7 +109,7 @@ def test_check_modify_file(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == {str(fp)} def test_check_symlink_target(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "s.dat") fp.unlink() fp.symlink_to(Path("msg.txt")) @@ -121,7 +120,7 @@ def test_check_symlink_target(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == {str(fp)} def test_check_present_allmatch(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) @@ -129,7 +128,7 @@ def test_check_present_allmatch(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == all_test_files def test_check_present_add_file(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "new_msg.txt") with fp.open("wt") as f: print("Greeting!", file=f) @@ -140,10 +139,10 @@ def test_check_present_add_file(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == all_test_files - {str(fp)} def test_check_present_change_type(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "s.dat") fp.unlink() - shutil.copy2(str(Path("base", "data", "rnd.dat")), str(fp)) + shutil.copy2(Path("base", "data", "rnd.dat"), fp) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) @@ -151,7 +150,7 @@ def test_check_present_change_type(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == all_test_files - {str(fp)} def test_check_present_touch_file(test_dir, copy_data, monkeypatch): - 
monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "data", "rnd.dat") fp.touch() with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: @@ -161,12 +160,12 @@ def test_check_present_touch_file(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == all_test_files - {str(fp)} def test_check_present_modify_file(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "data", "rnd.dat") st = fp.stat() with fp.open("wb") as f: f.write(b" " * st.st_size) - os.utime(str(fp), (st.st_mtime, st.st_mtime)) + os.utime(fp, (st.st_mtime, st.st_mtime)) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) @@ -174,7 +173,7 @@ def test_check_present_modify_file(test_dir, copy_data, monkeypatch): assert set(get_output(f)) == all_test_files - {str(fp)} def test_check_present_symlink_target(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) fp = Path("base", "s.dat") fp.unlink() fp.symlink_to(Path("msg.txt")) @@ -192,7 +191,7 @@ def test_check_extract_archive(test_dir, extract_archive, monkeypatch): file to be missing in the archive, even though these metadata are not listed in the manifest. Issue #25. 
""" - monkeypatch.chdir(str(extract_archive)) + monkeypatch.chdir(extract_archive) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) @@ -215,7 +214,7 @@ def test_check_extract_archive_custom_metadata(test_dir, testname, monkeypatch): archive.create(archive_path, "", [Path("base")], workdir=test_dir) check_dir = test_dir / testname check_dir.mkdir() - monkeypatch.chdir(str(check_dir)) + monkeypatch.chdir(check_dir) with tarfile.open(str(archive_path), "r") as tarf: tarf.extractall() with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: @@ -233,7 +232,7 @@ def test_check_present_extract_archive(test_dir, extract_archive, monkeypatch): metadata such as the manifest file, even though these metadata are not listed in the manifest. Issue #25. """ - monkeypatch.chdir(str(extract_archive)) + monkeypatch.chdir(extract_archive) all_files = all_test_files | { 'base/.manifest.yaml' } with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] @@ -250,7 +249,7 @@ def test_check_prefix_allmatch(test_dir, copy_data, monkeypatch): """ archive_path = test_dir / "archive.tar" prefix = Path("base", "data") - monkeypatch.chdir(str(copy_data / prefix)) + monkeypatch.chdir(copy_data / prefix) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", "--prefix", str(prefix), str(archive_path), "."] callscript("archive-tool.py", args, stdout=f) @@ -265,7 +264,7 @@ def test_check_prefix_present_allmatch(test_dir, copy_data, monkeypatch): """ archive_path = test_dir / "archive.tar" prefix = Path("base", "data") - monkeypatch.chdir(str(copy_data / prefix)) + monkeypatch.chdir(copy_data / prefix) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", "--prefix", str(prefix), "--present", str(archive_path), "."] @@ -282,7 +281,7 @@ def test_check_prefix_extract(test_dir, extract_archive, 
monkeypatch): """ archive_path = test_dir / "archive.tar" prefix = Path("base") - monkeypatch.chdir(str(extract_archive / prefix)) + monkeypatch.chdir(extract_archive / prefix) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["check", "--prefix", str(prefix), str(archive_path), "."] callscript("archive-tool.py", args, stdout=f) @@ -297,7 +296,7 @@ def test_check_prefix_present_extract(test_dir, extract_archive, monkeypatch): """ archive_path = test_dir / "archive.tar" prefix = Path("base") - monkeypatch.chdir(str(extract_archive / prefix)) + monkeypatch.chdir(extract_archive / prefix) all_files = { str(f.path.relative_to(prefix)) for f in testdata if f.type in {'f', 'l'} @@ -310,7 +309,7 @@ def test_check_prefix_present_extract(test_dir, extract_archive, monkeypatch): assert set(get_output(f)) == all_files def test_check_stdin(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) old_file = Path("base", "data", "rnd.dat") new_file = Path("base", "new_msg.txt") with new_file.open("wt") as f: @@ -318,15 +317,15 @@ def test_check_stdin(test_dir, copy_data, monkeypatch): with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_out: args = ["check", "--stdin", str(test_dir / "archive.tar")] with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_in: - print(str(old_file), file=f_in) - print(str(new_file), file=f_in) + print(old_file, file=f_in) + print(new_file, file=f_in) f_in.seek(0) callscript("archive-tool.py", args, stdin=f_in, stdout=f_out) f_out.seek(0) assert set(get_output(f_out)) == {str(new_file)} def test_check_stdin_present(test_dir, copy_data, monkeypatch): - monkeypatch.chdir(str(copy_data)) + monkeypatch.chdir(copy_data) old_file = Path("base", "data", "rnd.dat") new_file = Path("base", "new_msg.txt") with new_file.open("wt") as f: @@ -334,8 +333,8 @@ def test_check_stdin_present(test_dir, copy_data, monkeypatch): with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_out: args = ["check", 
"--present", "--stdin", str(test_dir / "archive.tar")] with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_in: - print(str(old_file), file=f_in) - print(str(new_file), file=f_in) + print(old_file, file=f_in) + print(new_file, file=f_in) f_in.seek(0) callscript("archive-tool.py", args, stdin=f_in, stdout=f_out) f_out.seek(0) diff --git a/tests/test_04_cli_create_dedup.py b/tests/test_04_cli_create_dedup.py index eaf09ff..2422fec 100644 --- a/tests/test_04_cli_create_dedup.py +++ b/tests/test_04_cli_create_dedup.py @@ -29,9 +29,9 @@ def test_dir(tmpdir): setup_testdata(tmpdir, testdata) sf = next(filter(lambda f: f.path == src, testdata)) - os.link(str(tmpdir / src), str(tmpdir / dest_lnk)) + os.link(tmpdir / src, tmpdir / dest_lnk) testdata.append(DataFile(dest_lnk, sf.mode, checksum=sf.checksum)) - shutil.copy(str(tmpdir / src), str(tmpdir / dest_cp)) + shutil.copy(tmpdir / src, tmpdir / dest_cp) testdata.append(DataFile(dest_cp, sf.mode, checksum=sf.checksum)) return tmpdir @@ -53,7 +53,7 @@ def dep_testcase(request, testcase): @pytest.mark.dependency() def test_cli_create(test_dir, monkeypatch, testcase): dedup = testcase - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = archive_name(tags=[dedup.value]) basedir = "base" args = ["create", "--deduplicate", dedup.value, archive_path, basedir] diff --git a/tests/test_04_cli_create_exclude.py b/tests/test_04_cli_create_exclude.py index 0e16062..fb5b61b 100644 --- a/tests/test_04_cli_create_exclude.py +++ b/tests/test_04_cli_create_exclude.py @@ -31,7 +31,7 @@ def test_dir(tmpdir): def test_cli_create_exclude_dir(test_dir, testname, monkeypatch): """Exclude a single directory. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = "base" exclude = Path("base", "data") @@ -45,7 +45,7 @@ def test_cli_create_exclude_dir(test_dir, testname, monkeypatch): def test_cli_create_exclude_mult(test_dir, testname, monkeypatch): """Exclude multiple items. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) paths = "base" excludes = [ @@ -68,7 +68,7 @@ def test_cli_create_exclude_include(test_dir, testname, monkeypatch): """Exclude a directory, but explicitely include an item in that directory. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) exclude = Path("base", "data") include = Path("base", "data", "rnd1.dat") diff --git a/tests/test_04_cli_create_misc.py b/tests/test_04_cli_create_misc.py index c429a65..639a3f0 100644 --- a/tests/test_04_cli_create_misc.py +++ b/tests/test_04_cli_create_misc.py @@ -32,7 +32,7 @@ def test_dir(tmpdir): def test_cli_create_tags(test_dir, monkeypatch, tags, expected): """Set tags using the --tags argument. """ - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) archive_path = archive_name(tags=["tags"], counter="cli_create_tags") args = ["create"] for t in tags: diff --git a/tests/test_04_cli_diff.py b/tests/test_04_cli_diff.py index e6a620a..32956fb 100644 --- a/tests/test_04_cli_diff.py +++ b/tests/test_04_cli_diff.py @@ -37,7 +37,7 @@ def test_dir(tmpdir): @pytest.fixture(scope="function") def test_data(request, test_dir): - shutil.rmtree(str(test_dir / "base"), ignore_errors=True) + shutil.rmtree(test_dir / "base", ignore_errors=True) with Archive().open(test_dir / "archive-rel.tar") as archive: archive.extract(test_dir) return test_dir @@ -46,7 +46,7 @@ def test_data(request, test_dir): def test_diff_equal(test_data, testname, monkeypatch, abspath): """Diff two archives having equal content. 
""" - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -66,7 +66,7 @@ def test_diff_equal(test_data, testname, monkeypatch, abspath): def test_diff_modified_file(test_data, testname, monkeypatch, abspath): """Diff two archives having one file's content modified. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -74,7 +74,7 @@ def test_diff_modified_file(test_data, testname, monkeypatch, abspath): archive_ref_path = Path("archive-rel.tar") base_dir = Path("base") p = base_dir / "rnd.dat" - shutil.copy(str(gettestdata("rnd2.dat")), str(p)) + shutil.copy(gettestdata("rnd2.dat"), p) flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) @@ -91,7 +91,7 @@ def test_diff_modified_file(test_data, testname, monkeypatch, abspath): def test_diff_symlink_target(test_data, testname, monkeypatch, abspath): """Diff two archives having one symlink's target modified. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -117,7 +117,7 @@ def test_diff_symlink_target(test_data, testname, monkeypatch, abspath): def test_diff_wrong_type(test_data, testname, monkeypatch, abspath): """Diff two archives with one entry having a wrong type. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -143,7 +143,7 @@ def test_diff_wrong_type(test_data, testname, monkeypatch, abspath): def test_diff_missing_files(test_data, testname, monkeypatch, abspath): """Diff two archives having one file's name changed. 
""" - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -169,7 +169,7 @@ def test_diff_missing_files(test_data, testname, monkeypatch, abspath): def test_diff_mult(test_data, testname, monkeypatch, abspath): """Diff two archives having multiple differences. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -177,7 +177,7 @@ def test_diff_mult(test_data, testname, monkeypatch, abspath): archive_ref_path = Path("archive-rel.tar") base_dir = Path("base") pm = base_dir / "data" / "rnd.dat" - shutil.copy(str(gettestdata("rnd2.dat")), str(pm)) + shutil.copy(gettestdata("rnd2.dat"), pm) p1 = base_dir / "msg.txt" p2 = base_dir / "o.txt" p1.rename(p2) @@ -200,7 +200,7 @@ def test_diff_metadata(test_data, testname, monkeypatch, abspath): """Diff two archives having one file's file system metadata modified. This difference should be ignored by default. """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -231,7 +231,7 @@ def test_diff_metadata(test_data, testname, monkeypatch, abspath): def test_diff_missing_dir(test_data, testname, monkeypatch, abspath): """Diff two archives with one subdirectory missing. 
""" - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -239,7 +239,7 @@ def test_diff_missing_dir(test_data, testname, monkeypatch, abspath): archive_ref_path = Path("archive-rel.tar") base_dir = Path("base") pd = base_dir / "data" / "zz" - shutil.rmtree(str(pd)) + shutil.rmtree(pd) flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) @@ -264,7 +264,7 @@ def test_diff_missing_dir(test_data, testname, monkeypatch, abspath): def test_diff_orphan_dir_content(test_data, testname, monkeypatch, abspath): """Diff archives having content in a missing directory. Ref. #56 """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: base_dir = test_data / "base" else: @@ -275,7 +275,7 @@ def test_diff_orphan_dir_content(test_data, testname, monkeypatch, abspath): archive_a = Path(archive_name(ext="bz2", tags=[testname, "a", flag])) Archive().create(archive_a, "bz2", [base_dir], excludes=excl_a) pm = pd / "rnd2.dat" - shutil.copy(str(gettestdata("rnd.dat")), str(pm)) + shutil.copy(gettestdata("rnd.dat"), pm) incl_b = [ base_dir, pd / "aa", pd / "rnd2.dat", pd / "zz" ] excl_b = [ pd, pd / "rnd.dat" ] flag = absflag(abspath) @@ -305,7 +305,7 @@ def test_diff_orphan_dir_content(test_data, testname, monkeypatch, abspath): def test_diff_extrafile_end(test_data, testname, monkeypatch, abspath): """The first archives has an extra entry as last item. Ref. 
#55 """ - monkeypatch.chdir(str(test_data)) + monkeypatch.chdir(test_data) if abspath: archive_ref_path = Path("archive-abs.tar") base_dir = test_data / "base" @@ -313,7 +313,7 @@ def test_diff_extrafile_end(test_data, testname, monkeypatch, abspath): archive_ref_path = Path("archive-rel.tar") base_dir = Path("base") p = base_dir / "zzz.dat" - shutil.copy(str(gettestdata("rnd2.dat")), str(p)) + shutil.copy(gettestdata("rnd2.dat"), p) flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) diff --git a/tests/test_04_cli_error.py b/tests/test_04_cli_error.py index 8eb47e1..00799d3 100644 --- a/tests/test_04_cli_error.py +++ b/tests/test_04_cli_error.py @@ -27,7 +27,7 @@ def test_dir(tmpdir): return tmpdir def test_cli_helpmessage(test_dir, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["-h"] callscript("archive-tool.py", args, stdout=f) @@ -36,7 +36,7 @@ def test_cli_helpmessage(test_dir, monkeypatch): assert line.startswith("usage: archive-tool.py ") def test_cli_missing_command(test_dir, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = [] callscript("archive-tool.py", args, returncode=2, stderr=f) @@ -50,7 +50,7 @@ def test_cli_missing_command(test_dir, monkeypatch): assert "subcommand is required" in line def test_cli_bogus_command(test_dir, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["bogus_cmd"] callscript("archive-tool.py", args, returncode=2, stderr=f) @@ -64,7 +64,7 @@ def test_cli_bogus_command(test_dir, monkeypatch): assert "invalid choice: 'bogus_cmd'" in line def test_cli_create_bogus_compression(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + 
monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["create", "--compression=bogus_comp", name, "base"] @@ -79,7 +79,7 @@ def test_cli_create_bogus_compression(test_dir, testname, monkeypatch): assert "--compression: invalid choice: 'bogus_comp'" in line def test_cli_ls_bogus_format(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) args = ["create", name, "base"] callscript("archive-tool.py", args) @@ -96,7 +96,7 @@ def test_cli_ls_bogus_format(test_dir, testname, monkeypatch): assert "--format: invalid choice: 'bogus_fmt'" in line def test_cli_create_normalized_path(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["create", name, "base/empty/.."] @@ -106,7 +106,7 @@ def test_cli_create_normalized_path(test_dir, testname, monkeypatch): assert "invalid path 'base/empty/..': must be normalized" in line def test_cli_create_rel_start_basedir(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["create", "--basedir=base/data", name, "base/msg.txt"] @@ -117,7 +117,7 @@ def test_cli_create_rel_start_basedir(test_dir, testname, monkeypatch): "base directory base/data") in line def test_cli_ls_archive_not_found(test_dir, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: args = ["ls", "bogus.tar"] callscript("archive-tool.py", args, returncode=1, stderr=f) @@ -126,7 +126,7 @@ def test_cli_ls_archive_not_found(test_dir, monkeypatch): assert "No such file or directory: 'bogus.tar'" in line def test_cli_ls_checksum_invalid_hash(test_dir, 
testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) args = ["create", name, "base"] callscript("archive-tool.py", args) @@ -138,7 +138,7 @@ def test_cli_ls_checksum_invalid_hash(test_dir, testname, monkeypatch): assert "'bogus' hashes not available" in line def test_cli_info_missing_entry(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) args = ["create", name, "base"] callscript("archive-tool.py", args) @@ -150,7 +150,7 @@ def test_cli_info_missing_entry(test_dir, testname, monkeypatch): assert "base/data/not-present: not found in archive" in line def test_cli_integrity_no_manifest(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) with tarfile.open(name, "w") as tarf: tarf.add("base", recursive=True) @@ -162,7 +162,7 @@ def test_cli_integrity_no_manifest(test_dir, testname, monkeypatch): assert "metadata item '.manifest.yaml' not found" in line def test_cli_integrity_missing_file(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) base = Path("base") missing = base / "data" / "not-present" @@ -171,9 +171,9 @@ def test_cli_integrity_missing_file(test_dir, testname, monkeypatch): manifest = Manifest(paths=[base]) with open("manifest.yaml", "wb") as f: manifest.write(f) - mtime_parent = os.stat(str(missing.parent)).st_mtime + mtime_parent = os.stat(missing.parent).st_mtime missing.unlink() - os.utime(str(missing.parent), times=(mtime_parent, mtime_parent)) + os.utime(missing.parent, times=(mtime_parent, mtime_parent)) with tarfile.open(name, "w") as tarf: with open("manifest.yaml", "rb") as f: manifest_info = tarf.gettarinfo(arcname="base/.manifest.yaml", @@ -189,7 +189,7 @@ def test_cli_integrity_missing_file(test_dir, testname, monkeypatch): 
assert "%s:%s: missing" % (name, missing) in line def test_cli_check_stdin_and_files(test_dir, testname, monkeypatch): - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) args = ["create", name, "base"] callscript("archive-tool.py", args) diff --git a/tests/test_04_cli_find.py b/tests/test_04_cli_find.py index aa962e9..7bb3d21 100644 --- a/tests/test_04_cli_find.py +++ b/tests/test_04_cli_find.py @@ -59,7 +59,7 @@ def test_dir(tmpdir): setup_testdata(tmpdir, data) Archive().create(rel_paths[i], "bz2", [base]) Archive().create(abs_paths[i], "bz2", [tmpdir / base]) - shutil.rmtree(str(base)) + shutil.rmtree(base) return tmpdir @pytest.mark.parametrize("abspath", [False, True]) diff --git a/tests/test_04_cli_warn.py b/tests/test_04_cli_warn.py index 3f48c1b..1a547d6 100644 --- a/tests/test_04_cli_warn.py +++ b/tests/test_04_cli_warn.py @@ -44,7 +44,7 @@ def test_cli_warn_ignore_socket(test_dir, testname, monkeypatch): archive-tool.py should issue a warning that the socket has been ignored, but otherwise proceed to create the archive. 
""" - monkeypatch.chdir(str(test_dir)) + monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) basedir = Path("base") fp = basedir / "socket" diff --git a/tests/test_05_mailarchive_create.py b/tests/test_05_mailarchive_create.py index 47d6c7c..ff32058 100644 --- a/tests/test_05_mailarchive_create.py +++ b/tests/test_05_mailarchive_create.py @@ -42,7 +42,7 @@ def test_create_mailarchive(tmpdir, monkeypatch, testcase): if testcase == "abs": archive_path = tmpdir / "mailarchive-abs.tar.xz" else: - monkeypatch.chdir(str(tmpdir)) + monkeypatch.chdir(tmpdir) archive_path = "mailarchive-rel.tar.xz" archive = MailArchive() archive.create(archive_path, getmsgs(), server="imap.example.org") From 78a25e340831df1bd713c6afad8525892a290af6 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 20:20:22 +0200 Subject: [PATCH 059/138] Remove conversion from Path to str in some more, not so obvious cases --- archive/archive.py | 4 +-- archive/manifest.py | 2 +- tests/test_03_create_dedup.py | 2 +- tests/test_03_create_errors.py | 4 +-- tests/test_03_create_misc.py | 2 +- tests/test_03_verify_errors.py | 2 +- tests/test_04_cli.py | 6 ++-- tests/test_04_cli_check.py | 54 +++++++++++++++++----------------- tests/test_04_cli_diff.py | 26 ++++++++-------- tests/test_04_cli_error.py | 26 ++++++++-------- tests/test_04_cli_find.py | 14 ++++----- tests/test_04_cli_warn.py | 2 +- 12 files changed, 72 insertions(+), 72 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index c375365..0761cda 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -102,7 +102,7 @@ def create(self, path, compression, paths=None, fileinfos=None, return self def _create(self, mode): - with tarfile.open(str(self.path), mode) as tarf: + with tarfile.open(self.path, mode) as tarf: with tempfile.TemporaryFile() as tmpf: self.manifest.write(tmpf) tmpf.seek(0) @@ -218,7 +218,7 @@ def add_metadata(self, name, fileobj, mode=0o444): def open(self, path): self.path = path try: - 
self._file = tarfile.open(str(self.path), 'r') + self._file = tarfile.open(self.path, 'r') except OSError as e: raise ArchiveReadError(str(e)) md = self.get_metadata(".manifest.yaml") diff --git a/archive/manifest.py b/archive/manifest.py index 505b147..307af0e 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -129,7 +129,7 @@ def __str__(self): if self.type == 'l': p = "%s -> %s" % (self.path, self.target) else: - p = str(self.path) + p = self.path return "%s %s %s %s %s" % (m, ug, s, d, p) @classmethod diff --git a/tests/test_03_create_dedup.py b/tests/test_03_create_dedup.py index 73461af..b8d5d49 100644 --- a/tests/test_03_create_dedup.py +++ b/tests/test_03_create_dedup.py @@ -74,7 +74,7 @@ def test_check_content(test_dir, dep_testcase): outdir = test_dir / "out" shutil.rmtree(outdir, ignore_errors=True) outdir.mkdir() - with tarfile.open(str(archive_path), "r") as tarf: + with tarfile.open(archive_path, "r") as tarf: tarf.extractall(path=str(outdir)) try: sha256 = subprocess.Popen([sha256sum, "--check"], diff --git a/tests/test_03_create_errors.py b/tests/test_03_create_errors.py index a264dc8..1a0fd84 100644 --- a/tests/test_03_create_errors.py +++ b/tests/test_03_create_errors.py @@ -111,7 +111,7 @@ def test_create_duplicate_metadata(test_dir, testname, monkeypatch): monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) p = Path("base") - with TemporaryFile(dir=str(test_dir)) as tmpf: + with TemporaryFile(dir=test_dir) as tmpf: archive = Archive() tmpf.write("Hello world!\n".encode("ascii")) tmpf.seek(0) @@ -127,7 +127,7 @@ def test_create_metadata_vs_content(test_dir, testname, monkeypatch): monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) p = Path("base") - with TemporaryFile(dir=str(test_dir)) as tmpf: + with TemporaryFile(dir=test_dir) as tmpf: archive = Archive() tmpf.write("Hello world!\n".encode("ascii")) tmpf.seek(0) diff --git a/tests/test_03_create_misc.py b/tests/test_03_create_misc.py index e1a7acb..58d9aa2 
100644 --- a/tests/test_03_create_misc.py +++ b/tests/test_03_create_misc.py @@ -72,7 +72,7 @@ def test_create_custom_metadata(test_dir, monkeypatch): monkeypatch.chdir(test_dir) archive_path = Path("archive-custom-md.tar") p = Path("base", "data") - with TemporaryFile(dir=str(test_dir)) as tmpf: + with TemporaryFile(dir=test_dir) as tmpf: archive = Archive() tmpf.write("Hello world!\n".encode("ascii")) tmpf.seek(0) diff --git a/tests/test_03_verify_errors.py b/tests/test_03_verify_errors.py index 1106f66..0d58dee 100644 --- a/tests/test_03_verify_errors.py +++ b/tests/test_03_verify_errors.py @@ -61,7 +61,7 @@ def test_verify_missing_metadata_item(test_data, testname): manifest.add_metadata(Path("base", ".manifest.yaml")) manifest.add_metadata(Path("base", ".msg.txt")) with tarfile.open(name, "w") as tarf: - with tempfile.TemporaryFile(dir=str(test_data)) as tmpf: + with tempfile.TemporaryFile(dir=test_data) as tmpf: manifest.write(tmpf) tmpf.seek(0) ti = tarf.gettarinfo(arcname="base/.manifest.yaml", diff --git a/tests/test_04_cli.py b/tests/test_04_cli.py index 01650c0..aee2ecb 100644 --- a/tests/test_04_cli.py +++ b/tests/test_04_cli.py @@ -85,7 +85,7 @@ def test_cli_ls(test_dir, dep_testcase): flag = absflag(abspath) archive_path = test_dir / archive_name(ext=compression, tags=[flag]) prefix_dir = test_dir if abspath else Path(".") - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["ls", str(archive_path)] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -106,7 +106,7 @@ def test_cli_checksums(test_dir, dep_testcase): compression, abspath = dep_testcase flag = absflag(abspath) archive_path = test_dir / archive_name(ext=compression, tags=[flag]) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["ls", "--format=checksum", str(archive_path)] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -133,7 +133,7 @@ 
def test_cli_info(test_dir, dep_testcase): for entry in testdata: if entry.type in types_done: continue - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["info", str(archive_path), str(prefix_dir / entry.path)] callscript("archive-tool.py", args, stdout=f) f.seek(0) diff --git a/tests/test_04_cli_check.py b/tests/test_04_cli_check.py index b6eb1d2..c86650c 100644 --- a/tests/test_04_cli_check.py +++ b/tests/test_04_cli_check.py @@ -39,13 +39,13 @@ def extract_archive(testname, test_dir): archive_path = test_dir / "archive.tar" check_dir = test_dir / testname check_dir.mkdir() - with tarfile.open(str(archive_path), "r") as tarf: + with tarfile.open(archive_path, "r") as tarf: tarf.extractall(path=str(check_dir)) return check_dir def test_check_allmatch(test_dir, copy_data, monkeypatch): monkeypatch.chdir(copy_data) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -57,7 +57,7 @@ def test_check_allmatch_default_files(test_dir, copy_data, monkeypatch): basedir. Ref. #45. 
""" monkeypatch.chdir(copy_data) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar")] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -68,7 +68,7 @@ def test_check_add_file(test_dir, copy_data, monkeypatch): fp = Path("base", "new_msg.txt") with fp.open("wt") as f: print("Greeting!", file=f) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -79,7 +79,7 @@ def test_check_change_type(test_dir, copy_data, monkeypatch): fp = Path("base", "s.dat") fp.unlink() shutil.copy2(Path("base", "data", "rnd.dat"), fp) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -89,7 +89,7 @@ def test_check_touch_file(test_dir, copy_data, monkeypatch): monkeypatch.chdir(copy_data) fp = Path("base", "data", "rnd.dat") fp.touch() - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -102,7 +102,7 @@ def test_check_modify_file(test_dir, copy_data, monkeypatch): with fp.open("wb") as f: f.write(b" " * st.st_size) os.utime(fp, (st.st_mtime, st.st_mtime)) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -113,7 +113,7 @@ def test_check_symlink_target(test_dir, copy_data, monkeypatch): fp = Path("base", "s.dat") fp.unlink() fp.symlink_to(Path("msg.txt")) - with TemporaryFile(mode="w+t", 
dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -121,7 +121,7 @@ def test_check_symlink_target(test_dir, copy_data, monkeypatch): def test_check_present_allmatch(test_dir, copy_data, monkeypatch): monkeypatch.chdir(copy_data) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -132,7 +132,7 @@ def test_check_present_add_file(test_dir, copy_data, monkeypatch): fp = Path("base", "new_msg.txt") with fp.open("wt") as f: print("Greeting!", file=f) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -143,7 +143,7 @@ def test_check_present_change_type(test_dir, copy_data, monkeypatch): fp = Path("base", "s.dat") fp.unlink() shutil.copy2(Path("base", "data", "rnd.dat"), fp) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -153,7 +153,7 @@ def test_check_present_touch_file(test_dir, copy_data, monkeypatch): monkeypatch.chdir(copy_data) fp = Path("base", "data", "rnd.dat") fp.touch() - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -166,7 +166,7 @@ def test_check_present_modify_file(test_dir, copy_data, monkeypatch): with fp.open("wb") as f: f.write(b" " * st.st_size) os.utime(fp, 
(st.st_mtime, st.st_mtime)) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -177,7 +177,7 @@ def test_check_present_symlink_target(test_dir, copy_data, monkeypatch): fp = Path("base", "s.dat") fp.unlink() fp.symlink_to(Path("msg.txt")) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -192,7 +192,7 @@ def test_check_extract_archive(test_dir, extract_archive, monkeypatch): not listed in the manifest. Issue #25. """ monkeypatch.chdir(extract_archive) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -206,7 +206,7 @@ def test_check_extract_archive_custom_metadata(test_dir, testname, monkeypatch): having custom metadata. Issue #25. 
""" archive_path = test_dir / "archive-custom-md.tar" - with TemporaryFile(dir=str(test_dir)) as tmpf: + with TemporaryFile(dir=test_dir) as tmpf: archive = Archive() tmpf.write("Hello world!\n".encode("ascii")) tmpf.seek(0) @@ -215,9 +215,9 @@ def test_check_extract_archive_custom_metadata(test_dir, testname, monkeypatch): check_dir = test_dir / testname check_dir.mkdir() monkeypatch.chdir(check_dir) - with tarfile.open(str(archive_path), "r") as tarf: + with tarfile.open(archive_path, "r") as tarf: tarf.extractall() - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", str(archive_path), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -234,7 +234,7 @@ def test_check_present_extract_archive(test_dir, extract_archive, monkeypatch): """ monkeypatch.chdir(extract_archive) all_files = all_test_files | { 'base/.manifest.yaml' } - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--present", str(test_dir / "archive.tar"), "base"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -250,7 +250,7 @@ def test_check_prefix_allmatch(test_dir, copy_data, monkeypatch): archive_path = test_dir / "archive.tar" prefix = Path("base", "data") monkeypatch.chdir(copy_data / prefix) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--prefix", str(prefix), str(archive_path), "."] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -265,7 +265,7 @@ def test_check_prefix_present_allmatch(test_dir, copy_data, monkeypatch): archive_path = test_dir / "archive.tar" prefix = Path("base", "data") monkeypatch.chdir(copy_data / prefix) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--prefix", str(prefix), "--present", str(archive_path), "."] 
callscript("archive-tool.py", args, stdout=f) @@ -282,7 +282,7 @@ def test_check_prefix_extract(test_dir, extract_archive, monkeypatch): archive_path = test_dir / "archive.tar" prefix = Path("base") monkeypatch.chdir(extract_archive / prefix) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--prefix", str(prefix), str(archive_path), "."] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -301,7 +301,7 @@ def test_check_prefix_present_extract(test_dir, extract_archive, monkeypatch): str(f.path.relative_to(prefix)) for f in testdata if f.type in {'f', 'l'} } | { '.manifest.yaml' } - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--prefix", str(prefix), "--present", str(archive_path), "."] callscript("archive-tool.py", args, stdout=f) @@ -314,9 +314,9 @@ def test_check_stdin(test_dir, copy_data, monkeypatch): new_file = Path("base", "new_msg.txt") with new_file.open("wt") as f: print("Greeting!", file=f) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_out: + with TemporaryFile(mode="w+t", dir=test_dir) as f_out: args = ["check", "--stdin", str(test_dir / "archive.tar")] - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_in: + with TemporaryFile(mode="w+t", dir=test_dir) as f_in: print(old_file, file=f_in) print(new_file, file=f_in) f_in.seek(0) @@ -330,9 +330,9 @@ def test_check_stdin_present(test_dir, copy_data, monkeypatch): new_file = Path("base", "new_msg.txt") with new_file.open("wt") as f: print("Greeting!", file=f) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_out: + with TemporaryFile(mode="w+t", dir=test_dir) as f_out: args = ["check", "--present", "--stdin", str(test_dir / "archive.tar")] - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f_in: + with TemporaryFile(mode="w+t", dir=test_dir) as f_in: print(old_file, file=f_in) print(new_file, file=f_in) 
f_in.seek(0) diff --git a/tests/test_04_cli_diff.py b/tests/test_04_cli_diff.py index 32956fb..0d31f2e 100644 --- a/tests/test_04_cli_diff.py +++ b/tests/test_04_cli_diff.py @@ -56,7 +56,7 @@ def test_diff_equal(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -78,7 +78,7 @@ def test_diff_modified_file(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, returncode=101, stdout=f) f.seek(0) @@ -104,7 +104,7 @@ def test_diff_symlink_target(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, returncode=101, stdout=f) f.seek(0) @@ -130,7 +130,7 @@ def test_diff_wrong_type(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] 
callscript("archive-tool.py", args, returncode=102, stdout=f) f.seek(0) @@ -156,7 +156,7 @@ def test_diff_missing_files(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, returncode=102, stdout=f) f.seek(0) @@ -184,7 +184,7 @@ def test_diff_mult(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, returncode=102, stdout=f) f.seek(0) @@ -212,12 +212,12 @@ def test_diff_metadata(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, stdout=f) f.seek(0) assert list(get_output(f)) == [] - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", "--report-meta", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, returncode=100, stdout=f) @@ -243,7 +243,7 @@ def test_diff_missing_dir(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with 
TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, returncode=102, stdout=f) f.seek(0) @@ -251,7 +251,7 @@ def test_diff_missing_dir(test_data, testname, monkeypatch, abspath): assert len(out) == 2 assert out[0] == "Only in %s: %s" % (archive_ref_path, pd) assert out[1] == "Only in %s: %s" % (archive_ref_path, pd / "rnd_z.dat") - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", "--skip-dir-content", str(archive_ref_path), str(archive_path)] callscript("archive-tool.py", args, returncode=102, stdout=f) @@ -281,7 +281,7 @@ def test_diff_orphan_dir_content(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_b = Path(archive_name(ext="bz2", tags=[testname, "b", flag])) Archive().create(archive_b, "bz2", incl_b, excludes=excl_b) - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_a), str(archive_b)] callscript("archive-tool.py", args, returncode=102, stdout=f) f.seek(0) @@ -293,7 +293,7 @@ def test_diff_orphan_dir_content(test_data, testname, monkeypatch, abspath): % (archive_a, pm, archive_b, pm)) assert out[3] == "Only in %s: %s" % (archive_b, pd / "zz") assert out[4] == "Only in %s: %s" % (archive_b, pd / "zz" / "rnd_z.dat") - with TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", "--skip-dir-content", str(archive_a), str(archive_b)] callscript("archive-tool.py", args, returncode=102, stdout=f) f.seek(0) @@ -317,7 +317,7 @@ def test_diff_extrafile_end(test_data, testname, monkeypatch, abspath): flag = absflag(abspath) archive_path = Path(archive_name(ext="bz2", tags=[testname, flag])) Archive().create(archive_path, "bz2", [base_dir]) - with 
TemporaryFile(mode="w+t", dir=str(test_data)) as f: + with TemporaryFile(mode="w+t", dir=test_data) as f: args = ["diff", str(archive_path), str(archive_ref_path)] callscript("archive-tool.py", args, returncode=102, stdout=f) f.seek(0) diff --git a/tests/test_04_cli_error.py b/tests/test_04_cli_error.py index 00799d3..c2c9421 100644 --- a/tests/test_04_cli_error.py +++ b/tests/test_04_cli_error.py @@ -28,7 +28,7 @@ def test_dir(tmpdir): def test_cli_helpmessage(test_dir, monkeypatch): monkeypatch.chdir(test_dir) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["-h"] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -37,7 +37,7 @@ def test_cli_helpmessage(test_dir, monkeypatch): def test_cli_missing_command(test_dir, monkeypatch): monkeypatch.chdir(test_dir) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = [] callscript("archive-tool.py", args, returncode=2, stderr=f) f.seek(0) @@ -51,7 +51,7 @@ def test_cli_missing_command(test_dir, monkeypatch): def test_cli_bogus_command(test_dir, monkeypatch): monkeypatch.chdir(test_dir) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["bogus_cmd"] callscript("archive-tool.py", args, returncode=2, stderr=f) f.seek(0) @@ -66,7 +66,7 @@ def test_cli_bogus_command(test_dir, monkeypatch): def test_cli_create_bogus_compression(test_dir, testname, monkeypatch): monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["create", "--compression=bogus_comp", name, "base"] callscript("archive-tool.py", args, returncode=2, stderr=f) f.seek(0) @@ -83,7 +83,7 @@ def test_cli_ls_bogus_format(test_dir, testname, monkeypatch): name = archive_name(tags=[testname]) args = ["create", name, "base"] 
callscript("archive-tool.py", args) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["ls", "--format=bogus_fmt", name] callscript("archive-tool.py", args, returncode=2, stderr=f) f.seek(0) @@ -98,7 +98,7 @@ def test_cli_ls_bogus_format(test_dir, testname, monkeypatch): def test_cli_create_normalized_path(test_dir, testname, monkeypatch): monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["create", name, "base/empty/.."] callscript("archive-tool.py", args, returncode=1, stderr=f) f.seek(0) @@ -108,7 +108,7 @@ def test_cli_create_normalized_path(test_dir, testname, monkeypatch): def test_cli_create_rel_start_basedir(test_dir, testname, monkeypatch): monkeypatch.chdir(test_dir) name = archive_name(tags=[testname]) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["create", "--basedir=base/data", name, "base/msg.txt"] callscript("archive-tool.py", args, returncode=1, stderr=f) f.seek(0) @@ -118,7 +118,7 @@ def test_cli_create_rel_start_basedir(test_dir, testname, monkeypatch): def test_cli_ls_archive_not_found(test_dir, monkeypatch): monkeypatch.chdir(test_dir) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["ls", "bogus.tar"] callscript("archive-tool.py", args, returncode=1, stderr=f) f.seek(0) @@ -130,7 +130,7 @@ def test_cli_ls_checksum_invalid_hash(test_dir, testname, monkeypatch): name = archive_name(tags=[testname]) args = ["create", name, "base"] callscript("archive-tool.py", args) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["ls", "--format=checksum", "--checksum=bogus", name] callscript("archive-tool.py", args, returncode=1, stderr=f) f.seek(0) @@ 
-142,7 +142,7 @@ def test_cli_info_missing_entry(test_dir, testname, monkeypatch): name = archive_name(tags=[testname]) args = ["create", name, "base"] callscript("archive-tool.py", args) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["info", name, "base/data/not-present"] callscript("archive-tool.py", args, returncode=1, stderr=f) f.seek(0) @@ -154,7 +154,7 @@ def test_cli_integrity_no_manifest(test_dir, testname, monkeypatch): name = archive_name(tags=[testname]) with tarfile.open(name, "w") as tarf: tarf.add("base", recursive=True) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["ls", name] callscript("archive-tool.py", args, returncode=3, stderr=f) f.seek(0) @@ -181,7 +181,7 @@ def test_cli_integrity_missing_file(test_dir, testname, monkeypatch): manifest_info.mode = stat.S_IFREG | 0o444 tarf.addfile(manifest_info, f) tarf.add("base") - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["verify", name] callscript("archive-tool.py", args, returncode=3, stderr=f) f.seek(0) @@ -193,7 +193,7 @@ def test_cli_check_stdin_and_files(test_dir, testname, monkeypatch): name = archive_name(tags=[testname]) args = ["create", name, "base"] callscript("archive-tool.py", args) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["check", "--stdin", name, "base"] callscript("archive-tool.py", args, returncode=2, stderr=f) f.seek(0) diff --git a/tests/test_04_cli_find.py b/tests/test_04_cli_find.py index 7bb3d21..e1128d1 100644 --- a/tests/test_04_cli_find.py +++ b/tests/test_04_cli_find.py @@ -68,7 +68,7 @@ def test_find_all(test_dir, abspath): Expect the call to list all entries from the archives. 
""" archives = archive_paths(test_dir, abspath) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["find"] + [str(p) for p in archives] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -88,7 +88,7 @@ def test_find_bytype(test_dir, abspath, type): """Call archive-tool to find entries by type. """ archives = archive_paths(test_dir, abspath) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["find", "--type", type] + [str(p) for p in archives] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -111,7 +111,7 @@ def test_find_byname_exact(test_dir, abspath): """Call archive-tool to find entries by exact name. """ archives = archive_paths(test_dir, abspath) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["find", "--name", "rnd.dat"] + [str(p) for p in archives] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -135,7 +135,7 @@ def test_find_byname_wildcard(test_dir, pattern, abspath): """Call archive-tool to find entries with matching name. 
""" archives = archive_paths(test_dir, abspath) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["find", "--name", pattern] + [str(p) for p in archives] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -173,7 +173,7 @@ def matches(direct, timestamp, entry): elif direct == '-': return entry.mtime is None or entry.mtime > timestamp archives = archive_paths(test_dir, False) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["find", "--mtime=%s" % mtime] + [str(p) for p in archives] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -204,7 +204,7 @@ def matches(direct, timestamp, entry): elif direct == '>': return entry.mtime is None or entry.mtime > timestamp archives = archive_paths(test_dir, False) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["find", "--mtime=%s" % mtime] + [str(p) for p in archives] callscript("archive-tool.py", args, stdout=f) f.seek(0) @@ -238,7 +238,7 @@ def matches(direct, timestamp, entry): elif direct == '>': return entry.mtime is None or entry.mtime > timestamp archives = archive_paths(test_dir, False) - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["find", "--mtime=%s" % mtime] + [str(p) for p in archives] callscript("archive-tool.py", args, stdout=f) f.seek(0) diff --git a/tests/test_04_cli_warn.py b/tests/test_04_cli_warn.py index 1a547d6..511fb29 100644 --- a/tests/test_04_cli_warn.py +++ b/tests/test_04_cli_warn.py @@ -49,7 +49,7 @@ def test_cli_warn_ignore_socket(test_dir, testname, monkeypatch): basedir = Path("base") fp = basedir / "socket" with tmp_socket(fp): - with TemporaryFile(mode="w+t", dir=str(test_dir)) as f: + with TemporaryFile(mode="w+t", dir=test_dir) as f: args = ["create", name, "base"] callscript("archive-tool.py", args, 
stderr=f) f.seek(0) From 82179e881ea9860c959715077e586537dc55ded4 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 20:38:23 +0200 Subject: [PATCH 060/138] Remove needless imports --- scripts/imap-to-archive.py | 1 - tests/test_05_mailarchive_create.py | 1 - 2 files changed, 2 deletions(-) diff --git a/scripts/imap-to-archive.py b/scripts/imap-to-archive.py index ab422ee..b586091 100644 --- a/scripts/imap-to-archive.py +++ b/scripts/imap-to-archive.py @@ -12,7 +12,6 @@ import sys from imapclient import IMAPClient from archive.mailarchive import MailArchive -from archive.tools import now_str logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") logging.getLogger('imapclient').setLevel(logging.WARNING) diff --git a/tests/test_05_mailarchive_create.py b/tests/test_05_mailarchive_create.py index 47d6c7c..01cf4c1 100644 --- a/tests/test_05_mailarchive_create.py +++ b/tests/test_05_mailarchive_create.py @@ -8,7 +8,6 @@ import yaml from archive import Archive from archive.mailarchive import MailIndex, MailArchive -from archive.tools import now_str from conftest import gettestdata testdata = gettestdata("mails.tar.gz") From 327410c8cafc4121c4e94002c99cb958b1fb73b3 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 20:43:26 +0200 Subject: [PATCH 061/138] Update changelog --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9e7da1a..2456be1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,6 +23,8 @@ New features Incompatible changes -------------------- ++ `#60`_: Drop support for Python 3.4 and 3.5. + + The `comment` keyword argument to :class:`MailArchive` has been dropped, ref. `#51`_. @@ -55,6 +57,7 @@ Bug fixes and minor changes .. _#56: https://github.com/RKrahl/archive-tools/issues/56 .. _#57: https://github.com/RKrahl/archive-tools/pull/57 .. _#58: https://github.com/RKrahl/archive-tools/pull/58 +.. 
_#60: https://github.com/RKrahl/archive-tools/pull/60 0.5.1 (2020-12-12) From 582f4ffb4410b00d2aacabe542dd50b08eee14aa Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Wed, 19 May 2021 21:05:28 +0200 Subject: [PATCH 062/138] Update changelog --- CHANGES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 2456be1..3ed1d55 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -31,6 +31,9 @@ Incompatible changes Bug fixes and minor changes --------------------------- ++ `#59`_: Change :attr:`Archive.path` to the absolute path of the + archive. + + `#57`_: Do not take the paths relative to the base directory in the `archive-tool diff` command. @@ -57,6 +60,7 @@ Bug fixes and minor changes .. _#56: https://github.com/RKrahl/archive-tools/issues/56 .. _#57: https://github.com/RKrahl/archive-tools/pull/57 .. _#58: https://github.com/RKrahl/archive-tools/pull/58 +.. _#59: https://github.com/RKrahl/archive-tools/pull/59 .. _#60: https://github.com/RKrahl/archive-tools/pull/60 From ef33db66f5f3d738d331f8bd390de46c8990010e Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 20 May 2021 15:05:08 +0200 Subject: [PATCH 063/138] - Add function date_str_rfc5322() - parse_date() now also accepts date strings as returned by datetime.isoformat(). --- archive/tools.py | 58 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/archive/tools.py b/archive/tools.py index bf7af65..d743878 100644 --- a/archive/tools.py +++ b/archive/tools.py @@ -20,6 +20,34 @@ _dateutil_parse = None +if hasattr(datetime.datetime, 'fromisoformat'): + # Python 3.7 and newer + _dt_fromisoformat = datetime.datetime.fromisoformat +else: + # Python 3.6 + import re + _dt_isofmt_re = re.compile(r'''^ + (?P<dy>\d{4})-(?P<dm>\d{2})-(?P<dd>\d{2})
# date + . # separator (any character) + (?P<th>\d{2}):(?P<tm>\d{2}):(?P<ts>\d{2}) # time + (?:(?P<zh>[+-]\d{2}):(?P<zm>\d{2}))? # time zone (optional) + $''', re.X) + def _dt_fromisoformat(date_string): + m = _dt_isofmt_re.match(date_string) + if m: + dt = [int(i) for i in m.group('dy', 'dm', 'dd', 'th', 'tm', 'ts')] + if m.group('zh'): + zh = int(m.group('zh')) + zm = int(m.group('zm')) + offs = datetime.timedelta(hours=zh, minutes=zm) + tz = datetime.timezone(offs) + else: + tz = None + return datetime.datetime(*dt, tzinfo=tz) + else: + raise ValueError("Invalid isoformat string: '%s'" % date_string) + + class tmp_chdir(): """A context manager to temporarily change directory. """ @@ -57,30 +85,44 @@ def __del__(self): self._restore_mask() +def date_str_rfc5322(dt): + """Return a RFC 5322 string representation of a datetime. + """ + return dt.strftime("%a, %d %b %Y %H:%M:%S %z").strip() + + def now_str(): """Return the current local date and time as a string. """ if gettz: now = datetime.datetime.now(tz=gettz()) - date_fmt = "%a, %d %b %Y %H:%M:%S %z" else: now = datetime.datetime.now() - date_fmt = "%a, %d %b %Y %H:%M:%S" - return now.strftime(date_fmt) + return date_str_rfc5322(now) def parse_date(date_string): - """Parse a date string as returned from now_str() into a datetime object. + """Parse a date string into a datetime object. + + The function accepts strings as returned by datetime.isoformat() + and date_str_rfc5322().
""" if _dateutil_parse: return _dateutil_parse(date_string) else: try: - date_fmt = "%a, %d %b %Y %H:%M:%S %z" - return datetime.datetime.strptime(date_string, date_fmt) + return _dt_fromisoformat(date_string) except ValueError: - date_fmt = "%a, %d %b %Y %H:%M:%S" - return datetime.datetime.strptime(date_string, date_fmt) + try: + date_fmt = "%a, %d %b %Y %H:%M:%S %z" + return datetime.datetime.strptime(date_string, date_fmt) + except ValueError: + try: + date_fmt = "%a, %d %b %Y %H:%M:%S" + return datetime.datetime.strptime(date_string, date_fmt) + except ValueError: + raise ValueError("Invalid date string: '%s'" + % date_string) from None def checksum(fileobj, hashalg): From ece1b06cce593c4ed221da953c8ea1b126584bce Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 20 May 2021 16:18:37 +0200 Subject: [PATCH 064/138] Add tests for the date funcions in archive.tools --- tests/test_01_dates.py | 130 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 tests/test_01_dates.py diff --git a/tests/test_01_dates.py b/tests/test_01_dates.py new file mode 100644 index 0000000..6c98d16 --- /dev/null +++ b/tests/test_01_dates.py @@ -0,0 +1,130 @@ +"""Test helper functions dealing with dates in the archive.tools module. 
+""" + +import datetime +import pytest +import archive.tools + +tz_utc = datetime.timezone.utc +tz_cest = datetime.timezone(datetime.timedelta(hours=2)) +tz_mst = datetime.timezone(datetime.timedelta(hours=-7)) +testdates = [ + { + 'dt': datetime.datetime(2021, 5, 1, 15, 21, 5), + 'str_iso': "2021-05-01T15:21:05", + 'str_iso_sp': "2021-05-01 15:21:05", + 'str_rfc5322': "Sat, 01 May 2021 15:21:05", + }, + { + 'dt': datetime.datetime(1967, 7, 12, 4, 30, 21, tzinfo=tz_utc), + 'str_iso': "1967-07-12T04:30:21+00:00", + 'str_iso_sp': "1967-07-12 04:30:21+00:00", + 'str_rfc5322': "Wed, 12 Jul 1967 04:30:21 +0000", + }, + { + 'dt': datetime.datetime(2021, 5, 20, 15, 21, 5, tzinfo=tz_cest), + 'str_iso': "2021-05-20T15:21:05+02:00", + 'str_iso_sp': "2021-05-20 15:21:05+02:00", + 'str_rfc5322': "Thu, 20 May 2021 15:21:05 +0200", + }, + { + 'dt': datetime.datetime(2019, 12, 6, 4, 27, 58, tzinfo=tz_mst), + 'str_iso': "2019-12-06T04:27:58-07:00", + 'str_iso_sp': "2019-12-06 04:27:58-07:00", + 'str_rfc5322': "Fri, 06 Dec 2019 04:27:58 -0700", + }, +] + + +@pytest.mark.parametrize("date", testdates) +@pytest.mark.skipif(archive.tools._dateutil_parse is None, + reason="Need dateutil.parser") +def test_date_str_rfc5322_dateutil(date): + """Test date_str_rfc5322() and conversion back with parse_date() in + the case that dateutil.parser is available. + """ + date_string = archive.tools.date_str_rfc5322(date['dt']) + assert date_string == date['str_rfc5322'] + dt = archive.tools.parse_date(date_string) + assert dt == date['dt'] + +@pytest.mark.parametrize("date", testdates) +def test_date_str_rfc5322_no_dateutil(monkeypatch, date): + """Test date_str_rfc5322() and conversion back with parse_date() in + the case that dateutil.parser is not available. 
+ """ + monkeypatch.setattr(archive.tools, "_dateutil_parse", None) + date_string = archive.tools.date_str_rfc5322(date['dt']) + assert date_string == date['str_rfc5322'] + dt = archive.tools.parse_date(date_string) + assert dt == date['dt'] + +@pytest.mark.parametrize("date", testdates) +@pytest.mark.skipif(archive.tools._dateutil_parse is None, + reason="Need dateutil.parser") +def test_date_str_iso_dateutil(date): + """Test parse_date() with ISO 8601 dates in + the case that dateutil.parser is available. + """ + date_string = date['dt'].isoformat() + assert date_string == date['str_iso'] + dt = archive.tools.parse_date(date_string) + assert dt == date['dt'] + +@pytest.mark.parametrize("date", testdates) +def test_date_str_iso_no_dateutil(monkeypatch, date): + """Test parse_date() with ISO 8601 dates in + the case that dateutil.parser is not available. + """ + monkeypatch.setattr(archive.tools, "_dateutil_parse", None) + date_string = date['dt'].isoformat() + assert date_string == date['str_iso'] + dt = archive.tools.parse_date(date_string) + assert dt == date['dt'] + +@pytest.mark.parametrize("date", testdates) +@pytest.mark.skipif(archive.tools._dateutil_parse is None, + reason="Need dateutil.parser") +def test_date_str_iso_blanksep_dateutil(date): + """Test parse_date() with ISO 8601 dates using a space as separator in + the case that dateutil.parser is available. + """ + date_string = date['dt'].isoformat(sep=' ') + assert date_string == date['str_iso_sp'] + dt = archive.tools.parse_date(date_string) + assert dt == date['dt'] + +@pytest.mark.parametrize("date", testdates) +def test_date_str_iso_no_blanksep_dateutil(monkeypatch, date): + """Test parse_date() with ISO 8601 dates using a space as separator in + the case that dateutil.parser is not available. 
+ """ + monkeypatch.setattr(archive.tools, "_dateutil_parse", None) + date_string = date['dt'].isoformat(sep=' ') + assert date_string == date['str_iso_sp'] + dt = archive.tools.parse_date(date_string) + assert dt == date['dt'] + +@pytest.mark.skipif(archive.tools.gettz is None, reason="Need dateutil.tz") +def test_now_str_dateutil(): + """Test now_str() in the case that dateutil.tz is available. + """ + date_string = archive.tools.now_str() + # It doesn't make much sense to inspect the result as it depends + # on the current date and time. Just check that parse_date() can + # make sense of it and returns a datetime that includes a time + # zone. + dt = archive.tools.parse_date(date_string) + assert dt.tzinfo is not None + +def test_now_str_no_dateutil(monkeypatch): + """Test now_str() in the case that dateutil.tz is not available. + """ + monkeypatch.setattr(archive.tools, "gettz", None) + date_string = archive.tools.now_str() + # It doesn't make much sense to inspect the result as it depends + # on the current date and time. Just check that parse_date() can + # make sense of it and returns a datetime that does not include a + # time zone. + dt = archive.tools.parse_date(date_string) + assert dt.tzinfo is None From 4753c73902cf485f41eff97ff09d48ec6ee0c17f Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 20 May 2021 16:32:51 +0200 Subject: [PATCH 065/138] Update changelog --- CHANGES.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 3ed1d55..dc4069f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -40,6 +40,13 @@ Bug fixes and minor changes + `#58`_: Weaken the condition introduced in `#9`_ that basedir must be a directory. ++ `#61`_: Review date helper functions in :mod:`archive.tools` + + - Add :func:`date_str_rfc5322`. + + - :func:`parse_date` now also accepts date strings as returned by + :meth:`datetime.datetime.isoformat`. 
+ + `#53`_, `#54`_: Spurious :exc:`FileNotFoundError` from :meth:`Archive.create` when passing a relative path as `workdir` argument. @@ -62,6 +69,7 @@ Bug fixes and minor changes .. _#58: https://github.com/RKrahl/archive-tools/pull/58 .. _#59: https://github.com/RKrahl/archive-tools/pull/59 .. _#60: https://github.com/RKrahl/archive-tools/pull/60 +.. _#61: https://github.com/RKrahl/archive-tools/pull/61 0.5.1 (2020-12-12) From d06dbb7bd85f573feebc1058564a86cea1f78d88 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 20 May 2021 18:01:21 +0200 Subject: [PATCH 066/138] Start an archive.index module --- archive/index.py | 140 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 archive/index.py diff --git a/archive/index.py b/archive/index.py new file mode 100644 index 0000000..11521a1 --- /dev/null +++ b/archive/index.py @@ -0,0 +1,140 @@ +"""Provide the ArchiveIndex class that represents an index of archives. +""" + +from collections.abc import Mapping, Sequence +from distutils.version import StrictVersion +from pathlib import Path +import yaml +from archive.archive import Archive +from archive.tools import parse_date + + +class IndexItem: + + def __init__(self, data=None, archive=None): + if data is not None: + self.date = parse_date(data['date']) + self.path = Path(data['path']) + self.host = data.get('host') + self.policy = data.get('policy') + self.user = data.get('user') + self.schedule = data.get('schedule') + elif archive is not None: + self.date = parse_date(archive.manifest.head['Date']) + self.path = archive.path + tagmap = dict() + try: + tags = archive.manifest.head['Tags'] + except KeyError: + pass + else: + for t in tags: + try: + k, v = t.split(':') + except ValueError: + continue + tagmap[k] = v + self.host = tagmap.get('host') + self.policy = tagmap.get('policy') + self.user = tagmap.get('user') + self.schedule = tagmap.get('schedule') + else: + raise TypeError("Either data or archive must be 
provided") + + def as_dict(self): + """Return a dictionary representation of this objects. + """ + d = { + 'date': self.date.isoformat(sep=' '), + 'path': str(self.path), + } + for k in ('host', 'policy', 'user', 'schedule'): + v = getattr(self, k, None) + if v: + d[k] = v + return d + + def __ge__(self, other): + """self >= other + + Only implemented if other is a mapping. In this case, return + True if all key value pair in other are also set in self, + False otherwise. + """ + if isinstance(other, Mapping): + d = self.as_dict() + for k, v in other.items(): + try: + if d[k] != v: + return False + except KeyError: + return False + else: + return True + else: + return NotImplemented + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self.as_dict()) + + +class ArchiveIndex(Sequence): + + Version = "1.0" + + def __init__(self, fileobj=None): + if fileobj is not None: + docs = yaml.safe_load_all(fileobj) + self.head = next(docs) + self.items = [ IndexItem(data=d) for d in next(docs) ] + else: + self.head = { + "Version": self.Version, + } + self.items = [] + + def __len__(self): + return len(self.items) + + def __getitem__(self, index): + return self.items.__getitem__(index) + + def append(self, i): + self.items.append(i) + + @property + def version(self): + return StrictVersion(self.head["Version"]) + + def find(self, path): + for i in self: + if i.path == path: + return i + else: + return None + + def write(self, fileobj): + fileobj.write("%YAML 1.1\n".encode("ascii")) + yaml.dump(self.head, stream=fileobj, encoding="ascii", + default_flow_style=False, explicit_start=True) + yaml.dump([ i.as_dict() for i in self ], + stream=fileobj, encoding="ascii", + default_flow_style=False, explicit_start=True) + + def add_archives(self, paths, prune=False): + seen = set() + for p in paths: + p = p.resolve() + seen.add(p) + if self.find(p): + continue + with Archive().open(p) as archive: + self.append(IndexItem(archive=archive)) + if prune: + items = [ i for i 
in self if i.path in seen ] + self.items = items + + def sort(self, *, key=None, reverse=False): + if key is None: + key = lambda i: i.date + self.items.sort(key=key, reverse=reverse) From d0eaf5d8e5ca1ba6e1912dd57d06e1b8ece3c227 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 20 May 2021 21:59:08 +0200 Subject: [PATCH 067/138] Copy the date and a few tags from input to the output archive --- extra/makeincr.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/extra/makeincr.py b/extra/makeincr.py index 3f06a5e..7dc0d47 100755 --- a/extra/makeincr.py +++ b/extra/makeincr.py @@ -33,6 +33,21 @@ def __init__(self, inp_arch): self.inp_arch = inp_arch super().__init__() + def _create(self, mode): + self.manifest.head['Date'] = self.inp_arch.manifest.head['Date'] + tags = [] + for t in self.inp_arch.manifest.tags: + try: + k, v = t.split(':') + except ValueError: + continue + else: + if k in ('host', 'policy', 'user'): + tags.append(t) + if tags: + self.manifest.head['Tags'] = tags + super()._create(mode) + def _add_item(self, tarf, fi, arcname): inp_tarf = self.inp_arch._file inp_arcname = self.inp_arch._arcname(fi.path) From f86cded0dc4d5a26533d66750f69fff1dc5db87f Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 21 May 2021 11:39:39 +0200 Subject: [PATCH 068/138] Make compression argument to Archive.create() optional --- CHANGES.rst | 4 ++++ archive/archive.py | 17 ++++++++++++++++- archive/cli/create.py | 14 -------------- extra/makeincr.py | 16 +--------------- 4 files changed, 21 insertions(+), 30 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index dc4069f..9100714 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -47,6 +47,10 @@ Bug fixes and minor changes - :func:`parse_date` now also accepts date strings as returned by :meth:`datetime.datetime.isoformat`. ++ Make `compression` keyword argument to :meth:`Archive.create` + optional. The default will be derived from the suffixes of the + `path` argument. 
+ + `#53`_, `#54`_: Spurious :exc:`FileNotFoundError` from :meth:`Archive.create` when passing a relative path as `workdir` argument. diff --git a/archive/archive.py b/archive/archive.py index c2e63d4..2365d49 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -52,6 +52,15 @@ def set_path(self, basedir): self.path = basedir / self.name +compression_map = { + '.tar': '', + '.tar.gz': 'gz', + '.tar.bz2': 'bz2', + '.tar.xz': 'xz', +} +"""Map path suffix to compression mode.""" + + class Archive: def __init__(self): @@ -63,9 +72,15 @@ def __init__(self): self._dedup = None self._dupindex = None - def create(self, path, compression, paths=None, fileinfos=None, + def create(self, path, compression=None, paths=None, fileinfos=None, basedir=None, workdir=None, excludes=None, dedup=DedupMode.LINK, tags=None): + if compression is None: + try: + compression = compression_map["".join(path.suffixes)] + except KeyError: + # Last ressort default + compression = 'gz' mode = 'x:' + compression save_wd = None try: diff --git a/archive/cli/create.py b/archive/cli/create.py index bf5fc4d..7e65f38 100644 --- a/archive/cli/create.py +++ b/archive/cli/create.py @@ -5,21 +5,7 @@ from archive.archive import Archive, DedupMode -suffix_map = { - '.tar': 'none', - '.tar.gz': 'gz', - '.tar.bz2': 'bz2', - '.tar.xz': 'xz', -} -"""Map path suffix to compression mode.""" - def create(args): - if args.compression is None: - try: - args.compression = suffix_map["".join(args.archive.suffixes)] - except KeyError: - # Last ressort default - args.compression = 'gz' if args.compression == 'none': args.compression = '' archive = Archive().create(args.archive, args.compression, args.files, diff --git a/extra/makeincr.py b/extra/makeincr.py index 7dc0d47..83cdea5 100755 --- a/extra/makeincr.py +++ b/extra/makeincr.py @@ -13,15 +13,6 @@ from archive.manifest import DiffStatus, _common_checksum, diff_manifest -suffix_map = { - '.tar': '', - '.tar.gz': 'gz', - '.tar.bz2': 'bz2', - '.tar.xz': 'xz', -} 
-"""Map path suffix to compression mode.""" - - class CopyArchive(Archive): """Read items from a TarFile. @@ -103,12 +94,7 @@ def main(): with Archive().open(p) as base: fileinfos = filter_fileinfos(base.manifest, fileinfos, algorithm) - try: - compression = suffix_map["".join(args.output.suffixes)] - except KeyError: - compression = 'gz' - archive = CopyArchive(inp_archive).create(args.output, compression, - fileinfos=fileinfos) + archive = CopyArchive(inp_archive).create(args.output, fileinfos=fileinfos) if __name__ == "__main__": From 57d8fa4c09b9561ecc134d639bbf90d99c9333c4 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 21 May 2021 13:55:17 +0200 Subject: [PATCH 069/138] Review the design of class Config: Config is now derived from ChainMap rather then having a ChainMap as attribute --- archive/config.py | 16 ++++++++-------- scripts/imap-to-archive.py | 22 ++++++++++------------ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/archive/config.py b/archive/config.py index 365a5c0..01e0137 100644 --- a/archive/config.py +++ b/archive/config.py @@ -11,7 +11,7 @@ import configparser from archive.exception import ConfigError -class Config: +class Config(ChainMap): defaults = dict() config_file = None @@ -21,7 +21,7 @@ def __init__(self, args, config_section=None): args_cfg = { k:vars(args)[k] for k in self.args_options if vars(args)[k] is not None } - self.config = ChainMap({}, args_cfg) + super().__init__({}, args_cfg) if self.config_file and config_section: cp = configparser.ConfigParser(comment_prefixes=('#', '!'), interpolation=None) @@ -31,20 +31,20 @@ def __init__(self, args, config_section=None): self.config_section = [] for section in config_section: try: - self.config.maps.append(cp[section]) + self.maps.append(cp[section]) self.config_section.append(section) except KeyError: pass - self.config.maps.append(self.defaults) + self.maps.append(self.defaults) - def get(self, option, required=False, subst=True, split=False): - value = 
self.config[option] + def get(self, key, required=False, subst=True, split=False): + value = super().get(key) if value is None: if required: - raise ConfigError("%s not specified" % option) + raise ConfigError("%s not specified" % key) else: if subst: - value = value % self.config + value = value % self if split: value = value.split() return value diff --git a/scripts/imap-to-archive.py b/scripts/imap-to-archive.py index 46420c0..368b93f 100644 --- a/scripts/imap-to-archive.py +++ b/scripts/imap-to-archive.py @@ -41,18 +41,17 @@ def __init__(self, args): if not self.config_section: raise ConfigError("configuration section %s not found" % args.config_section) - if self.config['security'] not in security_methods: - raise ConfigError("invalid security method '%s'" - % self.config['security']) - if not self.config['host']: + if self['security'] not in security_methods: + raise ConfigError("invalid security method '%s'" % self['security']) + if not self['host']: raise ConfigError("IMAP4 host name not specified") - if self.config['port'] is not None: - self.config['port'] = int(config['port']) - self.config['ssl'] = self.config['security'] == 'imaps' - if not self.config['user']: + if self['port'] is not None: + self['port'] = int(config['port']) + self['ssl'] = self['security'] == 'imaps' + if not self['user']: raise ConfigError("IMAP4 user name not specified") - if self.config['pass'] is None: - self.config['pass'] = getpass.getpass() + if self['pass'] is None: + self['pass'] = getpass.getpass() argparser = argparse.ArgumentParser(add_help=False) @@ -81,7 +80,7 @@ def __init__(self, args): logging.getLogger().setLevel(logging.DEBUG) try: - config = Config(args).config + config = Config(args) except ConfigError as e: print("%s: configuration error: %s" % (argparser.prog, e), file=sys.stderr) sys.exit(2) @@ -120,4 +119,3 @@ def getmsgs(imap, basedir): log.debug("Login to %s successful", config['host']) archive = MailArchive() archive.create(archive_path, getmsgs(imap, 
"INBOX"), server=config['host']) - From 7204b336a592899c3b3ad9982cd250ed0a647321 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 21 May 2021 12:26:11 +0200 Subject: [PATCH 070/138] Add optional type conversion in Config.get() --- archive/config.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/archive/config.py b/archive/config.py index 01e0137..d711316 100644 --- a/archive/config.py +++ b/archive/config.py @@ -37,7 +37,7 @@ def __init__(self, args, config_section=None): pass self.maps.append(self.defaults) - def get(self, key, required=False, subst=True, split=False): + def get(self, key, required=False, subst=True, split=False, type=None): value = super().get(key) if value is None: if required: @@ -46,5 +46,11 @@ def get(self, key, required=False, subst=True, split=False): if subst: value = value % self if split: - value = value.split() + if type: + value = [type(v) for v in value.split()] + else: + value = value.split() + else: + if type: + value = type(value) return value From 606a782754fb0328f690f87c3cc017aff1cd0ad8 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 20 May 2021 21:39:48 +0200 Subject: [PATCH 071/138] Add a comment with an implementation note on the schedule --- scripts/backup-tool.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/backup-tool.py b/scripts/backup-tool.py index 5f9a1e0..e29c165 100644 --- a/scripts/backup-tool.py +++ b/scripts/backup-tool.py @@ -17,6 +17,15 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") log = logging.getLogger(__name__) + +# Note: in the long run, we want to select the schedule (e.g. set the +# conditions, when to choose which schedule) in the configuration +# file, and even put the definition and semantics (e.g. which +# schedules exist and what do they mean) there. But this seem to be +# most tricky part of the whole project. We want to get the basics +# working first. 
So for the moment, we hard code definition and +# semantics here and select the schedule as a command line argument. + schedules = {'full', 'cumu', 'incr'} def get_config_file(): try: From c28cec87213e6b83eb163bc263378244ae5b9631 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 21 May 2021 15:24:40 +0200 Subject: [PATCH 072/138] Review of configuration --- etc/backup.cfg | 6 ++-- scripts/backup-tool.py | 65 +++++++++++++++++++++++++++++++----------- 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/etc/backup.cfg b/etc/backup.cfg index f95ffbb..0ba5a91 100644 --- a/etc/backup.cfg +++ b/etc/backup.cfg @@ -15,7 +15,7 @@ ! /usr/local ! excludes = ! /root/.cache -! targetdir = /proj/backup/auto +! backupdir = /proj/backup/auto # The special policy user is used when the --user command line option is used. [user] @@ -25,11 +25,11 @@ ! %(home)s/.cache ! %(home)s/.thumbnails ! %(home)s/tmp -! targetdir = /proj/backup/auto +! backupdir = /proj/backup/auto # Override settings on a particular host ! [db-host] -! targetdir = /proj/db +! backupdir = /proj/db ! ! [db-host/sys] ! 
dirs = diff --git a/scripts/backup-tool.py b/scripts/backup-tool.py index e29c165..9f56078 100644 --- a/scripts/backup-tool.py +++ b/scripts/backup-tool.py @@ -38,34 +38,67 @@ class Config(archive.config.Config): defaults = { 'dirs': None, 'excludes': "", - 'targetdir': None, - 'backupdir': "%(targetdir)s", + 'backupdir': None, + 'targetdir': "%(backupdir)s", 'name': "%(host)s-%(date)s-%(schedule)s.tar.bz2", - 'tags': "", } - config_file = get_config_file() args_options = ('policy', 'user', 'schedule') def __init__(self, args): host = socket.gethostname() + config_file = get_config_file() sections = ("%s/%s" % (host, args.policy), host, args.policy) + self.config_file = config_file super().__init__(args, config_section=sections) if not self.config_file: - raise ConfigError("configuration file %s not found" - % self.config_file) - self.config['host'] = host - self.config['date'] = datetime.date.today().strftime("%y%m%d") + raise ConfigError("configuration file %s not found" % config_file) + self['host'] = host + self['date'] = datetime.date.today().strftime("%y%m%d") if args.user: try: - self.config['home'] = pwd.getpwnam(self.config['user']).pw_dir + self['home'] = pwd.getpwnam(args.user).pw_dir except KeyError: pass - self.config['name'] = self.get('name', required=True) - self.config['dirs'] = self.get('dirs', required=True, split=True) - self.config['excludes'] = self.get('excludes', split=True) - self.config['targetdir'] = self.get('targetdir', required=True) - self.config['backupdir'] = self.get('backupdir') - self.config['tags'] = self.get('tags', split=True) + + @property + def host(self): + return self.get('host') + + @property + def policy(self): + return self.get('policy') + + @property + def user(self): + return self.get('user') + + @property + def schedule(self): + return self.get('schedule') + + @property + def name(self): + return self.get('name', required=True) + + @property + def dirs(self): + return self.get('dirs', required=True, split=True, 
type=Path) + + @property + def excludes(self): + return self.get('excludes', split=True, type=Path) + + @property + def backupdir(self): + return self.get('backupdir', required=True, type=Path) + + @property + def targetdir(self): + return self.get('targetdir', required=True, type=Path) + + @property + def path(self): + return self.targetdir / self.name argparser = argparse.ArgumentParser() clsgrp = argparser.add_mutually_exclusive_group() @@ -82,7 +115,7 @@ def __init__(self, args): args.policy = 'user' try: - config = Config(args).config + config = Config(args) except ConfigError as e: print("%s: configuration error: %s" % (argparser.prog, e), file=sys.stderr) sys.exit(2) From 8f613d1068af857de041ff134440549c4f70240d Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Fri, 21 May 2021 16:15:47 +0200 Subject: [PATCH 073/138] backup-tool: implement creation of the archive --- scripts/backup-tool.py | 75 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/scripts/backup-tool.py b/scripts/backup-tool.py index 9f56078..559cfd6 100644 --- a/scripts/backup-tool.py +++ b/scripts/backup-tool.py @@ -10,13 +10,16 @@ import pwd import socket import sys +from archive.archive import Archive import archive.config -from archive.exception import ConfigError -from archive.tools import now_str +from archive.exception import ConfigError, ArchiveCreateError +from archive.index import ArchiveIndex +from archive.manifest import Manifest, DiffStatus, diff_manifest logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") log = logging.getLogger(__name__) +os.umask(0o277) # Note: in the long run, we want to select the schedule (e.g. set the # conditions, when to choose which schedule) in the configuration @@ -26,6 +29,11 @@ # working first. So for the moment, we hard code definition and # semantics here and select the schedule as a command line argument. 
+# TODO: +# - consider add configuration options for dedup mode and for checksum +# algorithm. +# - consider adding more log messages and logging configuration. + schedules = {'full', 'cumu', 'incr'} def get_config_file(): try: @@ -100,6 +108,14 @@ def targetdir(self): def path(self): return self.targetdir / self.name + +def filter_fileinfos(base, fileinfos): + for stat, fi1, fi2 in diff_manifest(base, fileinfos): + if stat == DiffStatus.MISSING_B or stat == DiffStatus.MATCH: + continue + yield fi2 + + argparser = argparse.ArgumentParser() clsgrp = argparser.add_mutually_exclusive_group() clsgrp.add_argument('--policy', default='sys') @@ -119,3 +135,58 @@ def path(self): except ConfigError as e: print("%s: configuration error: %s" % (argparser.prog, e), file=sys.stderr) sys.exit(2) + +log.info("host:%s, policy:%s", config.host, config.policy) + +idx_file = config.backupdir / ".index.yaml" +if idx_file.is_file(): + log.debug("reading index file %s", str(idx_file)) + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) +else: + log.debug("index file not found") + idx = ArchiveIndex() +idx.sort() + +f_d = dict(host=config.host, policy=config.policy) +if config.policy == 'user': + f_d['user'] = config.user +last_full = None +last_cumu = None +last_incr = [] +for i in filter(lambda i: i >= f_d, idx): + if i.schedule == 'full': + last_full = i + last_cumu = None + last_incr = [] + elif i.schedule == 'cumu': + last_cumu = i + last_incr = [] + elif i.schedule == 'incr': + last_incr.append(i) + +fileinfos = Manifest(paths=config.dirs, excludes=config.excludes) +tags = [ + "host:%s" % config.host, + "policy:%s" % config.policy, + "schedule:%s" % config.schedule, +] +if config.user: + tags.append("user:%s" % config.user) + +if config.schedule != 'full': + if not last_full: + raise ArchiveCreateError("No previous full backup found, " + "can not create %s archive" % config.schedule) + base_archives = [last_full.path] + if config.schedule == 'incr': + if last_cumu: + 
base_archives.append(last_cumu.path) + base_archives.extend([i.path for i in last_incr]) + for p in base_archives: + log.debug("considering %s to create differential archive", p) + with Archive().open(p) as base: + fileinfos = filter_fileinfos(base.manifest, fileinfos) + +log.debug("creating archive %s", config.path) +archive = Archive().create(config.path, fileinfos=fileinfos, tags=tags) From 91d2973dbf04815cdcf7af29be85bd83bb4bd18d Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 25 May 2021 21:21:41 +0200 Subject: [PATCH 074/138] Add a test for Issue #62 --- tests/test_03_create_misc.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_03_create_misc.py b/tests/test_03_create_misc.py index ba01bc6..ac66796 100644 --- a/tests/test_03_create_misc.py +++ b/tests/test_03_create_misc.py @@ -2,9 +2,11 @@ """ from pathlib import Path +import sys from tempfile import TemporaryFile import pytest from archive import Archive +from archive.exception import ArchiveIntegrityError from conftest import * @@ -17,6 +19,15 @@ DataFile(Path("base", "data", "misc", "rnd.dat"), 0o644), DataSymLink(Path("base", "data", "s.dat"), Path("misc", "rnd.dat")), ] +# Test data having a directory with a long path name, needed in +# test_create_long_directory_name() +long_dir_path = Path("lets_start_with_a_somewhat_long_directory_name_" + "because_we_need_a_very_long_overall_path") +testdata_long_dir = [ + DataDir(long_dir_path, 0o755), + DataDir(long_dir_path / "sub-1", 0o755), + DataDir(long_dir_path / "sub-directory-2", 0o755), +] @pytest.fixture(scope="module") def test_dir(tmpdir): @@ -113,3 +124,18 @@ def test_create_tags(test_dir, monkeypatch, tags, expected): Archive().create(archive_path, "", [Path("base")], tags=tags) with Archive().open(archive_path) as archive: assert archive.manifest.tags == expected + +@pytest.mark.xfail(sys.version_info < (3, 8), + reason="Issue #62", raises=ArchiveIntegrityError) +def 
test_create_long_directory_name(tmpdir, monkeypatch): + """An archive containing a directory with a long path name. + + Verification fails if the archive is created in the GNU tar format. + """ + setup_testdata(tmpdir, testdata_long_dir) + monkeypatch.chdir(tmpdir) + archive_path = Path("archive-longdir.tar") + Archive().create(archive_path, "", [long_dir_path]) + with Archive().open(archive_path) as archive: + check_manifest(archive.manifest, testdata_long_dir) + archive.verify() From f1dfd6d2dc17c0338945fd71a80b6527c6cedb00 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 25 May 2021 21:35:35 +0200 Subject: [PATCH 075/138] Explicitly select POSIX.1-2001 (pax) format in the tarfile. Fix #62. --- archive/archive.py | 2 +- tests/test_03_create_misc.py | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index 2365d49..f4e0947 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -117,7 +117,7 @@ def create(self, path, compression=None, paths=None, fileinfos=None, return self def _create(self, mode): - with tarfile.open(self.path, mode) as tarf: + with tarfile.open(self.path, mode, format=tarfile.PAX_FORMAT) as tarf: with tempfile.TemporaryFile() as tmpf: self.manifest.write(tmpf) tmpf.seek(0) diff --git a/tests/test_03_create_misc.py b/tests/test_03_create_misc.py index ac66796..46dd0eb 100644 --- a/tests/test_03_create_misc.py +++ b/tests/test_03_create_misc.py @@ -2,11 +2,9 @@ """ from pathlib import Path -import sys from tempfile import TemporaryFile import pytest from archive import Archive -from archive.exception import ArchiveIntegrityError from conftest import * @@ -125,8 +123,6 @@ def test_create_tags(test_dir, monkeypatch, tags, expected): with Archive().open(archive_path) as archive: assert archive.manifest.tags == expected -@pytest.mark.xfail(sys.version_info < (3, 8), - reason="Issue #62", raises=ArchiveIntegrityError) def test_create_long_directory_name(tmpdir, monkeypatch): """An
archive containing a directory with a long path name. From 0d20852a3f386c4c170f496c580a0687d05c1091 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 25 May 2021 21:44:34 +0200 Subject: [PATCH 076/138] Update changelog --- CHANGES.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9100714..a9e27c5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -20,6 +20,10 @@ New features + `#50`_, `#51`_: Add a header with some metadata to the index in a mail archive created by :class:`MailArchive`. ++ `#62`_, `#63`_: Explicitly select POSIX.1-2001 (pax) format in the + tarfile. This fixes failing verification if the archive contains a + directory with a long path name. + Incompatible changes -------------------- @@ -74,6 +78,8 @@ Bug fixes and minor changes .. _#59: https://github.com/RKrahl/archive-tools/pull/59 .. _#60: https://github.com/RKrahl/archive-tools/pull/60 .. _#61: https://github.com/RKrahl/archive-tools/pull/61 +.. _#62: https://github.com/RKrahl/archive-tools/issues/62 +..
_#63: https://github.com/RKrahl/archive-tools/pull/63 0.5.1 (2020-12-12) From b8befa079b9aa32a341ab664bd4db457dedb2b52 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 25 May 2021 22:16:43 +0200 Subject: [PATCH 077/138] Revert 3b4cba4: not needed any more after the fix of #62 --- extra/makeincr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/makeincr.py b/extra/makeincr.py index 83cdea5..6e9df47 100755 --- a/extra/makeincr.py +++ b/extra/makeincr.py @@ -42,7 +42,7 @@ def _create(self, mode): def _add_item(self, tarf, fi, arcname): inp_tarf = self.inp_arch._file inp_arcname = self.inp_arch._arcname(fi.path) - ti = inp_tarf._getmember(inp_arcname, normalize=True) + ti = inp_tarf.getmember(inp_arcname) if fi.is_file(): dup = self._check_duplicate(ti, arcname) if dup: From b11c904effa553a1c98142b295dcc84709995f33 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 27 May 2021 21:47:45 +0200 Subject: [PATCH 078/138] Add convert-to-pax.py script --- extra/convert-to-pax.py | 44 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 extra/convert-to-pax.py diff --git a/extra/convert-to-pax.py b/extra/convert-to-pax.py new file mode 100755 index 0000000..4a49fd3 --- /dev/null +++ b/extra/convert-to-pax.py @@ -0,0 +1,44 @@ +#! /usr/bin/python3 +"""Convert a tar file to POSIX.1-2001 (pax) format. 
+""" + +import argparse +from pathlib import Path +import tarfile + + +compression_map = { + '.tar': '', + '.tar.gz': 'gz', + '.tar.bz2': 'bz2', + '.tar.xz': 'xz', +} +"""Map path suffix to compression mode.""" + + +def main(): + argparser = argparse.ArgumentParser() + argparser.add_argument('input', type=Path, + help=("input tar file")) + argparser.add_argument('output', type=Path, + help=("output tar file")) + args = argparser.parse_args() + + try: + compression = compression_map["".join(args.output.suffixes)] + except KeyError: + # Last ressort default + compression = 'gz' + mode = 'x:' + compression + pax = tarfile.PAX_FORMAT + with tarfile.open(args.input, mode='r') as inp: + with tarfile.open(args.output, mode=mode, format=pax) as outp: + for ti in inp: + if ti.isfile(): + outp.addfile(ti, fileobj=inp.extractfile(ti)) + else: + outp.addfile(ti) + + +if __name__ == "__main__": + main() From c3d8bc585b676a308e4ad54bcc90ab67f1a954b2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 27 May 2021 21:48:23 +0200 Subject: [PATCH 079/138] Typo --- extra/makeincr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/makeincr.py b/extra/makeincr.py index 6e9df47..19b7233 100755 --- a/extra/makeincr.py +++ b/extra/makeincr.py @@ -84,7 +84,7 @@ def main(): argparser.add_argument('input', type=Path, help=("input archive")) argparser.add_argument('output', type=Path, - help=("input archive")) + help=("output archive")) args = argparser.parse_args() inp_archive = Archive().open(args.input) From 3f38ee3289fbc3bd293d5b354e3b96387b611dee Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 27 May 2021 22:31:51 +0200 Subject: [PATCH 080/138] Add a comment explaining the relation of convert-to-pax.py with #62 --- extra/convert-to-pax.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/extra/convert-to-pax.py b/extra/convert-to-pax.py index 4a49fd3..fa37bb6 100755 --- a/extra/convert-to-pax.py +++ b/extra/convert-to-pax.py @@ -1,5 +1,11 @@ #! 
/usr/bin/python3 """Convert a tar file to POSIX.1-2001 (pax) format. + +Note: this script does not use archive-tools. It can be used for any +tar file. But it is related to archive-tools because it may be used to +retroactively repair archives that fail to verify due to `Issue #62`_. + +.. _Issue #62: https://github.com/RKrahl/archive-tools/issues/62 """ import argparse From fc7adf9b20c5fbf22104590b7036ebf6dfca8432 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 29 May 2021 13:44:23 +0200 Subject: [PATCH 081/138] Change diff_manifest() to also report differences in file system metadata for directories and symbol links --- archive/manifest.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/archive/manifest.py b/archive/manifest.py index 307af0e..6012ada 100644 --- a/archive/manifest.py +++ b/archive/manifest.py @@ -289,18 +289,14 @@ def _match(fi_a, fi_b, algorithm): if fi_a.target != fi_b.target: return DiffStatus.SYMLNK_TARGET elif fi_a.type == "f": - # Note: we don't need to compare the size, because if - # the size differs, it's mostly certain that also the - # checksum do. 
- if fi_a.checksum[algorithm] != fi_b.checksum[algorithm]: + if (fi_a.size != fi_b.size or + fi_a.checksum[algorithm] != fi_b.checksum[algorithm]): return DiffStatus.CONTENT - elif (fi_a.uid != fi_b.uid or - fi_a.uname != fi_b.uname or - fi_a.gid != fi_b.gid or - fi_a.gname != fi_b.gname or - fi_a.mode != fi_b.mode or - int(fi_a.mtime) != int(fi_b.mtime)): - return DiffStatus.META + if (fi_a.uid != fi_b.uid or fi_a.uname != fi_b.uname or + fi_a.gid != fi_b.gid or fi_a.gname != fi_b.gname or + fi_a.mode != fi_b.mode or + int(fi_a.mtime) != int(fi_b.mtime)): + return DiffStatus.META return DiffStatus.MATCH it_a = iter(itertools.chain(manifest_a, itertools.repeat(None))) From f1197b1069536ac263b360230ea67734a6826e6b Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 29 May 2021 19:57:12 +0200 Subject: [PATCH 082/138] Add a test for Issue #64 --- tests/test_02_create.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_02_create.py b/tests/test_02_create.py index b07f9e9..6e54a44 100644 --- a/tests/test_02_create.py +++ b/tests/test_02_create.py @@ -4,11 +4,13 @@ import datetime from pathlib import Path import shutil +import stat import subprocess import pytest from pytest_dependency import depends from archive import Archive from archive.manifest import FileInfo, Manifest +from archive.tools import mode_ft from conftest import * @@ -113,6 +115,29 @@ def test_check_content(test_dir, dep_testcase, inclmeta): sha256.wait() assert sha256.returncode == 0 +@pytest.mark.xfail(reason="Issue #64") +@pytest.mark.dependency() +def test_check_fstat(test_dir, dep_testcase): + """Check that file system metadata are preserved + """ + compression, abspath = dep_testcase + flag = absflag(abspath) + archive_path = test_dir / archive_name(ext=compression, tags=[flag]) + outdir = test_dir / "out" + shutil.rmtree(outdir, ignore_errors=True) + outdir.mkdir() + if abspath: + cwd = outdir / "archive" / test_dir.relative_to(test_dir.anchor) + else: + 
cwd = outdir + with Archive().open(archive_path) as archive: + archive.extract(outdir) + for f in testdata: + fstat = (cwd / f.path).lstat() + assert mode_ft[stat.S_IFMT(fstat.st_mode)] == f.type + assert fstat.st_mtime == f.mtime + assert stat.S_IMODE(fstat.st_mode) == f.mode + @pytest.mark.dependency() def test_verify(test_dir, dep_testcase): compression, abspath = dep_testcase From df78d2ec9c3d1bd185fa75433477cc7183c3daf9 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 29 May 2021 20:05:24 +0200 Subject: [PATCH 083/138] Add a method Archive.extract_member() --- archive/archive.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/archive/archive.py b/archive/archive.py index f4e0947..6703786 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -327,6 +327,10 @@ def _check_condition(cond, item, message): else: raise ArchiveIntegrityError("%s: invalid type" % (itemname)) + def extract_member(self, fi, targetdir): + arcname = self._arcname(fi.path) + self._file.extract(arcname, path=str(targetdir)) + def extract(self, targetdir, inclmeta=False): # We extract the directories last in reverse order. 
This way, # the directory attributes, in particular the file modification @@ -338,12 +342,12 @@ def extract(self, targetdir, inclmeta=False): self._file.extract(mi, path=str(targetdir)) for fi in self.manifest: if fi.is_dir(): - dirstack.append(fi.path) + dirstack.append(fi) else: - self._file.extract(self._arcname(fi.path), path=str(targetdir)) + self.extract_member(fi, targetdir) while True: try: - p = dirstack.pop() + fi = dirstack.pop() except IndexError: break - self._file.extract(self._arcname(p), path=str(targetdir)) + self.extract_member(fi, targetdir) From befba4b06ec8cdd4109e3143c341bd8f104ae8c6 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 29 May 2021 20:12:01 +0200 Subject: [PATCH 084/138] Fix #64 --- archive/archive.py | 2 ++ tests/test_02_create.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/archive/archive.py b/archive/archive.py index 6703786..76366c3 100644 --- a/archive/archive.py +++ b/archive/archive.py @@ -329,7 +329,9 @@ def _check_condition(cond, item, message): def extract_member(self, fi, targetdir): arcname = self._arcname(fi.path) + mtimes = (fi.mtime, fi.mtime) self._file.extract(arcname, path=str(targetdir)) + os.utime(targetdir / arcname, mtimes, follow_symlinks=False) def extract(self, targetdir, inclmeta=False): # We extract the directories last in reverse order. 
This way, diff --git a/tests/test_02_create.py b/tests/test_02_create.py index 6e54a44..0a2b3b9 100644 --- a/tests/test_02_create.py +++ b/tests/test_02_create.py @@ -115,7 +115,6 @@ def test_check_content(test_dir, dep_testcase, inclmeta): sha256.wait() assert sha256.returncode == 0 -@pytest.mark.xfail(reason="Issue #64") @pytest.mark.dependency() def test_check_fstat(test_dir, dep_testcase): """Check that file system metadata are preserved From d41f41ed2913355a4b7f1c53d8a0ee83a5201ebd Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 29 May 2021 20:23:51 +0200 Subject: [PATCH 085/138] Update changelog --- CHANGES.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index a9e27c5..8663904 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -55,6 +55,9 @@ Bug fixes and minor changes optional. The default will be derived from the suffixes of the `path` argument. ++ `#65`_: Add a method :meth:`Archive.extract_member` to extract an + individual member of the archive. + + `#53`_, `#54`_: Spurious :exc:`FileNotFoundError` from :meth:`Archive.create` when passing a relative path as `workdir` argument. @@ -64,6 +67,9 @@ Bug fixes and minor changes + `#56`_, `#57`_: Inconsistent result from `archive-tool diff` with option `--skip-dir-content`. ++ `#64`_, `#65`_: :meth:`Archive.extract` does not preserve the file + modification time for symbol links. + + `#48`_: Review and standardize some error messages. .. _#48: https://github.com/RKrahl/archive-tools/pull/48 @@ -80,6 +86,8 @@ Bug fixes and minor changes .. _#61: https://github.com/RKrahl/archive-tools/pull/61 .. _#62: https://github.com/RKrahl/archive-tools/issues/62 .. _#63: https://github.com/RKrahl/archive-tools/pull/63 +.. _#64: https://github.com/RKrahl/archive-tools/issues/64 +.. 
_#65: https://github.com/RKrahl/archive-tools/pull/65 0.5.1 (2020-12-12) From b3f5d755c09b9fd3c39bbda8b580fe18f2e30a65 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 29 May 2021 21:39:32 +0200 Subject: [PATCH 086/138] Fix tests: make sure to preserve the parent directory modification time when modifying test data --- tests/test_02_diff_manifest.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_02_diff_manifest.py b/tests/test_02_diff_manifest.py index 5e284a6..48c2dd4 100644 --- a/tests/test_02_diff_manifest.py +++ b/tests/test_02_diff_manifest.py @@ -1,6 +1,7 @@ """Test diff_manifest() function in archive.manifest. """ +import os from pathlib import Path import shutil from tempfile import TemporaryFile @@ -77,8 +78,10 @@ def test_diff_manifest_modified_file(test_data, testname, monkeypatch): with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") + mtime_base = os.stat(base_dir).st_mtime p = base_dir / "rnd.dat" shutil.copy(gettestdata("rnd2.dat"), p) + os.utime(base_dir, times=(mtime_base, mtime_base)) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 @@ -94,9 +97,11 @@ def test_diff_manifest_symlink_target(test_data, testname, monkeypatch): with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") + mtime_base = os.stat(base_dir).st_mtime p = base_dir / "s.dat" p.unlink() p.symlink_to(Path("msg.txt")) + os.utime(base_dir, times=(mtime_base, mtime_base)) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 @@ -112,9 +117,11 @@ def test_diff_manifest_wrong_type(test_data, testname, monkeypatch): with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") + mtime_base = os.stat(base_dir).st_mtime p = base_dir / "rnd.dat" 
p.unlink() p.symlink_to(Path("data", "rnd.dat")) + os.utime(base_dir, times=(mtime_base, mtime_base)) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 @@ -131,9 +138,11 @@ def test_diff_manifest_missing_files(test_data, testname, monkeypatch): with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") + mtime_base = os.stat(base_dir).st_mtime p1 = base_dir / "rnd.dat" p2 = base_dir / "a.dat" p1.rename(p2) + os.utime(base_dir, times=(mtime_base, mtime_base)) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 2 @@ -155,11 +164,15 @@ def test_diff_manifest_mult(test_data, testname, monkeypatch): with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") + mtime_base = os.stat(base_dir).st_mtime + mtime_data = os.stat(base_dir / "data").st_mtime pm = base_dir / "data" / "rnd.dat" shutil.copy(gettestdata("rnd2.dat"), pm) p1 = base_dir / "msg.txt" p2 = base_dir / "o.txt" p1.rename(p2) + os.utime(base_dir, times=(mtime_base, mtime_base)) + os.utime(base_dir / "data", times=(mtime_data, mtime_data)) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 3 @@ -185,8 +198,10 @@ def test_diff_manifest_dircontent(test_data, testname, monkeypatch): with Archive().open(Path("archive.tar")) as archive: manifest_ref = archive.manifest base_dir = Path("base") + mtime_base = os.stat(base_dir).st_mtime pd = base_dir / "data" shutil.rmtree(pd) + os.utime(base_dir, times=(mtime_base, mtime_base)) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 2 @@ -211,8 +226,10 @@ def test_diff_manifest_add_file_last(test_data, testname, monkeypatch): with Archive().open(Path("archive.tar")) as 
archive: manifest_ref = archive.manifest base_dir = Path("base") + mtime_base = os.stat(base_dir).st_mtime p = base_dir / "zzz.dat" shutil.copy(gettestdata("rnd2.dat"), p) + os.utime(base_dir, times=(mtime_base, mtime_base)) fileinfos = get_fileinfos(base_dir) diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref))) assert len(diff) == 1 From dd30b4caa595ee3178d70791d1152f5c90c73a53 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 29 May 2021 21:50:33 +0200 Subject: [PATCH 087/138] Update changelog --- CHANGES.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 8663904..3c25f2f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -15,7 +15,9 @@ New features + `#54`_: Add new keyword argument `fileinfos` that :class:`Manifest` and :meth:`Archive.create` accept. -+ `#57`_: Add :func:`diff_manifest`. ++ `#57`_, `#66`_: Add :func:`diff_manifest`. The `archive-tool diff` + command with `--report-meta` flag also reports differences in file + system metadata for directories and symbol links. + `#50`_, `#51`_: Add a header with some metadata to the index in a mail archive created by :class:`MailArchive`. @@ -88,6 +90,7 @@ Bug fixes and minor changes .. _#63: https://github.com/RKrahl/archive-tools/pull/63 .. _#64: https://github.com/RKrahl/archive-tools/issues/64 .. _#65: https://github.com/RKrahl/archive-tools/pull/65 +.. 
_#66: https://github.com/RKrahl/archive-tools/pull/66 0.5.1 (2020-12-12) From 037fe915950e6742d80871bb674a25a61653633e Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 30 May 2021 16:06:32 +0200 Subject: [PATCH 088/138] Add a new package archive.bt and move the functionality of the backup-tool command line script into modules in that package --- archive/bt/__init__.py | 61 +++++++++++++ archive/bt/config.py | 83 ++++++++++++++++++ archive/bt/create.py | 82 ++++++++++++++++++ scripts/backup-tool.py | 189 +---------------------------------------- setup.py | 2 +- 5 files changed, 229 insertions(+), 188 deletions(-) create mode 100644 archive/bt/__init__.py create mode 100644 archive/bt/config.py create mode 100644 archive/bt/create.py diff --git a/archive/bt/__init__.py b/archive/bt/__init__.py new file mode 100644 index 0000000..2483e83 --- /dev/null +++ b/archive/bt/__init__.py @@ -0,0 +1,61 @@ +"""Internal modules used by the backup-tool command line tool. +""" + +import argparse +import logging +import sys +from archive.exception import ArchiveError, ConfigError +from archive.bt.config import Config +from archive.bt.create import create + + +# TODO: +# +# - in the long run, we want to select the schedule (e.g. set the +# conditions, when to choose which schedule) in the configuration +# file, and even put the definition and semantics (e.g. which +# schedules exist and what do they mean) there. But this seem to be +# most tricky part of the whole project. We want to get the basics +# working first. So for the moment, we hard code definition and +# semantics here and select the schedule as a command line argument. +# +# - consider add configuration options for dedup mode and for checksum +# algorithm. +# +# - consider adding more log messages and logging configuration. 
+ +log = logging.getLogger(__name__) +schedules = {'full', 'cumu', 'incr'} + +def backup_tool(): + logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") + + argparser = argparse.ArgumentParser() + clsgrp = argparser.add_mutually_exclusive_group() + clsgrp.add_argument('--policy', default='sys') + clsgrp.add_argument('--user') + argparser.add_argument('--schedule', choices=schedules, default='full') + argparser.add_argument('-v', '--verbose', action='store_true', + help=("verbose diagnostic output")) + args = argparser.parse_args() + + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + if args.user: + args.policy = 'user' + + try: + config = Config(args) + except ConfigError as e: + print("%s: configuration error: %s" % (argparser.prog, e), + file=sys.stderr) + sys.exit(2) + + log.info("host:%s, policy:%s", config.host, config.policy) + + try: + create(config) + except ArchiveError as e: + print("%s: error: %s" % (argparser.prog, e), + file=sys.stderr) + sys.exit(1) diff --git a/archive/bt/config.py b/archive/bt/config.py new file mode 100644 index 0000000..f1e14ce --- /dev/null +++ b/archive/bt/config.py @@ -0,0 +1,83 @@ +"""Configuration for the backup-tool command line tool. 
+""" + +import datetime +import os +from pathlib import Path +import pwd +import socket +import archive.config + + +def get_config_file(): + try: + return os.environ['BACKUP_CFG'] + except KeyError: + return "/etc/backup.cfg" + +class Config(archive.config.Config): + + defaults = { + 'dirs': None, + 'excludes': "", + 'backupdir': None, + 'targetdir': "%(backupdir)s", + 'name': "%(host)s-%(date)s-%(schedule)s.tar.bz2", + } + args_options = ('policy', 'user', 'schedule') + + def __init__(self, args): + host = socket.gethostname() + config_file = get_config_file() + sections = ("%s/%s" % (host, args.policy), host, args.policy) + self.config_file = config_file + super().__init__(args, config_section=sections) + if not self.config_file: + raise ConfigError("configuration file %s not found" % config_file) + self['host'] = host + self['date'] = datetime.date.today().strftime("%y%m%d") + if args.user: + try: + self['home'] = pwd.getpwnam(args.user).pw_dir + except KeyError: + pass + + @property + def host(self): + return self.get('host') + + @property + def policy(self): + return self.get('policy') + + @property + def user(self): + return self.get('user') + + @property + def schedule(self): + return self.get('schedule') + + @property + def name(self): + return self.get('name', required=True) + + @property + def dirs(self): + return self.get('dirs', required=True, split=True, type=Path) + + @property + def excludes(self): + return self.get('excludes', split=True, type=Path) + + @property + def backupdir(self): + return self.get('backupdir', required=True, type=Path) + + @property + def targetdir(self): + return self.get('targetdir', required=True, type=Path) + + @property + def path(self): + return self.targetdir / self.name diff --git a/archive/bt/create.py b/archive/bt/create.py new file mode 100644 index 0000000..9bbcd34 --- /dev/null +++ b/archive/bt/create.py @@ -0,0 +1,82 @@ +"""Create a backup. 
+""" + +import logging +import os +from archive.archive import Archive +from archive.exception import ArchiveCreateError +from archive.index import ArchiveIndex +from archive.manifest import Manifest, DiffStatus, diff_manifest + + +log = logging.getLogger(__name__) + +def get_prev_backups(config): + idx_file = config.backupdir / ".index.yaml" + if idx_file.is_file(): + log.debug("reading index file %s", str(idx_file)) + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) + else: + log.debug("index file not found") + idx = ArchiveIndex() + idx.sort() + f_d = dict(host=config.host, policy=config.policy) + if config.policy == 'user': + f_d['user'] = config.user + return filter(lambda i: i >= f_d, idx) + +def filter_fileinfos(base, fileinfos): + for stat, fi1, fi2 in diff_manifest(base, fileinfos): + if stat == DiffStatus.MISSING_B or stat == DiffStatus.MATCH: + continue + yield fi2 + +def get_fileinfos(config): + + last_full = None + last_cumu = None + last_incr = [] + for i in get_prev_backups(config): + if i.schedule == 'full': + last_full = i + last_cumu = None + last_incr = [] + elif i.schedule == 'cumu': + last_cumu = i + last_incr = [] + elif i.schedule == 'incr': + last_incr.append(i) + + fileinfos = Manifest(paths=config.dirs, excludes=config.excludes) + + if config.schedule != 'full': + if not last_full: + raise ArchiveCreateError("No previous full backup found, can not " + "create %s archive" % config.schedule) + base_archives = [last_full.path] + if config.schedule == 'incr': + if last_cumu: + base_archives.append(last_cumu.path) + base_archives.extend([i.path for i in last_incr]) + for p in base_archives: + log.debug("considering %s to create differential archive", p) + with Archive().open(p) as base: + fileinfos = filter_fileinfos(base.manifest, fileinfos) + + return fileinfos + +def create(config): + os.umask(0o277) + fileinfos = get_fileinfos(config) + + log.debug("creating archive %s", config.path) + + tags = [ + "host:%s" % config.host, + 
"policy:%s" % config.policy, + "schedule:%s" % config.schedule, + ] + if config.user: + tags.append("user:%s" % config.user) + Archive().create(config.path, fileinfos=fileinfos, tags=tags) diff --git a/scripts/backup-tool.py b/scripts/backup-tool.py index 559cfd6..e6cf120 100644 --- a/scripts/backup-tool.py +++ b/scripts/backup-tool.py @@ -2,191 +2,6 @@ """Create a backup. """ -import argparse -import datetime -import logging -import os -from pathlib import Path -import pwd -import socket -import sys -from archive.archive import Archive -import archive.config -from archive.exception import ConfigError, ArchiveCreateError -from archive.index import ArchiveIndex -from archive.manifest import Manifest, DiffStatus, diff_manifest +import archive.bt -logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") -log = logging.getLogger(__name__) - -os.umask(0o277) - -# Note: in the long run, we want to select the schedule (e.g. set the -# conditions, when to choose which schedule) in the configuration -# file, and even put the definition and semantics (e.g. which -# schedules exist and what do they mean) there. But this seem to be -# most tricky part of the whole project. We want to get the basics -# working first. So for the moment, we hard code definition and -# semantics here and select the schedule as a command line argument. - -# TODO: -# - consider add configuration options for dedup mode and for checksum -# algorithm. -# - consider adding more log messages and logging configuration. 
- -schedules = {'full', 'cumu', 'incr'} -def get_config_file(): - try: - return os.environ['BACKUP_CFG'] - except KeyError: - return "/etc/backup.cfg" - -class Config(archive.config.Config): - - defaults = { - 'dirs': None, - 'excludes': "", - 'backupdir': None, - 'targetdir': "%(backupdir)s", - 'name': "%(host)s-%(date)s-%(schedule)s.tar.bz2", - } - args_options = ('policy', 'user', 'schedule') - - def __init__(self, args): - host = socket.gethostname() - config_file = get_config_file() - sections = ("%s/%s" % (host, args.policy), host, args.policy) - self.config_file = config_file - super().__init__(args, config_section=sections) - if not self.config_file: - raise ConfigError("configuration file %s not found" % config_file) - self['host'] = host - self['date'] = datetime.date.today().strftime("%y%m%d") - if args.user: - try: - self['home'] = pwd.getpwnam(args.user).pw_dir - except KeyError: - pass - - @property - def host(self): - return self.get('host') - - @property - def policy(self): - return self.get('policy') - - @property - def user(self): - return self.get('user') - - @property - def schedule(self): - return self.get('schedule') - - @property - def name(self): - return self.get('name', required=True) - - @property - def dirs(self): - return self.get('dirs', required=True, split=True, type=Path) - - @property - def excludes(self): - return self.get('excludes', split=True, type=Path) - - @property - def backupdir(self): - return self.get('backupdir', required=True, type=Path) - - @property - def targetdir(self): - return self.get('targetdir', required=True, type=Path) - - @property - def path(self): - return self.targetdir / self.name - - -def filter_fileinfos(base, fileinfos): - for stat, fi1, fi2 in diff_manifest(base, fileinfos): - if stat == DiffStatus.MISSING_B or stat == DiffStatus.MATCH: - continue - yield fi2 - - -argparser = argparse.ArgumentParser() -clsgrp = argparser.add_mutually_exclusive_group() -clsgrp.add_argument('--policy', default='sys') 
-clsgrp.add_argument('--user') -argparser.add_argument('--schedule', choices=schedules, default='full') -argparser.add_argument('-v', '--verbose', action='store_true', - help=("verbose diagnostic output")) -args = argparser.parse_args() - -if args.verbose: - logging.getLogger().setLevel(logging.DEBUG) -if args.user: - args.policy = 'user' - -try: - config = Config(args) -except ConfigError as e: - print("%s: configuration error: %s" % (argparser.prog, e), file=sys.stderr) - sys.exit(2) - -log.info("host:%s, policy:%s", config.host, config.policy) - -idx_file = config.backupdir / ".index.yaml" -if idx_file.is_file(): - log.debug("reading index file %s", str(idx_file)) - with idx_file.open("rb") as f: - idx = ArchiveIndex(f) -else: - log.debug("index file not found") - idx = ArchiveIndex() -idx.sort() - -f_d = dict(host=config.host, policy=config.policy) -if config.policy == 'user': - f_d['user'] = config.user -last_full = None -last_cumu = None -last_incr = [] -for i in filter(lambda i: i >= f_d, idx): - if i.schedule == 'full': - last_full = i - last_cumu = None - last_incr = [] - elif i.schedule == 'cumu': - last_cumu = i - last_incr = [] - elif i.schedule == 'incr': - last_incr.append(i) - -fileinfos = Manifest(paths=config.dirs, excludes=config.excludes) -tags = [ - "host:%s" % config.host, - "policy:%s" % config.policy, - "schedule:%s" % config.schedule, -] -if config.user: - tags.append("user:%s" % config.user) - -if config.schedule != 'full': - if not last_full: - raise ArchiveCreateError("No previous full backup found, " - "can not create %s archive" % config.schedule) - base_archives = [last_full.path] - if config.schedule == 'incr': - if last_cumu: - base_archives.append(last_cumu.path) - base_archives.extend([i.path for i in last_incr]) - for p in base_archives: - log.debug("considering %s to create differential archive", p) - with Archive().open(p) as base: - fileinfos = filter_fileinfos(base.manifest, fileinfos) - -log.debug("creating archive %s", 
config.path) -archive = Archive().create(config.path, fileinfos=fileinfos, tags=tags) +archive.bt.backup_tool() diff --git a/setup.py b/setup.py index cdb9b5c..45b47bb 100644 --- a/setup.py +++ b/setup.py @@ -118,7 +118,7 @@ def run(self): url = "https://github.com/RKrahl/archive-tools", license = "Apache-2.0", requires = ["PyYAML"], - packages = ["archive", "archive.cli"], + packages = ["archive", "archive.cli", "archive.bt"], scripts = ["scripts/archive-tool.py", "scripts/backup-tool.py", "scripts/imap-to-archive.py"], data_files = [("/etc", ["etc/backup.cfg"])], From da51146dc56f9a6e02c51f89f0e98ca5c55e63d8 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 30 May 2021 19:52:12 +0200 Subject: [PATCH 089/138] Change backup-tool to feature a subcommand on the command line --- archive/bt/__init__.py | 26 +++++++++++++++----------- archive/bt/config.py | 10 +++++++++- archive/bt/create.py | 12 +++++++++++- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/archive/bt/__init__.py b/archive/bt/__init__.py index 2483e83..2904802 100644 --- a/archive/bt/__init__.py +++ b/archive/bt/__init__.py @@ -2,11 +2,11 @@ """ import argparse +import importlib import logging import sys from archive.exception import ArchiveError, ConfigError from archive.bt.config import Config -from archive.bt.create import create # TODO: @@ -25,24 +25,24 @@ # - consider adding more log messages and logging configuration. 
log = logging.getLogger(__name__) -schedules = {'full', 'cumu', 'incr'} +subcmds = ( "create", ) def backup_tool(): logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") argparser = argparse.ArgumentParser() - clsgrp = argparser.add_mutually_exclusive_group() - clsgrp.add_argument('--policy', default='sys') - clsgrp.add_argument('--user') - argparser.add_argument('--schedule', choices=schedules, default='full') argparser.add_argument('-v', '--verbose', action='store_true', help=("verbose diagnostic output")) + subparsers = argparser.add_subparsers(title='subcommands', dest='subcmd') + for sc in subcmds: + m = importlib.import_module('archive.bt.%s' % sc) + m.add_parser(subparsers) args = argparser.parse_args() if args.verbose: logging.getLogger().setLevel(logging.DEBUG) - if args.user: - args.policy = 'user' + if not hasattr(args, "func"): + argparser.error("subcommand is required") try: config = Config(args) @@ -51,11 +51,15 @@ def backup_tool(): file=sys.stderr) sys.exit(2) - log.info("host:%s, policy:%s", config.host, config.policy) + if config.policy: + log.info("%s %s: host:%s, policy:%s", argparser.prog, args.subcmd, + config.host, config.policy) + else: + log.info("%s %s: host:%s", argparser.prog, args.subcmd, config.host) try: - create(config) + sys.exit(args.func(args, config)) except ArchiveError as e: - print("%s: error: %s" % (argparser.prog, e), + print("%s: error: %s" % (argparser.prog, e), file=sys.stderr) sys.exit(1) diff --git a/archive/bt/config.py b/archive/bt/config.py index f1e14ce..dbf8c67 100644 --- a/archive/bt/config.py +++ b/archive/bt/config.py @@ -27,9 +27,17 @@ class Config(archive.config.Config): args_options = ('policy', 'user', 'schedule') def __init__(self, args): + for o in self.args_options: + if not hasattr(args, o): + setattr(args, o, None) host = socket.gethostname() config_file = get_config_file() - sections = ("%s/%s" % (host, args.policy), host, args.policy) + if args.user: + args.policy = 'user' + if 
args.policy: + sections = ("%s/%s" % (host, args.policy), host, args.policy) + else: + sections = (host,) self.config_file = config_file super().__init__(args, config_section=sections) if not self.config_file: diff --git a/archive/bt/create.py b/archive/bt/create.py index 9bbcd34..948584a 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -10,6 +10,7 @@ log = logging.getLogger(__name__) +schedules = {'full', 'cumu', 'incr'} def get_prev_backups(config): idx_file = config.backupdir / ".index.yaml" @@ -66,7 +67,7 @@ def get_fileinfos(config): return fileinfos -def create(config): +def create(args, config): os.umask(0o277) fileinfos = get_fileinfos(config) @@ -80,3 +81,12 @@ def create(config): if config.user: tags.append("user:%s" % config.user) Archive().create(config.path, fileinfos=fileinfos, tags=tags) + return 0 + +def add_parser(subparsers): + parser = subparsers.add_parser('create', help="create a backup") + clsgrp = parser.add_mutually_exclusive_group() + clsgrp.add_argument('--policy', default='sys') + clsgrp.add_argument('--user') + parser.add_argument('--schedule', choices=schedules, default='full') + parser.set_defaults(func=create) From 24fb5beabf0652857c46ea0663f2c078c6decd9f Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 30 May 2021 21:03:57 +0200 Subject: [PATCH 090/138] Add index subcommand to backup-tool --- archive/bt/__init__.py | 2 +- archive/bt/index.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 archive/bt/index.py diff --git a/archive/bt/__init__.py b/archive/bt/__init__.py index 2904802..7723c5b 100644 --- a/archive/bt/__init__.py +++ b/archive/bt/__init__.py @@ -25,7 +25,7 @@ # - consider adding more log messages and logging configuration. 
log = logging.getLogger(__name__) -subcmds = ( "create", ) +subcmds = ( "create", "index", ) def backup_tool(): logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") diff --git a/archive/bt/index.py b/archive/bt/index.py new file mode 100644 index 0000000..c43bbca --- /dev/null +++ b/archive/bt/index.py @@ -0,0 +1,24 @@ +"""Update the index of backups. +""" + +import logging +from archive.index import ArchiveIndex + + +log = logging.getLogger(__name__) + +def update_index(args, config): + idx_file = config.backupdir / ".index.yaml" + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) + idx.add_archives(config.backupdir.glob("*.tar*"), prune=args.prune) + idx.sort() + with idx_file.open("wb") as f: + idx.write(f) + return 0 + +def add_parser(subparsers): + parser = subparsers.add_parser('index', help="update backup index") + parser.add_argument('--no-prune', action='store_false', dest='prune', + help="do not remove missing backups from the index") + parser.set_defaults(func=update_index) From 015881e01a48860f7cd0e471f201231d87b49d81 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 31 May 2021 07:00:00 +0200 Subject: [PATCH 091/138] update_index() creates the index if it doesn't already exist --- archive/bt/index.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/archive/bt/index.py b/archive/bt/index.py index c43bbca..47ea60a 100644 --- a/archive/bt/index.py +++ b/archive/bt/index.py @@ -9,8 +9,13 @@ def update_index(args, config): idx_file = config.backupdir / ".index.yaml" - with idx_file.open("rb") as f: - idx = ArchiveIndex(f) + if idx_file.is_file(): + log.debug("reading index file %s", str(idx_file)) + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) + else: + log.debug("index file not found") + idx = ArchiveIndex() idx.add_archives(config.backupdir.glob("*.tar*"), prune=args.prune) idx.sort() with idx_file.open("wb") as f: From 75b55104d7875b516b7363d2d956d8e1e008a40a Mon Sep 17 00:00:00 2001 
From: Rolf Krahl Date: Mon, 19 Jul 2021 00:59:24 +0200 Subject: [PATCH 092/138] Add module archive.bt.schedule --- archive/bt/schedule.py | 176 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 archive/bt/schedule.py diff --git a/archive/bt/schedule.py b/archive/bt/schedule.py new file mode 100644 index 0000000..265ba0d --- /dev/null +++ b/archive/bt/schedule.py @@ -0,0 +1,176 @@ +"""Provide helper for the backup-tool related to schedules. +""" + +import collections +import datetime +from enum import IntEnum +import re +from lark import Lark, Transformer + + +class _DTMatcher: + """datetime component matcher to be used in ScheduleDate. + This is an abstract base class. + """ + def matches(self, value): + raise NotImplementedError + +class _DTMatcherAny(_DTMatcher): + + def matches(self, value): + return True + + def __str__(self): + return '*' + +class _DTMatcherValue(_DTMatcher): + + def __init__(self, value): + assert isinstance(value, int) + self.value = value + + def matches(self, value): + return value == self.value + + def __str__(self): + return '%d' % self.value + +class _DTMatcherInterval(_DTMatcher): + + def __init__(self, i_min, i_max): + assert isinstance(i_min, int) + assert isinstance(i_max, int) + self.i_min = i_min + self.i_max = i_max + + def matches(self, value): + return self.i_min <= value <= self.i_max + + def __str__(self): + return '[%d,%d]' % (self.i_min, self.i_max) + +class _DTMatcherList(_DTMatcher): + + def __init__(self, dtms): + self.dtms = dtms + + def matches(self, value): + for dtm in self.dtms: + if dtm.matches(value): + return True + else: + return False + + def __str__(self): + return '(%s)' % ",".join(str(m) for m in self.dtms) + +_wd = dict(Mon=1, Tue=2, Wed=3, Thu=4, Fri=5, Sat=6, Sun=7) + +_dt_fields = ('weekday', 'year', 'month', 'day', 'hour', 'minute', 'second') +_dt_tuple = collections.namedtuple('_dt_tuple', _dt_fields) + + +_sd_grammar = r""" + sd: [weekday _WS] date [_WS 
time] + + weekday: wd ("," wd)* -> vlist + + wd: wdstr -> value + | wdstr ".." wdstr -> intervall + + wdstr: MON | TUE | WED | THU | FRI | SAT | SUN + + date: [[dtc "-"] dtc "-"] dtc + + time: dtc ":" dtc [":" dtc] + + dtc: dtcs ("," dtcs)* -> vlist + + dtcs: "*" -> wildcard + | INT -> value + | INT ".." INT -> intervall + + MON: "Mon" + TUE: "Tue" + WED: "Wed" + THU: "Thu" + FRI: "Fri" + SAT: "Sat" + SUN: "Sun" + + _WS: (" "|/\t/)+ + + %import common.INT +""" + +class _SDTf(Transformer): + + def wdstr(self, l): + (s,) = l + return _wd[s] + + def wildcard(self, l): + return _DTMatcherAny() + + def value(self, l): + (v,) = l + return _DTMatcherValue(int(v)) + + def intervall(self, l): + (a, b) = l + return _DTMatcherInterval(int(a), int(b)) + + def vlist(self, l): + if len(l) == 1: + return l[0] + else: + return _DTMatcherList(l) + + def date(self, l): + l = list(l) + while len(l) < 3: + l.insert(0, _DTMatcherAny()) + return l + + def time(self, l): + l = list(l) + while len(l) < 3: + l.append(_DTMatcherAny()) + return l + + def sd(self, l): + l = list(l) + r = [] + # weekday + if isinstance(l[0], _DTMatcher): + r.append(l.pop(0)) + else: + r.append(_DTMatcherAny()) + # date + r.extend(l.pop(0)) + # time + if l: + r.extend(l.pop(0)) + else: + r.extend((_DTMatcherAny(), _DTMatcherAny(), _DTMatcherAny())) + return r + +_sd_parser = Lark(_sd_grammar, start='sd', parser='lalr', transformer=_SDTf()) + +class ScheduleDate(_dt_tuple): + + def __new__(cls, spec): + l = _sd_parser.parse(spec) + return super().__new__(cls, *l) + + def __contains__(self, dt): + if isinstance(dt, datetime.datetime): + return (self.weekday.matches(dt.isoweekday()) and + self.year.matches(dt.year) and + self.month.matches(dt.month) and + self.day.matches(dt.day) and + self.hour.matches(dt.hour) and + self.minute.matches(dt.minute) and + self.second.matches(dt.second)) + else: + return False From 4e6d368b84bd310234c8bb0650650b88fac14d97 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 31 Jul 
2021 20:16:57 +0200 Subject: [PATCH 093/138] Document dependency on lark-parser --- .github/requirements.txt | 1 + README.rst | 5 +++++ python-archive-tools.spec | 2 ++ setup.py | 2 +- 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/requirements.txt b/.github/requirements.txt index 1911bb1..d240ac8 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -1,5 +1,6 @@ PyYAML >=5.4 distutils-pytest +lark-parser pytest >=3.6.0 pytest-dependency >=0.2 python-dateutil diff --git a/README.rst b/README.rst index f3fc22c..845217d 100644 --- a/README.rst +++ b/README.rst @@ -56,6 +56,10 @@ Required library packages: + `PyYAML`_ ++ `lark-parser`_ + + Required for the `backup-tool.py` script. + Optional library packages: + `imapclient`_ @@ -136,6 +140,7 @@ permissions and limitations under the License. .. _PyPI site: https://pypi.org/project/archive-tools/ .. _PyYAML: http://pyyaml.org/wiki/PyYAML +.. _lark-parser: https://github.com/lark-parser/lark .. _imapclient: https://github.com/mjs/imapclient/ .. _python-dateutil: https://dateutil.readthedocs.io/en/stable/ .. 
_setuptools_scm: https://github.com/pypa/setuptools_scm/ diff --git a/python-archive-tools.spec b/python-archive-tools.spec index 12a7eef..992d8aa 100644 --- a/python-archive-tools.spec +++ b/python-archive-tools.spec @@ -13,11 +13,13 @@ BuildRequires: fdupes BuildRequires: python3-base >= 3.6 %if %{with tests} BuildRequires: python3-PyYAML +BuildRequires: python3-lark-parser BuildRequires: python3-distutils-pytest BuildRequires: python3-pytest-dependency >= 0.2 BuildRequires: python3-pytest >= 3.0 %endif Requires: python3-PyYAML +Requires: python3-lark-parser Recommends: python3-IMAPClient Recommends: python3-python-dateutil BuildArch: noarch diff --git a/setup.py b/setup.py index 45b47bb..45af630 100644 --- a/setup.py +++ b/setup.py @@ -117,7 +117,7 @@ def run(self): author_email = "rolf@rotkraut.de", url = "https://github.com/RKrahl/archive-tools", license = "Apache-2.0", - requires = ["PyYAML"], + requires = ["PyYAML", "lark"], packages = ["archive", "archive.cli", "archive.bt"], scripts = ["scripts/archive-tool.py", "scripts/backup-tool.py", "scripts/imap-to-archive.py"], From b9c48f9d35355282e95b771692c2e0cf8ee7a988 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 31 Jul 2021 21:18:18 +0200 Subject: [PATCH 094/138] Fix: distutils expects a module name rather then a PyPI package name in requires --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 0ff04a0..a9d2c6e 100644 --- a/setup.py +++ b/setup.py @@ -117,7 +117,7 @@ def run(self): author_email = "rolf@rotkraut.de", url = "https://github.com/RKrahl/archive-tools", license = "Apache-2.0", - requires = ["PyYAML"], + requires = ["yaml"], packages = ["archive", "archive.cli"], scripts = ["scripts/archive-tool.py", "scripts/imap-to-archive.py"], classifiers = [ From a07f85746d51a0053baae1f52bc69a69a17835f6 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 31 Jul 2021 22:02:41 +0200 Subject: [PATCH 095/138] Rename mailarchive tests --- 
...est_05_mailarchive_create.py => test_09_mailarchive_create.py} | 0 ...est_05_mailarchive_legacy.py => test_09_mailarchive_legacy.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/{test_05_mailarchive_create.py => test_09_mailarchive_create.py} (100%) rename tests/{test_05_mailarchive_legacy.py => test_09_mailarchive_legacy.py} (100%) diff --git a/tests/test_05_mailarchive_create.py b/tests/test_09_mailarchive_create.py similarity index 100% rename from tests/test_05_mailarchive_create.py rename to tests/test_09_mailarchive_create.py diff --git a/tests/test_05_mailarchive_legacy.py b/tests/test_09_mailarchive_legacy.py similarity index 100% rename from tests/test_05_mailarchive_legacy.py rename to tests/test_09_mailarchive_legacy.py From 58bc6468c85b342f708a1833f0ed4f6ccc66ce0d Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 1 Aug 2021 14:28:37 +0200 Subject: [PATCH 096/138] Add tests for class ScheduleDate --- tests/test_05_schedule.py | 121 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 tests/test_05_schedule.py diff --git a/tests/test_05_schedule.py b/tests/test_05_schedule.py new file mode 100644 index 0000000..47491da --- /dev/null +++ b/tests/test_05_schedule.py @@ -0,0 +1,121 @@ +"""Test class archive.bt.schedule.ScheduleDate. 
+""" + +import datetime +import pytest +from archive.bt.schedule import ScheduleDate + + +test_schedules = [ + { + 'schedule' : "Sat,Thu,Mon..Wed,Sat..Sun *", + 'dates': [ + ( datetime.datetime(2021, 7, 1, 5, 13, 21), True ), + ( datetime.datetime(2021, 7, 2, 6, 24, 36), False ), + ( datetime.datetime(2021, 7, 3, 3, 57, 42), True ), + ( datetime.datetime(2021, 7, 4, 8, 8, 48), True ), + ( datetime.datetime(2021, 7, 5, 19, 50, 14), True ), + ( datetime.datetime(2021, 7, 6, 22, 48, 56), True ), + ( datetime.datetime(2021, 7, 7, 1, 11, 49), True ), + ], + }, + { + 'schedule' : "Mon,Sun 2012-*-* 2,1:23", + 'dates': [ + ( datetime.datetime(2012, 10, 20, 1, 23, 48), False ), + ( datetime.datetime(2012, 10, 21, 1, 23, 7), True ), + ( datetime.datetime(2012, 10, 21, 2, 24, 30), False ), + ( datetime.datetime(2012, 10, 21, 3, 23, 26), False ), + ( datetime.datetime(2012, 10, 22, 1, 23, 39), True ), + ], + }, + { + 'schedule' : "Wed *-1", + 'dates': [ + ( datetime.datetime(2002, 4, 1, 13, 52, 43), False ), + ( datetime.datetime(2002, 5, 1, 17, 11, 44), True ), + ( datetime.datetime(2002, 6, 1, 2, 11, 24), False ), + ( datetime.datetime(2003, 9, 1, 6, 5, 23), False ), + ( datetime.datetime(2003, 9, 3, 2, 37, 36), False ), + ( datetime.datetime(2003, 10, 1, 15, 30, 6), True ), + ( datetime.datetime(2003, 11, 1, 20, 29, 54), False ), + ], + }, + { + 'schedule' : "Wed..Wed,Wed *-1", + 'dates': [ + ( datetime.datetime(2002, 4, 1, 13, 52, 43), False ), + ( datetime.datetime(2002, 5, 1, 17, 11, 44), True ), + ( datetime.datetime(2002, 6, 1, 2, 11, 24), False ), + ( datetime.datetime(2003, 9, 1, 6, 5, 23), False ), + ( datetime.datetime(2003, 9, 3, 2, 37, 36), False ), + ( datetime.datetime(2003, 10, 1, 15, 30, 6), True ), + ( datetime.datetime(2003, 11, 1, 20, 29, 54), False ), + ], + }, + { + 'schedule' : "10-15", + 'dates': [ + ( datetime.datetime(2017, 9, 15, 3, 8, 17), False ), + ( datetime.datetime(2017, 10, 14, 23, 48, 51), False ), + ( datetime.datetime(2017, 10, 15, 4, 12, 
36), True ), + ( datetime.datetime(2018, 10, 15, 11, 14, 43), True ), + ], + }, + { + 'schedule' : "Fri 1..7 4,10,16,22:30", + 'dates': [ + ( datetime.datetime(2021, 7, 1, 4, 30, 45), False ), + ( datetime.datetime(2021, 7, 2, 4, 30, 45), True ), + ( datetime.datetime(2021, 7, 2, 5, 30, 45), False ), + ( datetime.datetime(2021, 7, 2, 16, 30, 45), True ), + ( datetime.datetime(2021, 7, 9, 16, 30, 45), False ), + ], + }, + { + 'schedule' : "Mon *-*-2..8", + 'dates': [ + ( datetime.datetime(2021, 3, 1, 3, 0), False ), + ( datetime.datetime(2021, 3, 5, 3, 0), False ), + ( datetime.datetime(2021, 3, 8, 3, 0), True ), + ( datetime.datetime(2021, 3, 15, 3, 0), False ), + ( datetime.datetime(2021, 7, 5, 3, 0), True ), + ( datetime.datetime(2021, 7, 9, 3, 0), False ), + ( datetime.datetime(2021, 7, 12, 3, 0), False ), + ], + }, + { + 'schedule' : "Mon *", + 'dates': [ + ( datetime.datetime(2021, 3, 1, 3, 0), True ), + ( datetime.datetime(2021, 3, 5, 3, 0), False ), + ( datetime.datetime(2021, 3, 8, 3, 0), True ), + ( datetime.datetime(2021, 3, 15, 3, 0), True ), + ( datetime.datetime(2021, 7, 5, 3, 0), True ), + ( datetime.datetime(2021, 7, 9, 3, 0), False ), + ( datetime.datetime(2021, 7, 12, 3, 0), True ), + ], + }, + { + 'schedule' : "*", + 'dates': [ + ( datetime.datetime(2021, 3, 1, 3, 0), True ), + ( datetime.datetime(2021, 3, 5, 3, 0), True ), + ( datetime.datetime(2021, 3, 8, 3, 0), True ), + ( datetime.datetime(2021, 3, 15, 3, 0), True ), + ( datetime.datetime(2021, 7, 5, 3, 0), True ), + ( datetime.datetime(2021, 7, 9, 3, 0), True ), + ( datetime.datetime(2021, 7, 12, 3, 0), True ), + ], + }, +] + +@pytest.mark.parametrize("schedule,dates", [ + (s['schedule'], s['dates']) for s in test_schedules +]) +def test_schedule_parse(schedule, dates): + """Various parsing examples for ScheduleDate. 
+ """ + sd = ScheduleDate(schedule) + for d in dates: + assert (d[0] in sd) == d[1] From f9e24905a720bf48e68a1a3bbd359956ca73d1d6 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 18 Sep 2021 21:14:44 +0200 Subject: [PATCH 097/138] Allow the split keyword arg to Config.get() to be a delimiter string --- archive/config.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/archive/config.py b/archive/config.py index d711316..41e1355 100644 --- a/archive/config.py +++ b/archive/config.py @@ -46,10 +46,14 @@ def get(self, key, required=False, subst=True, split=False, type=None): if subst: value = value % self if split: + if isinstance(split, str): + sep = split + else: + sep = None if type: - value = [type(v) for v in value.split()] + value = [type(v) for v in value.split(sep=sep)] else: - value = value.split() + value = value.split(sep=sep) else: if type: value = type(value) From 9a51c503e03fb005b047406685c7aecb4f6f78ea Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 19 Sep 2021 17:56:00 +0200 Subject: [PATCH 098/138] Add a class hierarchy implementing schedules. --- archive/bt/schedule.py | 87 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/archive/bt/schedule.py b/archive/bt/schedule.py index 265ba0d..e8bf5f8 100644 --- a/archive/bt/schedule.py +++ b/archive/bt/schedule.py @@ -8,6 +8,10 @@ from lark import Lark, Transformer +class NoFullBackupError(Exception): + pass + + class _DTMatcher: """datetime component matcher to be used in ScheduleDate. This is an abstract base class. @@ -174,3 +178,86 @@ def __contains__(self, dt): self.second.matches(dt.second)) else: return False + + +class BaseSchedule: + """Abstract base class for schedules. 
+ """ + + SubClasses = dict() + ClsName = None + + def __init__(self, name, date, parent): + self.name = name + self.date = date + self.parent = parent + + def match_date(self, dt): + return dt in self.date + + def get_base_archives(self, archives): + raise NotImplementedError + + def get_child_base_archives(self, archives): + raise NotImplementedError + + @classmethod + def register_clsname(cls, subcls): + """A class decorator to register the name for a subclass. + """ + assert issubclass(subcls, cls) + assert subcls.ClsName and subcls.ClsName not in cls.SubClasses + cls.SubClasses[subcls.ClsName] = subcls + return subcls + +@BaseSchedule.register_clsname +class FullSchedule(BaseSchedule): + + ClsName = "full" + + def get_base_archives(self, archives): + return [] + + def get_child_base_archives(self, archives): + last_full = None + for i in archives: + if i.schedule == self.name: + last_full = i + if last_full: + return [last_full] + else: + raise NoFullBackupError + +@BaseSchedule.register_clsname +class CumuSchedule(BaseSchedule): + + ClsName = "cumu" + + def get_base_archives(self, archives): + return self.parent.get_child_base_archives(archives) + + def get_child_base_archives(self, archives): + base_archives = self.parent.get_child_base_archives(archives) + p_idx = archives.index(base_archives[-1]) + for i in archives[p_idx+1:]: + if i.schedule == self.name: + last_cumu = i + if last_cumu: + base_archives.append(last_cumu) + return base_archives + +@BaseSchedule.register_clsname +class IncrSchedule(BaseSchedule): + + ClsName = "incr" + + def get_base_archives(self, archives): + base_archives = self.parent.get_child_base_archives(archives) + p_idx = archives.index(base_archives[-1]) + for i in archives[p_idx+1:]: + if i.schedule == self.name: + base_archives.append(i) + return base_archives + + def get_child_base_archives(self, archives): + return self.get_base_archives(archives) From d952e26f115f5a027a9fef82ee99505e533a6d30 Mon Sep 17 00:00:00 2001 From: 
Rolf Krahl Date: Sun, 19 Sep 2021 18:40:25 +0200 Subject: [PATCH 099/138] Determine the schedule to use from date specifications in the config file rather than from a command line argument --- archive/bt/config.py | 7 +++-- archive/bt/create.py | 69 ++++++++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/archive/bt/config.py b/archive/bt/config.py index dbf8c67..07452f4 100644 --- a/archive/bt/config.py +++ b/archive/bt/config.py @@ -23,8 +23,9 @@ class Config(archive.config.Config): 'backupdir': None, 'targetdir': "%(backupdir)s", 'name': "%(host)s-%(date)s-%(schedule)s.tar.bz2", + 'schedules': None, } - args_options = ('policy', 'user', 'schedule') + args_options = ('policy', 'user') def __init__(self, args): for o in self.args_options: @@ -63,8 +64,8 @@ def user(self): return self.get('user') @property - def schedule(self): - return self.get('schedule') + def schedules(self): + return self.get('schedules', required=True, split='/') @property def name(self): diff --git a/archive/bt/create.py b/archive/bt/create.py index 948584a..2e402aa 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -1,16 +1,17 @@ """Create a backup. 
""" +import datetime import logging import os from archive.archive import Archive from archive.exception import ArchiveCreateError from archive.index import ArchiveIndex from archive.manifest import Manifest, DiffStatus, diff_manifest +from archive.bt.schedule import ScheduleDate, BaseSchedule, NoFullBackupError log = logging.getLogger(__name__) -schedules = {'full', 'cumu', 'incr'} def get_prev_backups(config): idx_file = config.backupdir / ".index.yaml" @@ -25,7 +26,7 @@ def get_prev_backups(config): f_d = dict(host=config.host, policy=config.policy) if config.policy == 'user': f_d['user'] = config.user - return filter(lambda i: i >= f_d, idx) + return list(filter(lambda i: i >= f_d, idx)) def filter_fileinfos(base, fileinfos): for stat, fi1, fi2 in diff_manifest(base, fileinfos): @@ -33,50 +34,49 @@ def filter_fileinfos(base, fileinfos): continue yield fi2 -def get_fileinfos(config): - - last_full = None - last_cumu = None - last_incr = [] - for i in get_prev_backups(config): - if i.schedule == 'full': - last_full = i - last_cumu = None - last_incr = [] - elif i.schedule == 'cumu': - last_cumu = i - last_incr = [] - elif i.schedule == 'incr': - last_incr.append(i) +def get_schedule(config): + last_schedule = None + schedules = [] + for s in config.schedules: + sd_str = config.get('schedule.%s.date' % s, required=True) + cls = BaseSchedule.SubClasses[s] + last_schedule = cls(s, ScheduleDate(sd_str), last_schedule) + schedules.append(last_schedule) + now = datetime.datetime.now() + for s in schedules: + if s.match_date(now): + return s + else: + log.debug("no schedule date matches now") + return None +def get_fileinfos(config, schedule): fileinfos = Manifest(paths=config.dirs, excludes=config.excludes) - - if config.schedule != 'full': - if not last_full: - raise ArchiveCreateError("No previous full backup found, can not " - "create %s archive" % config.schedule) - base_archives = [last_full.path] - if config.schedule == 'incr': - if last_cumu: - 
base_archives.append(last_cumu.path) - base_archives.extend([i.path for i in last_incr]) - for p in base_archives: - log.debug("considering %s to create differential archive", p) - with Archive().open(p) as base: - fileinfos = filter_fileinfos(base.manifest, fileinfos) - + try: + base_archives = schedule.get_base_archives(get_prev_backups(config)) + except NoFullBackupError: + raise ArchiveCreateError("No previous full backup found, can not " + "create %s archive" % schedule.name) + for p in [i.path for i in base_archives]: + log.debug("considering %s to create differential archive", p) + with Archive().open(p) as base: + fileinfos = filter_fileinfos(base.manifest, fileinfos) return fileinfos def create(args, config): os.umask(0o277) - fileinfos = get_fileinfos(config) + schedule = get_schedule(config) + if schedule is None: + return 0 + config['schedule'] = schedule.name + fileinfos = get_fileinfos(config, schedule) log.debug("creating archive %s", config.path) tags = [ "host:%s" % config.host, "policy:%s" % config.policy, - "schedule:%s" % config.schedule, + "schedule:%s" % schedule.name, ] if config.user: tags.append("user:%s" % config.user) @@ -88,5 +88,4 @@ def add_parser(subparsers): clsgrp = parser.add_mutually_exclusive_group() clsgrp.add_argument('--policy', default='sys') clsgrp.add_argument('--user') - parser.add_argument('--schedule', choices=schedules, default='full') parser.set_defaults(func=create) From 4fbd5c332d666b709f62653af8c9272a89df6cdc Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 26 Sep 2021 23:17:48 +0200 Subject: [PATCH 100/138] Fix UnboundLocalError: local variable was not initialized --- archive/bt/schedule.py | 1 + 1 file changed, 1 insertion(+) diff --git a/archive/bt/schedule.py b/archive/bt/schedule.py index e8bf5f8..630d216 100644 --- a/archive/bt/schedule.py +++ b/archive/bt/schedule.py @@ -239,6 +239,7 @@ def get_base_archives(self, archives): def get_child_base_archives(self, archives): base_archives = 
self.parent.get_child_base_archives(archives) p_idx = archives.index(base_archives[-1]) + last_cumu = None for i in archives[p_idx+1:]: if i.schedule == self.name: last_cumu = i From 3088c99e48325d61cd21c4968979dae0af115372 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 2 Oct 2021 15:12:19 +0200 Subject: [PATCH 101/138] Update changelog --- CHANGES.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index dc4069f..ea8e138 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -20,6 +20,8 @@ New features + `#50`_, `#51`_: Add a header with some metadata to the index in a mail archive created by :class:`MailArchive`. ++ `#67`_: Add :mod:`archive.index` providing :class:`ArchiveIndex`. + Incompatible changes -------------------- @@ -70,6 +72,7 @@ Bug fixes and minor changes .. _#59: https://github.com/RKrahl/archive-tools/pull/59 .. _#60: https://github.com/RKrahl/archive-tools/pull/60 .. _#61: https://github.com/RKrahl/archive-tools/pull/61 +.. _#67: https://github.com/RKrahl/archive-tools/pull/67 0.5.1 (2020-12-12) From fb1fd46462903a8b1d156ce68591c8e4d95c173d Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 2 Oct 2021 15:31:50 +0200 Subject: [PATCH 102/138] Update changelog --- CHANGES.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9100714..07950f1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -62,6 +62,11 @@ Bug fixes and minor changes + `#48`_: Review and standardize some error messages. +Internal changes +---------------- + ++ `#68`_: Add :mod:`archive.config`. + .. _#48: https://github.com/RKrahl/archive-tools/pull/48 .. _#50: https://github.com/RKrahl/archive-tools/issues/50 .. _#51: https://github.com/RKrahl/archive-tools/pull/51 @@ -74,6 +79,7 @@ Bug fixes and minor changes .. _#59: https://github.com/RKrahl/archive-tools/pull/59 .. _#60: https://github.com/RKrahl/archive-tools/pull/60 .. _#61: https://github.com/RKrahl/archive-tools/pull/61 +.. 
_#68: https://github.com/RKrahl/archive-tools/pull/68 0.5.1 (2020-12-12) From 36ae718836e47fcab54bf99ed563b6335df327d0 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 3 Oct 2021 13:14:45 +0200 Subject: [PATCH 103/138] Change owner of the archive when making a user backup --- archive/bt/create.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/archive/bt/create.py b/archive/bt/create.py index 2e402aa..678c00a 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -4,6 +4,7 @@ import datetime import logging import os +import pwd from archive.archive import Archive from archive.exception import ArchiveCreateError from archive.index import ArchiveIndex @@ -63,6 +64,17 @@ def get_fileinfos(config, schedule): fileinfos = filter_fileinfos(base.manifest, fileinfos) return fileinfos +def chown(path, user): + try: + pw = pwd.getpwnam(user) + except KeyError: + log.warn("User %s not found in password database", user) + return + try: + os.chown(path, pw.pw_uid, pw.pw_gid) + except OSError as e: + log.error("chown %s: %s: %s", path, type(e).__name__, e) + def create(args, config): os.umask(0o277) schedule = get_schedule(config) @@ -80,7 +92,9 @@ def create(args, config): ] if config.user: tags.append("user:%s" % config.user) - Archive().create(config.path, fileinfos=fileinfos, tags=tags) + arch = Archive().create(config.path, fileinfos=fileinfos, tags=tags) + if config.user: + chown(arch.path, config.user) return 0 def add_parser(subparsers): From 6ecd8104bfb1bea77d08055e27c4e8a4ebd78a9a Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 3 Oct 2021 16:21:54 +0200 Subject: [PATCH 104/138] Add class DataFileBase --- tests/conftest.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5778fc5..f9fe097 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -149,6 +149,18 @@ def __init__(self, path, mode, *, mtime=None): def 
mode(self): return self._mode +class DataFileBase(DataFileOrDir): + + Checksums = _get_checksums() + + @property + def type(self): + return 'f' + + @property + def checksum(self): + return self._checksum or self.Checksums[self.path.name] + class DataDir(DataFileOrDir): @property @@ -160,42 +172,24 @@ def create(self, main_dir): path.mkdir(parents=True, exist_ok=True) _set_fs_attrs(path, self.mode, self.mtime) -class DataFile(DataFileOrDir): - - Checksums = _get_checksums() +class DataFile(DataFileBase): def __init__(self, path, mode, *, mtime=None, checksum=None): super().__init__(path, mode, mtime=mtime) self._checksum = checksum - @property - def type(self): - return 'f' - - @property - def checksum(self): - return self._checksum or self.Checksums[self.path.name] - def create(self, main_dir): path = main_dir / self.path path.parent.mkdir(parents=True, exist_ok=True) shutil.copy(gettestdata(self.path.name), path) _set_fs_attrs(path, self.mode, self.mtime) -class DataRandomFile(DataFileOrDir): +class DataRandomFile(DataFileBase): def __init__(self, path, mode, *, mtime=None, size=1024): super().__init__(path, mode, mtime=mtime) self._size = size - @property - def type(self): - return 'f' - - @property - def checksum(self): - return self._checksum - def create(self, main_dir): path = main_dir / self.path h = hashlib.new("sha256") From b73c070d9b9c75f05d558b48087f5bac8f39f805 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 4 Oct 2021 01:43:45 +0200 Subject: [PATCH 105/138] Start implementing a test for backup-tool --- tests/conftest.py | 19 ++-- tests/test_06_backup-tool.py | 213 +++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+), 7 deletions(-) create mode 100644 tests/test_06_backup-tool.py diff --git a/tests/conftest.py b/tests/conftest.py index f9fe097..a4ba952 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,7 +15,7 @@ __all__ = [ - 'DataDir', 'DataFile', 'DataRandomFile', 'DataSymLink', + 'DataDir', 'DataFile', 
'DataContentFile', 'DataRandomFile', 'DataSymLink', 'absflag', 'archive_name', 'callscript', 'check_manifest', 'get_output', 'gettestdata', 'require_compression', 'setup_testdata', 'sub_testdata', @@ -184,23 +184,28 @@ def create(self, main_dir): shutil.copy(gettestdata(self.path.name), path) _set_fs_attrs(path, self.mode, self.mtime) -class DataRandomFile(DataFileBase): +class DataContentFile(DataFileBase): - def __init__(self, path, mode, *, mtime=None, size=1024): + def __init__(self, path, data, mode, *, mtime=None): super().__init__(path, mode, mtime=mtime) - self._size = size + self.data = data def create(self, main_dir): path = main_dir / self.path h = hashlib.new("sha256") - data = bytearray(getrandbits(8) for _ in range(self._size)) - h.update(data) + h.update(self.data) self._checksum = h.hexdigest() path.parent.mkdir(parents=True, exist_ok=True) with path.open("wb") as f: - f.write(data) + f.write(self.data) _set_fs_attrs(path, self.mode, self.mtime) +class DataRandomFile(DataContentFile): + + def __init__(self, path, mode, *, mtime=None, size=1024): + data = bytearray(getrandbits(8) for _ in range(size)) + super().__init__(path, data, mode, mtime=mtime) + class DataSymLink(DataItem): def __init__(self, path, target, *, mtime=None): diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py new file mode 100644 index 0000000..9a42213 --- /dev/null +++ b/tests/test_06_backup-tool.py @@ -0,0 +1,213 @@ +"""Test the backup-tool. +""" + +import datetime +import itertools +import os +from pathlib import Path +import pwd +import socket +import string +import sys +from archive import Archive +from archive.bt import backup_tool +import pytest +from conftest import * + + +def mock_getpwnam(name): + """Mock pwd.getpwnam() pretending there is a user 'jdoe'. 
+ """ + if name == 'jdoe': + pwt = ('jdoe', '*', 1000, 1000, 'John Doe', '/home/jdoe', '/bin/bash') + return pwd.struct_passwd(pwt) + else: + return pwd.getpwnam(name) + +def get_mock_constfunc(c): + """Return a function returning a constant value. + The returned function may be used to mock socket.gethostname(). + """ + def mock_func(): + return c + return mock_func + +_orig_dt_datetime = datetime.datetime +_orig_dt_date = datetime.date + +class FrozenDateTimeMeta(type): + def __instancecheck__(self, instance): + if type(instance) in {_orig_dt_datetime, FrozenDateTime}: + return True + +class FrozenDateTime(datetime.datetime): + __metaclass__ = FrozenDateTimeMeta + _frozen = datetime.datetime.now() + + @classmethod + def freeze(cls, dt): + cls._frozen = dt + + @classmethod + def now(cls, tz=None): + return cls._frozen + +class FrozenDateMeta(type): + def __instancecheck__(self, instance): + if type(instance) in {_orig_dt_date, FrozenDate}: + return True + +class FrozenDate(datetime.date): + __metaclass__ = FrozenDateMeta + + @classmethod + def today(cls): + return FrozenDateTime.now().date() + +cfg = """# Configuration file for backup-tool. +# All paths are within a root directory that need to be substituted. 
+ +[DEFAULT] +backupdir = $root/net/backup + +[serv] + +[desk] +targetdir = $root/var/backup + +[sys] +dirs = + $root/etc + $root/root +excludes = + $root/root/.cache +schedules = full/incr + +[desk/sys] +schedule.full.date = Sun *-*-1..7 +schedule.incr.date = Sun * + +[serv/sys] +dirs = + $root/etc + $root/root + $root/usr/local +excludes = + $root/root/.cache +schedule.full.date = Mon *-*-2..8 +schedule.incr.date = Mon * + +[user] +name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +dirs = $root/%(home)s +excludes = + $root/%(home)s/.cache + $root/%(home)s/.thumbnails + $root/%(home)s/tmp +schedules = full/cumu/incr +schedule.full.date = Mon *-*-2..8 +schedule.cumu.date = Mon * +schedule.incr.date = * +""" + +sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1633129414), + DataContentFile(Path("etc", "foo.cfg"), + b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), + DataDir(Path("root"), 0o700, mtime=1633274230), + DataRandomFile(Path("root", "rnd5.dat"), + 0o600, size=85, mtime=1633243020), + DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), + mtime=1633243020), +] + +sys_serv_data = [ + DataDir(Path("usr", "local"), 0o755, mtime=1616490893), + DataRandomFile(Path("usr", "local", "rnd6.dat"), + 0o644, size=607, mtime=1633275272), +] + +user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd3.dat"), + 0o600, size=796, mtime=1633243020), +] + +excl_data = [ + DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), + DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), + 0o600, size=385, mtime=1633275272), + DataDir(Path("home", "jdoe", "tmp"), 0o755, mtime=1631130997), + DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), + DataRandomFile(Path("root", ".cache", "rnd4.dat"), + 0o600, size=665, mtime=1633275272), + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + DataDir(Path("var", "backup"), 0o755, mtime=1632704400), +] + +@pytest.fixture(scope="module") +def test_dir(tmpdir): 
+ subst = dict(root=tmpdir) + cfg_data = string.Template(cfg).substitute(subst).encode('ascii') + cfg_path = Path("etc", "backup.cfg") + cfg_file = DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683) + sys_data.append(cfg_file) + all_data = itertools.chain(sys_data, sys_serv_data, user_data, excl_data) + setup_testdata(tmpdir, all_data) + return tmpdir + +def test_backup(test_dir, monkeypatch): + cfg_path = test_dir / "etc" / "backup.cfg" + monkeypatch.setenv("BACKUP_CFG", str(cfg_path)) + monkeypatch.setattr(datetime, "datetime", FrozenDateTime) + monkeypatch.setattr(datetime, "date", FrozenDate) + monkeypatch.setattr(pwd, "getpwnam", mock_getpwnam) + + sys_desk_full = { d.path:d for d in sys_data } + sys_serv_full = { d.path:d for d in sys_data + sys_serv_data } + user_full = { d.path:d for d in user_data } + + monkeypatch.setattr(socket, "gethostname", get_mock_constfunc("desk")) + FrozenDateTime.freeze(datetime.datetime(2021, 10, 3, 19, 30)) + cmd = "backup-tool --verbose create --policy sys" + monkeypatch.setattr(sys, "argv", cmd.split()) + with pytest.raises(SystemExit) as excinfo: + backup_tool() + assert excinfo.value.code == 0 + path = test_dir / "var" / "backup" / "desk-211003-full.tar.bz2" + with Archive().open(path) as archive: + check_manifest(archive.manifest, sys_desk_full.values(), + prefix_dir=test_dir) + path.rename(test_dir / "net" / "backup" / "desk-211003-full.tar.bz2") + + monkeypatch.setattr(socket, "gethostname", get_mock_constfunc("serv")) + FrozenDateTime.freeze(datetime.datetime(2021, 10, 4, 3, 0)) + cmd = "backup-tool --verbose create --policy sys" + monkeypatch.setattr(sys, "argv", cmd.split()) + with pytest.raises(SystemExit) as excinfo: + backup_tool() + assert excinfo.value.code == 0 + path = test_dir / "net" / "backup" / "serv-211004-full.tar.bz2" + with Archive().open(path) as archive: + check_manifest(archive.manifest, sys_serv_full.values(), + prefix_dir=test_dir) + cmd = "backup-tool --verbose create --user jdoe" + 
monkeypatch.setattr(sys, "argv", cmd.split()) + with pytest.raises(SystemExit) as excinfo: + backup_tool() + assert excinfo.value.code == 0 + path = test_dir / "net" / "backup" / "jdoe-211004-full.tar.bz2" + with Archive().open(path) as archive: + check_manifest(archive.manifest, user_full.values(), + prefix_dir=test_dir) + cmd = "backup-tool --verbose index" + monkeypatch.setattr(sys, "argv", cmd.split()) + with pytest.raises(SystemExit) as excinfo: + backup_tool() + + sys_desk_cumu = {} + sys_serv_cumu = {} + user_cumu = {} + sys_desk_incr = {} + sys_serv_incr = {} + user_incr = {} From ab8e953bf6b3e27debd03775e026c662afa1512e Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 4 Oct 2021 21:59:29 +0200 Subject: [PATCH 106/138] Fix backup-tool: restore umask after creating archive --- archive/bt/create.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/archive/bt/create.py b/archive/bt/create.py index 678c00a..c5cff9f 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -9,6 +9,7 @@ from archive.exception import ArchiveCreateError from archive.index import ArchiveIndex from archive.manifest import Manifest, DiffStatus, diff_manifest +from archive.tools import tmp_umask from archive.bt.schedule import ScheduleDate, BaseSchedule, NoFullBackupError @@ -76,7 +77,6 @@ def chown(path, user): log.error("chown %s: %s: %s", path, type(e).__name__, e) def create(args, config): - os.umask(0o277) schedule = get_schedule(config) if schedule is None: return 0 @@ -92,9 +92,10 @@ def create(args, config): ] if config.user: tags.append("user:%s" % config.user) - arch = Archive().create(config.path, fileinfos=fileinfos, tags=tags) - if config.user: - chown(arch.path, config.user) + with tmp_umask(0o277): + arch = Archive().create(config.path, fileinfos=fileinfos, tags=tags) + if config.user: + chown(arch.path, config.user) return 0 def add_parser(subparsers): From 415a9ef0a91990c376ee2c7e7ca38eb7f73f3282 Mon Sep 17 00:00:00 2001 From: Rolf 
Krahl Date: Mon, 4 Oct 2021 22:07:14 +0200 Subject: [PATCH 107/138] Fix backup-tool: do not fail if there is nothing to archive --- archive/bt/create.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/archive/bt/create.py b/archive/bt/create.py index c5cff9f..9f0476f 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -1,6 +1,7 @@ """Create a backup. """ +from collections.abc import Sequence import datetime import logging import os @@ -82,6 +83,11 @@ def create(args, config): return 0 config['schedule'] = schedule.name fileinfos = get_fileinfos(config, schedule) + if not isinstance(fileinfos, Sequence): + fileinfos = list(fileinfos) + if not fileinfos: + log.debug("nothing to archive") + return 0 log.debug("creating archive %s", config.path) From 43e09bd3016e6d06e7dc9f5df0703a792309e4aa Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 10 Oct 2021 14:17:53 +0200 Subject: [PATCH 108/138] Move the classes used to mock system library class to conftest.py --- tests/conftest.py | 35 +++++++++++++++++++++ tests/test_06_backup-tool.py | 60 +++++------------------------------- 2 files changed, 43 insertions(+), 52 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a4ba952..db1e05b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ """pytest configuration. 
""" +import datetime import hashlib import os from pathlib import Path @@ -15,6 +16,7 @@ __all__ = [ + 'FrozenDateTime', 'FrozenDate', 'MockFunction', 'DataDir', 'DataFile', 'DataContentFile', 'DataRandomFile', 'DataSymLink', 'absflag', 'archive_name', 'callscript', 'check_manifest', 'get_output', 'gettestdata', 'require_compression', 'setup_testdata', @@ -55,6 +57,39 @@ def require_compression(compression): except ImportError: pytest.skip(msg % ("lzma", "xz")) +class FrozenDateTime(datetime.datetime): + _frozen = datetime.datetime.now() + + @classmethod + def freeze(cls, dt): + cls._frozen = dt + + @classmethod + def now(cls, tz=None): + return cls._frozen + +class FrozenDate(datetime.date): + + @classmethod + def today(cls): + return FrozenDateTime.now().date() + +class MockFunction: + """A function returning a preset value. + + May be used to mock library functions, such as pwd.getpwnam() or + socket.gethostname(). + """ + + def __init__(self, value=None): + self.set_return_value(value) + + def set_return_value(self, value): + self._value = value + + def __call__(self, *args): + return self._value + class TmpDir(object): """Provide a temporary directory. """ diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 9a42213..9c86850 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -15,55 +15,6 @@ from conftest import * -def mock_getpwnam(name): - """Mock pwd.getpwnam() pretending there is a user 'jdoe'. - """ - if name == 'jdoe': - pwt = ('jdoe', '*', 1000, 1000, 'John Doe', '/home/jdoe', '/bin/bash') - return pwd.struct_passwd(pwt) - else: - return pwd.getpwnam(name) - -def get_mock_constfunc(c): - """Return a function returning a constant value. - The returned function may be used to mock socket.gethostname(). 
- """ - def mock_func(): - return c - return mock_func - -_orig_dt_datetime = datetime.datetime -_orig_dt_date = datetime.date - -class FrozenDateTimeMeta(type): - def __instancecheck__(self, instance): - if type(instance) in {_orig_dt_datetime, FrozenDateTime}: - return True - -class FrozenDateTime(datetime.datetime): - __metaclass__ = FrozenDateTimeMeta - _frozen = datetime.datetime.now() - - @classmethod - def freeze(cls, dt): - cls._frozen = dt - - @classmethod - def now(cls, tz=None): - return cls._frozen - -class FrozenDateMeta(type): - def __instancecheck__(self, instance): - if type(instance) in {_orig_dt_date, FrozenDate}: - return True - -class FrozenDate(datetime.date): - __metaclass__ = FrozenDateMeta - - @classmethod - def today(cls): - return FrozenDateTime.now().date() - cfg = """# Configuration file for backup-tool. # All paths are within a root directory that need to be substituted. @@ -158,16 +109,21 @@ def test_dir(tmpdir): def test_backup(test_dir, monkeypatch): cfg_path = test_dir / "etc" / "backup.cfg" + gethostname = MockFunction() + pwt = ('jdoe', '*', 1000, 1000, 'John Doe', '/home/jdoe', '/bin/bash') + getpwnam = MockFunction(pwd.struct_passwd(pwt)) + monkeypatch.setenv("BACKUP_CFG", str(cfg_path)) monkeypatch.setattr(datetime, "datetime", FrozenDateTime) monkeypatch.setattr(datetime, "date", FrozenDate) - monkeypatch.setattr(pwd, "getpwnam", mock_getpwnam) + monkeypatch.setattr(socket, "gethostname", gethostname) + monkeypatch.setattr(pwd, "getpwnam", getpwnam) sys_desk_full = { d.path:d for d in sys_data } sys_serv_full = { d.path:d for d in sys_data + sys_serv_data } user_full = { d.path:d for d in user_data } - monkeypatch.setattr(socket, "gethostname", get_mock_constfunc("desk")) + gethostname.set_return_value("desk") FrozenDateTime.freeze(datetime.datetime(2021, 10, 3, 19, 30)) cmd = "backup-tool --verbose create --policy sys" monkeypatch.setattr(sys, "argv", cmd.split()) @@ -180,7 +136,7 @@ def test_backup(test_dir, monkeypatch): 
prefix_dir=test_dir) path.rename(test_dir / "net" / "backup" / "desk-211003-full.tar.bz2") - monkeypatch.setattr(socket, "gethostname", get_mock_constfunc("serv")) + gethostname.set_return_value("serv") FrozenDateTime.freeze(datetime.datetime(2021, 10, 4, 3, 0)) cmd = "backup-tool --verbose create --policy sys" monkeypatch.setattr(sys, "argv", cmd.split()) From bee93f6e59099abde2c5761f0b725c1b0f0b3bd9 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 10 Oct 2021 21:07:15 +0200 Subject: [PATCH 109/138] Add a helper class BTTestEnv and a fixture env to manage the environment to test backup-tool --- tests/test_06_backup-tool.py | 108 ++++++++++++++++++++--------------- 1 file changed, 61 insertions(+), 47 deletions(-) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 9c86850..5d2ec03 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -12,9 +12,50 @@ from archive import Archive from archive.bt import backup_tool import pytest +from _pytest.monkeypatch import MonkeyPatch from conftest import * +class BTTestEnv: + """Helper class to manage the environment to test backup-tool. 
+ """ + + def __init__(self, root): + self.root = root + self.monkeypatch = MonkeyPatch() + self._datetime = FrozenDateTime + self._date = FrozenDate + self._gethostname = MockFunction() + pwt = ('jdoe', '*', 1000, 1000, 'John Doe', '/home/jdoe', '/bin/bash') + self._getpwnam = MockFunction(pwd.struct_passwd(pwt)) + + def __enter__(self): + self.monkeypatch.setattr(datetime, "datetime", self._datetime) + self.monkeypatch.setattr(datetime, "date", self._date) + self.monkeypatch.setattr(socket, "gethostname", self._gethostname) + self.monkeypatch.setattr(pwd, "getpwnam", self._getpwnam) + return self + + def __exit__(self, type, value, tb): + self.monkeypatch.undo() + + def set_datetime(self, dt): + self._datetime.freeze(dt) + + def set_hostname(self, name): + self._gethostname.set_return_value(name) + + def run_backup_tool(self, argv): + self.monkeypatch.setattr(sys, "argv", argv.split()) + with pytest.raises(SystemExit) as excinfo: + backup_tool() + assert excinfo.value.code == 0 + +@pytest.fixture(scope="module") +def env(tmpdir): + with BTTestEnv(tmpdir) as e: + yield e + cfg = """# Configuration file for backup-tool. # All paths are within a root directory that need to be substituted. 
@@ -96,70 +137,43 @@ DataDir(Path("var", "backup"), 0o755, mtime=1632704400), ] -@pytest.fixture(scope="module") -def test_dir(tmpdir): - subst = dict(root=tmpdir) +def test_backup(env): + subst = dict(root=env.root) cfg_data = string.Template(cfg).substitute(subst).encode('ascii') cfg_path = Path("etc", "backup.cfg") cfg_file = DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683) sys_data.append(cfg_file) all_data = itertools.chain(sys_data, sys_serv_data, user_data, excl_data) - setup_testdata(tmpdir, all_data) - return tmpdir - -def test_backup(test_dir, monkeypatch): - cfg_path = test_dir / "etc" / "backup.cfg" - gethostname = MockFunction() - pwt = ('jdoe', '*', 1000, 1000, 'John Doe', '/home/jdoe', '/bin/bash') - getpwnam = MockFunction(pwd.struct_passwd(pwt)) + setup_testdata(env.root, all_data) - monkeypatch.setenv("BACKUP_CFG", str(cfg_path)) - monkeypatch.setattr(datetime, "datetime", FrozenDateTime) - monkeypatch.setattr(datetime, "date", FrozenDate) - monkeypatch.setattr(socket, "gethostname", gethostname) - monkeypatch.setattr(pwd, "getpwnam", getpwnam) + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) sys_desk_full = { d.path:d for d in sys_data } sys_serv_full = { d.path:d for d in sys_data + sys_serv_data } user_full = { d.path:d for d in user_data } - gethostname.set_return_value("desk") - FrozenDateTime.freeze(datetime.datetime(2021, 10, 3, 19, 30)) - cmd = "backup-tool --verbose create --policy sys" - monkeypatch.setattr(sys, "argv", cmd.split()) - with pytest.raises(SystemExit) as excinfo: - backup_tool() - assert excinfo.value.code == 0 - path = test_dir / "var" / "backup" / "desk-211003-full.tar.bz2" + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 10, 3, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + path = env.root / "var" / "backup" / "desk-211003-full.tar.bz2" with Archive().open(path) as archive: check_manifest(archive.manifest, sys_desk_full.values(), - 
prefix_dir=test_dir) - path.rename(test_dir / "net" / "backup" / "desk-211003-full.tar.bz2") - - gethostname.set_return_value("serv") - FrozenDateTime.freeze(datetime.datetime(2021, 10, 4, 3, 0)) - cmd = "backup-tool --verbose create --policy sys" - monkeypatch.setattr(sys, "argv", cmd.split()) - with pytest.raises(SystemExit) as excinfo: - backup_tool() - assert excinfo.value.code == 0 - path = test_dir / "net" / "backup" / "serv-211004-full.tar.bz2" + prefix_dir=env.root) + path.rename(env.root / "net" / "backup" / "desk-211003-full.tar.bz2") + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + path = env.root / "net" / "backup" / "serv-211004-full.tar.bz2" with Archive().open(path) as archive: check_manifest(archive.manifest, sys_serv_full.values(), - prefix_dir=test_dir) - cmd = "backup-tool --verbose create --user jdoe" - monkeypatch.setattr(sys, "argv", cmd.split()) - with pytest.raises(SystemExit) as excinfo: - backup_tool() - assert excinfo.value.code == 0 - path = test_dir / "net" / "backup" / "jdoe-211004-full.tar.bz2" + prefix_dir=env.root) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + path = env.root / "net" / "backup" / "jdoe-211004-full.tar.bz2" with Archive().open(path) as archive: check_manifest(archive.manifest, user_full.values(), - prefix_dir=test_dir) - cmd = "backup-tool --verbose index" - monkeypatch.setattr(sys, "argv", cmd.split()) - with pytest.raises(SystemExit) as excinfo: - backup_tool() + prefix_dir=env.root) + env.run_backup_tool("backup-tool --verbose index") sys_desk_cumu = {} sys_serv_cumu = {} From 0ee90cb2cfd10270ebe84553aae1d1c28e126481 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 10 Oct 2021 22:02:28 +0200 Subject: [PATCH 110/138] Move management of test data into class BTTestEnv --- tests/test_06_backup-tool.py | 128 +++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 59 
deletions(-) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 5d2ec03..bd22171 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -28,6 +28,8 @@ def __init__(self, root): self._gethostname = MockFunction() pwt = ('jdoe', '*', 1000, 1000, 'John Doe', '/home/jdoe', '/bin/bash') self._getpwnam = MockFunction(pwd.struct_passwd(pwt)) + self.test_data = dict() + self.test_data_tags = dict() def __enter__(self): self.monkeypatch.setattr(datetime, "datetime", self._datetime) @@ -45,6 +47,32 @@ def set_datetime(self, dt): def set_hostname(self, name): self._gethostname.set_return_value(name) + def add_test_data(self, tags, items): + for i in items: + self.test_data[i.path] = i + for t in tags: + for s in ('full', 'cumu', 'incr'): + k = (t,s) + self.test_data_tags.setdefault(k, set()) + self.test_data_tags[k].add(i.path) + + def flush_test_data(self, tags, schedule): + if schedule == 'cumu': + schedules = ('cumu', 'incr') + else: + schedules = ('incr',) + for t in tags: + for s in schedules: + self.test_data_tags[t,s] = set() + + def setup_test_data(self): + setup_testdata(self.root, self.test_data.values()) + + def check_archive(self, path, tag, schedule): + items = [ self.test_data[p] for p in self.test_data_tags[tag,schedule] ] + with Archive().open(path) as archive: + check_manifest(archive.manifest, items, prefix_dir=self.root) + def run_backup_tool(self, argv): self.monkeypatch.setattr(sys, "argv", argv.split()) with pytest.raises(SystemExit) as excinfo: @@ -102,82 +130,64 @@ def env(tmpdir): schedule.incr.date = * """ -sys_data = [ - DataDir(Path("etc"), 0o755, mtime=1633129414), - DataContentFile(Path("etc", "foo.cfg"), - b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), - DataDir(Path("root"), 0o700, mtime=1633274230), - DataRandomFile(Path("root", "rnd5.dat"), - 0o600, size=85, mtime=1633243020), - DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), - mtime=1633243020), -] - -sys_serv_data = [ - 
DataDir(Path("usr", "local"), 0o755, mtime=1616490893), - DataRandomFile(Path("usr", "local", "rnd6.dat"), - 0o644, size=607, mtime=1633275272), -] - -user_data = [ - DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), - DataRandomFile(Path("home", "jdoe", "rnd3.dat"), - 0o600, size=796, mtime=1633243020), -] - -excl_data = [ - DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), - DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), - 0o600, size=385, mtime=1633275272), - DataDir(Path("home", "jdoe", "tmp"), 0o755, mtime=1631130997), - DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), - DataRandomFile(Path("root", ".cache", "rnd4.dat"), - 0o600, size=665, mtime=1633275272), - DataDir(Path("net", "backup"), 0o755, mtime=1632704400), - DataDir(Path("var", "backup"), 0o755, mtime=1632704400), -] - def test_backup(env): subst = dict(root=env.root) cfg_data = string.Template(cfg).substitute(subst).encode('ascii') cfg_path = Path("etc", "backup.cfg") - cfg_file = DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683) - sys_data.append(cfg_file) - all_data = itertools.chain(sys_data, sys_serv_data, user_data, excl_data) - setup_testdata(env.root, all_data) - + sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1633129414), + DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683), + DataContentFile(Path("etc", "foo.cfg"), + b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), + DataDir(Path("root"), 0o700, mtime=1633274230), + DataRandomFile(Path("root", "rnd5.dat"), + 0o600, size=85, mtime=1633243020), + DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), + mtime=1633243020), + ] + env.add_test_data(('desk','serv'), sys_data) + sys_serv_data = [ + DataDir(Path("usr", "local"), 0o755, mtime=1616490893), + DataRandomFile(Path("usr", "local", "rnd6.dat"), + 0o644, size=607, mtime=1633275272), + ] + env.add_test_data(('serv',), sys_serv_data) + user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + 
DataRandomFile(Path("home", "jdoe", "rnd3.dat"), + 0o600, size=796, mtime=1633243020), + ] + env.add_test_data(('user',), user_data) + excl_data = [ + DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), + DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), + 0o600, size=385, mtime=1633275272), + DataDir(Path("home", "jdoe", "tmp"), 0o755, mtime=1631130997), + DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), + DataRandomFile(Path("root", ".cache", "rnd4.dat"), + 0o600, size=665, mtime=1633275272), + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + DataDir(Path("var", "backup"), 0o755, mtime=1632704400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) - sys_desk_full = { d.path:d for d in sys_data } - sys_serv_full = { d.path:d for d in sys_data + sys_serv_data } - user_full = { d.path:d for d in user_data } - env.set_hostname("desk") env.set_datetime(datetime.datetime(2021, 10, 3, 19, 30)) env.run_backup_tool("backup-tool --verbose create --policy sys") path = env.root / "var" / "backup" / "desk-211003-full.tar.bz2" - with Archive().open(path) as archive: - check_manifest(archive.manifest, sys_desk_full.values(), - prefix_dir=env.root) + env.check_archive(path, 'desk', 'full') path.rename(env.root / "net" / "backup" / "desk-211003-full.tar.bz2") env.set_hostname("serv") env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") path = env.root / "net" / "backup" / "serv-211004-full.tar.bz2" - with Archive().open(path) as archive: - check_manifest(archive.manifest, sys_serv_full.values(), - prefix_dir=env.root) + env.check_archive(path, 'serv', 'full') env.run_backup_tool("backup-tool --verbose create --user jdoe") path = env.root / "net" / "backup" / "jdoe-211004-full.tar.bz2" - with Archive().open(path) as archive: - check_manifest(archive.manifest, user_full.values(), - 
prefix_dir=env.root) + env.check_archive(path, 'user', 'full') env.run_backup_tool("backup-tool --verbose index") - sys_desk_cumu = {} - sys_serv_cumu = {} - user_cumu = {} - sys_desk_incr = {} - sys_serv_incr = {} - user_incr = {} + env.flush_test_data(('desk', 'serv', 'user'), 'cumu') From b6e7ef4fe8b94ba865b41097e7eaf7746ad8fd2e Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 10 Oct 2021 23:21:39 +0200 Subject: [PATCH 111/138] Move backup-tool into a test class --- tests/test_06_backup-tool.py | 136 ++++++++++++++++++----------------- 1 file changed, 71 insertions(+), 65 deletions(-) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index bd22171..fcd2e6a 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -22,6 +22,7 @@ class BTTestEnv: def __init__(self, root): self.root = root + self.root.mkdir() self.monkeypatch = MonkeyPatch() self._datetime = FrozenDateTime self._date = FrozenDate @@ -79,12 +80,14 @@ def run_backup_tool(self, argv): backup_tool() assert excinfo.value.code == 0 -@pytest.fixture(scope="module") -def env(tmpdir): - with BTTestEnv(tmpdir) as e: +@pytest.fixture(scope="class") +def env(tmpdir, request): + with BTTestEnv(tmpdir / request.cls.__name__) as e: yield e -cfg = """# Configuration file for backup-tool. +class TestBackupTool: + + cfg = """# Configuration file for backup-tool. # All paths are within a root directory that need to be substituted. 
[DEFAULT] @@ -130,64 +133,67 @@ def env(tmpdir): schedule.incr.date = * """ -def test_backup(env): - subst = dict(root=env.root) - cfg_data = string.Template(cfg).substitute(subst).encode('ascii') - cfg_path = Path("etc", "backup.cfg") - sys_data = [ - DataDir(Path("etc"), 0o755, mtime=1633129414), - DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683), - DataContentFile(Path("etc", "foo.cfg"), - b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), - DataDir(Path("root"), 0o700, mtime=1633274230), - DataRandomFile(Path("root", "rnd5.dat"), - 0o600, size=85, mtime=1633243020), - DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), - mtime=1633243020), - ] - env.add_test_data(('desk','serv'), sys_data) - sys_serv_data = [ - DataDir(Path("usr", "local"), 0o755, mtime=1616490893), - DataRandomFile(Path("usr", "local", "rnd6.dat"), - 0o644, size=607, mtime=1633275272), - ] - env.add_test_data(('serv',), sys_serv_data) - user_data = [ - DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), - DataRandomFile(Path("home", "jdoe", "rnd3.dat"), - 0o600, size=796, mtime=1633243020), - ] - env.add_test_data(('user',), user_data) - excl_data = [ - DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), - DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), - 0o600, size=385, mtime=1633275272), - DataDir(Path("home", "jdoe", "tmp"), 0o755, mtime=1631130997), - DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), - DataRandomFile(Path("root", ".cache", "rnd4.dat"), - 0o600, size=665, mtime=1633275272), - DataDir(Path("net", "backup"), 0o755, mtime=1632704400), - DataDir(Path("var", "backup"), 0o755, mtime=1632704400), - ] - env.add_test_data(('excl',), excl_data) - env.setup_test_data() - env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) - - env.set_hostname("desk") - env.set_datetime(datetime.datetime(2021, 10, 3, 19, 30)) - env.run_backup_tool("backup-tool --verbose create --policy sys") - path = env.root / "var" / "backup" / 
"desk-211003-full.tar.bz2" - env.check_archive(path, 'desk', 'full') - path.rename(env.root / "net" / "backup" / "desk-211003-full.tar.bz2") - - env.set_hostname("serv") - env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) - env.run_backup_tool("backup-tool --verbose create --policy sys") - path = env.root / "net" / "backup" / "serv-211004-full.tar.bz2" - env.check_archive(path, 'serv', 'full') - env.run_backup_tool("backup-tool --verbose create --user jdoe") - path = env.root / "net" / "backup" / "jdoe-211004-full.tar.bz2" - env.check_archive(path, 'user', 'full') - env.run_backup_tool("backup-tool --verbose index") - - env.flush_test_data(('desk', 'serv', 'user'), 'cumu') + def init_data(self, env): + subst = dict(root=env.root) + cfg_data = string.Template(self.cfg).substitute(subst).encode('ascii') + cfg_path = Path("etc", "backup.cfg") + sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1633129414), + DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683), + DataContentFile(Path("etc", "foo.cfg"), + b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), + DataDir(Path("root"), 0o700, mtime=1633274230), + DataRandomFile(Path("root", "rnd5.dat"), + 0o600, size=85, mtime=1633243020), + DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), + mtime=1633243020), + ] + env.add_test_data(('desk','serv'), sys_data) + sys_serv_data = [ + DataDir(Path("usr", "local"), 0o755, mtime=1616490893), + DataRandomFile(Path("usr", "local", "rnd6.dat"), + 0o644, size=607, mtime=1633275272), + ] + env.add_test_data(('serv',), sys_serv_data) + user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd3.dat"), + 0o600, size=796, mtime=1633243020), + ] + env.add_test_data(('user',), user_data) + excl_data = [ + DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), + DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), + 0o600, size=385, mtime=1633275272), + DataDir(Path("home", "jdoe", "tmp"), 0o755, 
mtime=1631130997), + DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), + DataRandomFile(Path("root", ".cache", "rnd4.dat"), + 0o600, size=665, mtime=1633275272), + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + DataDir(Path("var", "backup"), 0o755, mtime=1632704400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + def test_initial_full_backup(self, env): + self.init_data(env) + + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 10, 3, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + path = env.root / "var" / "backup" / "desk-211003-full.tar.bz2" + env.check_archive(path, 'desk', 'full') + path.rename(env.root / "net" / "backup" / "desk-211003-full.tar.bz2") + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + path = env.root / "net" / "backup" / "serv-211004-full.tar.bz2" + env.check_archive(path, 'serv', 'full') + env.run_backup_tool("backup-tool --verbose create --user jdoe") + path = env.root / "net" / "backup" / "jdoe-211004-full.tar.bz2" + env.check_archive(path, 'user', 'full') + env.run_backup_tool("backup-tool --verbose index") + + env.flush_test_data(('desk', 'serv', 'user'), 'cumu') From bd788c1349202c5481bfbc04bb21034ed9c664a2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 16 Oct 2021 21:12:26 +0200 Subject: [PATCH 112/138] Tests: creating a DataItem also touches the parent directory --- tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/conftest.py b/tests/conftest.py index db1e05b..d146036 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -152,6 +152,7 @@ def _set_fs_attrs(path, mode, mtime): path.chmod(mode) if mtime is not None: os.utime(path, (mtime, mtime), follow_symlinks=False) + os.utime(path.parent, (mtime, mtime), follow_symlinks=False) class 
DataItem: From 28f1163b9ce723bd41d87092e09746195f3b35a4 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 17 Oct 2021 13:58:31 +0200 Subject: [PATCH 113/138] Add checking of the archive index --- tests/test_06_backup-tool.py | 60 ++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index fcd2e6a..74dadb3 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -10,6 +10,7 @@ import string import sys from archive import Archive +from archive.index import IndexItem, ArchiveIndex from archive.bt import backup_tool import pytest from _pytest.monkeypatch import MonkeyPatch @@ -31,6 +32,13 @@ def __init__(self, root): self._getpwnam = MockFunction(pwd.struct_passwd(pwt)) self.test_data = dict() self.test_data_tags = dict() + self.index = ArchiveIndex() + self.backupdir = None + self.tmptarget = None + + def config(self, backupdir, tmptarget): + self.backupdir = self.root / backupdir + self.tmptarget = self.root / tmptarget def __enter__(self): self.monkeypatch.setattr(datetime, "datetime", self._datetime) @@ -69,11 +77,40 @@ def flush_test_data(self, tags, schedule): def setup_test_data(self): setup_testdata(self.root, self.test_data.values()) - def check_archive(self, path, tag, schedule): + def move_archive(self, name): + (self.tmptarget / name).rename(self.backupdir / name) + + def check_archive(self, name, tag, schedule): + path = self.backupdir / name items = [ self.test_data[p] for p in self.test_data_tags[tag,schedule] ] with Archive().open(path) as archive: check_manifest(archive.manifest, items, prefix_dir=self.root) + def check_index(self): + idx_file = self.backupdir / ".index.yaml" + backupdir_content = { idx_file } + with idx_file.open("rb") as f: + idx = ArchiveIndex(f) + assert len(idx) == len(self.index) + for i1, i0 in zip(idx, self.index): + assert i1.as_dict() == i0.as_dict() + backupdir_content.add(i0.path) + assert 
set(self.backupdir.iterdir()) == backupdir_content + assert set(self.tmptarget.iterdir()) == set() + + def add_index(self, name, host, schedule, policy=None, user=None): + if user: + policy = 'user' + idx_data = { + 'date': datetime.datetime.now().isoformat(sep=' '), + 'path': self.backupdir / name, + 'host': host, + 'policy': policy, + 'user': user, + 'schedule': schedule, + } + self.index.append(IndexItem(idx_data)) + def run_backup_tool(self, argv): self.monkeypatch.setattr(sys, "argv", argv.split()) with pytest.raises(SystemExit) as excinfo: @@ -134,6 +171,7 @@ class TestBackupTool: """ def init_data(self, env): + env.config("net/backup", "var/backup") subst = dict(root=env.root) cfg_data = string.Template(self.cfg).substitute(subst).encode('ascii') cfg_path = Path("etc", "backup.cfg") @@ -182,18 +220,22 @@ def test_initial_full_backup(self, env): env.set_hostname("desk") env.set_datetime(datetime.datetime(2021, 10, 3, 19, 30)) env.run_backup_tool("backup-tool --verbose create --policy sys") - path = env.root / "var" / "backup" / "desk-211003-full.tar.bz2" - env.check_archive(path, 'desk', 'full') - path.rename(env.root / "net" / "backup" / "desk-211003-full.tar.bz2") + archive_name = "desk-211003-full.tar.bz2" + env.move_archive(archive_name) + env.check_archive(archive_name, 'desk', 'full') + env.add_index(archive_name, 'desk', 'full', policy='sys') env.set_hostname("serv") env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") - path = env.root / "net" / "backup" / "serv-211004-full.tar.bz2" - env.check_archive(path, 'serv', 'full') + archive_name = "serv-211004-full.tar.bz2" + env.check_archive(archive_name, 'serv', 'full') + env.add_index(archive_name, 'serv', 'full', policy='sys') env.run_backup_tool("backup-tool --verbose create --user jdoe") - path = env.root / "net" / "backup" / "jdoe-211004-full.tar.bz2" - env.check_archive(path, 'user', 'full') - env.run_backup_tool("backup-tool 
--verbose index") + archive_name = "jdoe-211004-full.tar.bz2" + env.check_archive(archive_name, 'user', 'full') + env.add_index(archive_name, 'serv', 'full', user='jdoe') + env.run_backup_tool("backup-tool --verbose index") + env.check_index() env.flush_test_data(('desk', 'serv', 'user'), 'cumu') From 333c7dc9a2710bc2c1495b2b6b12240f621f6ed2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 14 Nov 2021 21:53:03 +0100 Subject: [PATCH 114/138] Add a plan for the backup-tool tests --- tests/test_06_backup-tool.py | 72 ++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 74dadb3..5ed2fd4 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -123,6 +123,78 @@ def env(tmpdir, request): yield e class TestBackupTool: + """Test scenario: consider a directory having the following structure:: + + testdir + +-- etc + +-- home + | +-- jdoe + +-- net + | +-- backup + +-- root + +-- usr + | +-- local + +-- var + +-- backup + + Backups are created at different points in time and different + policies, see the cfg file for details: + + + host=desk, policy=sys + schedule: monthly full, weekly incr + + + host=serv, policy=sys + schedule: monthly full, weekly incr + + + host=serv, policy=user, user=jdoe + schedule: monthly full, weekly cumu, daily incr + + Tests: + + + test_initial_full_backup: full backup of initial test data. + 2021-10-03: host=desk, policy=sys, schedule=full + 2021-10-04: host=serv, policy=sys, schedule=full + 2021-10-04: host=serv, policy=user, user=jdoe, schedule=full + + + test_simple_incr_backup: add a few files, both in sys and in + user directories. According to schedule, only incremental user + backup will be made. + 2021-10-06: host=serv, policy=user, user=jdoe, schedule=incr + + + test_noop_incr_backup: add only files in directories that being + excluded. Since there is nothing to backup, no backup should be + created at all. 
+ 2021-10-08: - + + + test_simple_cumu_backup: add some more files, both in sys and in + user directories. According to schedule, a cumulative backup + for user and incremental backups for sys are made. + 2021-10-10: host=desk, policy=sys, schedule=incr + 2021-10-11: host=serv, policy=sys, schedule=incr + 2021-10-11: host=serv, policy=user, user=jdoe, schedule=cumu + + + test_incr_backup: add another files in a user directory. + 2021-10-13: host=serv, policy=user, user=jdoe, schedule=incr + + + test_del_incr_backup: delete the file created for the last test + again. Only the parent directory will be added to the + incremental backup for it has a changed file modification time, + but not its content. + 2021-10-15: host=serv, policy=user, user=jdoe, schedule=incr + + + test_cumu_backup: nothing has changed in sys directories, no + backups will be created for sys. The cumulative backup for user + will essentially have the same content as the last one. + 2021-10-17: - + 2021-10-18: - + 2021-10-18: host=serv, policy=user, user=jdoe, schedule=cumu + + + test_full_backup: the next regular full backup. + 2021-11-07: host=desk, policy=sys, schedule=full + 2021-11-08: host=serv, policy=sys, schedule=full + 2021-11-08: host=serv, policy=user, user=jdoe, schedule=full + + """ cfg = """# Configuration file for backup-tool. # All paths are within a root directory that need to be substituted. 
From 2e69289e214b1fd9a2c07ad1a804df7a5f69b9eb Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 21 Nov 2021 17:35:03 +0100 Subject: [PATCH 115/138] Implement the backup-tool tests --- tests/conftest.py | 6 + tests/test_06_backup-tool.py | 224 +++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index d146036..d91dc26 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -175,6 +175,12 @@ def st_mode(self): def create(self, main_dir): raise NotImplementedError + def unlink(self, main_dir, mtime): + path = main_dir / self.path + path.unlink() + if mtime: + os.utime(path.parent, (mtime, mtime), follow_symlinks=False) + class DataFileOrDir(DataItem): def __init__(self, path, mode, *, mtime=None): diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 5ed2fd4..9a209b1 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -65,6 +65,15 @@ def add_test_data(self, tags, items): self.test_data_tags.setdefault(k, set()) self.test_data_tags[k].add(i.path) + def remove_test_data(self, tags, items): + for i in items: + del self.test_data[i.path] + for t in tags: + for s in ('full', 'cumu', 'incr'): + k = (t,s) + self.test_data_tags.setdefault(k, set()) + self.test_data_tags[k].discard(i.path) + def flush_test_data(self, tags, schedule): if schedule == 'cumu': schedules = ('cumu', 'incr') @@ -286,7 +295,10 @@ def init_data(self, env): env.setup_test_data() env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + @pytest.mark.dependency() def test_initial_full_backup(self, env): + """Full backup of initial test data. 
+ """ self.init_data(env) env.set_hostname("desk") @@ -311,3 +323,215 @@ def test_initial_full_backup(self, env): env.run_backup_tool("backup-tool --verbose index") env.check_index() env.flush_test_data(('desk', 'serv', 'user'), 'cumu') + + @pytest.mark.dependency(depends=["test_initial_full_backup"], scope='class') + def test_simple_incr_backup(self, env): + """Add a few files, both in sys and in user directories. + According to schedule, only incremental user backup will be + made. + """ + mtime = 1633451717 + u_path = Path("home", "jdoe", "misc") + u_dir = DataDir(u_path, 0o755, mtime=mtime) + u_file = DataRandomFile(u_path / "rnd7.dat", + 0o644, size=473, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + mtime = 1633464305 + s_path = Path("root", "rnd8.dat") + s_file = DataRandomFile(s_path, 0o600, size=42, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_dir, u_file]) + env.add_test_data(('desk','serv'), [s_parent, s_file]) + setup_testdata(env.root, [u_dir, u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211006-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_simple_incr_backup"], scope='class') + def test_noop_incr_backup(self, env): + """Add only files in directories that being excluded. + Since there is nothing to backup, no backup should be created at all. 
+ """ + mtime = 1633487220 + s_path = Path("root", ".cache", "rnd10.dat") + s_file = DataRandomFile(s_path, 0o600, size=27, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + mtime = 1633500600 + u_path = Path("home", "jdoe", "tmp", "rnd9.dat") + u_file = DataRandomFile(u_path, 0o640, size=582, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('excl',), [s_parent, s_file, u_parent, u_file]) + setup_testdata(env.root, [s_file, u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 8, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.run_backup_tool("backup-tool --verbose create --user jdoe") + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_noop_incr_backup"], scope='class') + def test_simple_cumu_backup(self, env): + """Add some more files, both in sys and in user directories. + According to schedule, a cumulative backup for user and + incremental backups for sys are made. 
+ """ + mtime = 1633837020 + s0_path = Path("usr", "local", "rnd11.dat") + s0_file = DataRandomFile(s0_path, 0o644, size=528, mtime=mtime) + s0_parent = env.test_data[s0_path.parent] + s0_parent.mtime = mtime + mtime = 1633843260 + s1_path = Path("root", "rnd12.dat") + s1_file = DataRandomFile(s1_path, 0o600, size=17, mtime=mtime) + s1_parent = env.test_data[s1_path.parent] + s1_parent.mtime = mtime + mtime = 1633876920 + u_path = Path("home", "jdoe", "misc", "rnd13.dat") + u_file = DataRandomFile(u_path, 0o644, size=378, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('serv',), [s0_parent, s0_file]) + env.add_test_data(('desk','serv'), [s1_parent, s1_file]) + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [s0_file, s1_file, u_file]) + + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 10, 10, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "desk-211010-incr.tar.bz2" + env.move_archive(archive_name) + env.check_archive(archive_name, 'desk', 'incr') + env.add_index(archive_name, 'desk', 'incr', policy='sys') + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211011-incr.tar.bz2" + env.check_archive(archive_name, 'serv', 'incr') + env.add_index(archive_name, 'serv', 'incr', policy='sys') + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211011-cumu.tar.bz2" + env.check_archive(archive_name, 'user', 'cumu') + env.add_index(archive_name, 'serv', 'cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('desk', 'serv', 'user'), 'incr') + + @pytest.mark.dependency(depends=["test_simple_cumu_backup"], scope='class') + def test_incr_backup(self, env): + """Add another files in a user directory. 
+ """ + mtime = 1634067525 + u_path = Path("home", "jdoe", "misc", "rnd14.dat") + u_file = DataRandomFile(u_path, 0o644, size=146, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211013-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_incr_backup"], scope='class') + def test_del_incr_backup(self, env): + """Delete the file created for the last test again. + Only the parent directory will be added to the incremental + backup for it has a changed file modification time, but not + its content. 
+ """ + mtime = 1634240325 + u_path = Path("home", "jdoe", "misc", "rnd14.dat") + u_file = env.test_data[u_path] + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.remove_test_data(('user',), [u_file]) + env.add_test_data(('user',), [u_parent]) + u_file.unlink(env.root, mtime) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 15, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211015-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_del_incr_backup"], scope='class') + def test_cumu_backup(self, env): + """Do the next weekly backup. + Nothing has changed in sys directories, no backups will be + created for sys. The cumulative backup for user will + essentially have the same content as the last one. + """ + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 10, 17, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211018-cumu.tar.bz2" + env.check_archive(archive_name, 'user', 'cumu') + env.add_index(archive_name, 'serv', 'cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('desk', 'serv', 'user'), 'incr') + + @pytest.mark.dependency(depends=["test_cumu_backup"], scope='class') + def test_full_backup(self, env): + """Do the next monthly backup. 
+ """ + env.set_hostname("desk") + env.set_datetime(datetime.datetime(2021, 11, 7, 19, 30)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "desk-211107-full.tar.bz2" + env.move_archive(archive_name) + env.check_archive(archive_name, 'desk', 'full') + env.add_index(archive_name, 'desk', 'full', policy='sys') + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211108-full.tar.bz2" + env.check_archive(archive_name, 'serv', 'full') + env.add_index(archive_name, 'serv', 'full', policy='sys') + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211108-full.tar.bz2" + env.check_archive(archive_name, 'user', 'full') + env.add_index(archive_name, 'serv', 'full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('desk', 'serv', 'user'), 'cumu') From d02c7e107c4261971f86c118bb099c96e5419b89 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 21 Nov 2021 18:35:54 +0100 Subject: [PATCH 116/138] Fix backup-tool tests: make sure all archives are created with different timestamps --- tests/test_06_backup-tool.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 9a209b1..57701e1 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -315,6 +315,7 @@ def test_initial_full_backup(self, env): archive_name = "serv-211004-full.tar.bz2" env.check_archive(archive_name, 'serv', 'full') env.add_index(archive_name, 'serv', 'full', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211004-full.tar.bz2" env.check_archive(archive_name, 'user', 'full') @@ -349,6 +350,7 @@ def test_simple_incr_backup(self, env): env.set_hostname("serv") 
env.set_datetime(datetime.datetime(2021, 10, 6, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211006-incr.tar.bz2" env.check_archive(archive_name, 'user', 'incr') @@ -379,6 +381,7 @@ def test_noop_incr_backup(self, env): env.set_hostname("serv") env.set_datetime(datetime.datetime(2021, 10, 8, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 8, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") env.run_backup_tool("backup-tool --verbose index") @@ -425,6 +428,7 @@ def test_simple_cumu_backup(self, env): archive_name = "serv-211011-incr.tar.bz2" env.check_archive(archive_name, 'serv', 'incr') env.add_index(archive_name, 'serv', 'incr', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211011-cumu.tar.bz2" env.check_archive(archive_name, 'user', 'cumu') @@ -449,6 +453,7 @@ def test_incr_backup(self, env): env.set_hostname("serv") env.set_datetime(datetime.datetime(2021, 10, 13, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211013-incr.tar.bz2" env.check_archive(archive_name, 'user', 'incr') @@ -477,6 +482,7 @@ def test_del_incr_backup(self, env): env.set_hostname("serv") env.set_datetime(datetime.datetime(2021, 10, 15, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 15, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211015-incr.tar.bz2" env.check_archive(archive_name, 'user', 'incr') @@ -500,6 +506,7 @@ def 
test_cumu_backup(self, env): env.set_hostname("serv") env.set_datetime(datetime.datetime(2021, 10, 18, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211018-cumu.tar.bz2" env.check_archive(archive_name, 'user', 'cumu') @@ -527,6 +534,7 @@ def test_full_backup(self, env): archive_name = "serv-211108-full.tar.bz2" env.check_archive(archive_name, 'serv', 'full') env.add_index(archive_name, 'serv', 'full', policy='sys') + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211108-full.tar.bz2" env.check_archive(archive_name, 'user', 'full') From 0e613e3cad5371850b4410cf72f5b9f8f5f2848f Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 21 Nov 2021 22:19:50 +0100 Subject: [PATCH 117/138] Explicitely set maybe_placeholders=False in Lark() --- archive/bt/schedule.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/archive/bt/schedule.py b/archive/bt/schedule.py index 630d216..1cbb442 100644 --- a/archive/bt/schedule.py +++ b/archive/bt/schedule.py @@ -159,7 +159,9 @@ def sd(self, l): r.extend((_DTMatcherAny(), _DTMatcherAny(), _DTMatcherAny())) return r -_sd_parser = Lark(_sd_grammar, start='sd', parser='lalr', transformer=_SDTf()) +_sd_parser = Lark(_sd_grammar, + start='sd', parser='lalr', transformer=_SDTf(), + maybe_placeholders=False) class ScheduleDate(_dt_tuple): From d03a4dcbfae0d1062317029c34c911a405f5bd5f Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 22 Nov 2021 22:02:03 +0100 Subject: [PATCH 118/138] Add two more tests for backup-tool --- tests/conftest.py | 4 +++ tests/data/.sha256 | 1 + tests/data/rnd2bis.dat | Bin 0 -> 487 bytes tests/test_06_backup-tool.py | 67 +++++++++++++++++++++++++++++++++-- 4 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 
tests/data/rnd2bis.dat diff --git a/tests/conftest.py b/tests/conftest.py index d91dc26..3ce8bdf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -191,6 +191,10 @@ def __init__(self, path, mode, *, mtime=None): def mode(self): return self._mode + @mode.setter + def mode(self, mode): + self._mode = mode + class DataFileBase(DataFileOrDir): Checksums = _get_checksums() diff --git a/tests/data/.sha256 b/tests/data/.sha256 index 1a1d202..ba12ce9 100644 --- a/tests/data/.sha256 +++ b/tests/data/.sha256 @@ -1,3 +1,4 @@ b22b009134622b6508d756f1062455d71a7026594eacb0badf81f4f677929ebe msg.txt 21bad91c29230c3b1da568d4f2ccc77f6d79c0ea91ac6a40d37b2b15a2932bea rnd.dat 2d65300e0b6b56d4e50812a962b4a01db8d3a6ac96396a2d92fe59a13b286ee8 rnd2.dat +9de77792007068fa67fa063180ae970c1b7d93b80a8848a7524e4a500effafc0 rnd2bis.dat diff --git a/tests/data/rnd2bis.dat b/tests/data/rnd2bis.dat new file mode 100644 index 0000000000000000000000000000000000000000..caa901591826bf4f2421d29e247191d4d81563f7 GIT binary patch literal 487 zcmVBjkSknIyRHx@E6vv%Q`V9$pka zqL74c_S?+_XZUda8hp3rkP^eNql@_icK7G9igh)+v=Ar5C`F%iUQv(i@T>YZumwt? zwp^An9nhSePWmcL&WJBC6f`Z4b`7CluErE-Llnj;on-U2yvt8VtCS48 zQlJy9*bR$!wY88^7c> d_p49ODEzML7}R0Vrxj!~K_yRzxYvn-lgT+!^P&I% literal 0 HcmV?d00001 diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 57701e1..7e9530c 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -173,7 +173,15 @@ class TestBackupTool: + test_noop_incr_backup: add only files in directories that being excluded. Since there is nothing to backup, no backup should be created at all. - 2021-10-08: - + 2021-10-07: - + + + test_content_incr_backup: modify a file's content, but make sure + all filesystem metadata remain unchanged. + 2021-10-08: host=serv, policy=user, user=jdoe, schedule=incr + + + test_meta_incr_backup: modify a file's metadata, but keep the + content unchanged. 
+ 2021-10-09: host=serv, policy=user, user=jdoe, schedule=incr + test_simple_cumu_backup: add some more files, both in sys and in user directories. According to schedule, a cumulative backup @@ -276,6 +284,9 @@ def init_data(self, env): env.add_test_data(('serv',), sys_serv_data) user_data = [ DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd.dat"), + 0o600, size=7964, mtime=1626052455), + DataFile(Path("home", "jdoe", "rnd2.dat"), 0o640, mtime=1633050855), DataRandomFile(Path("home", "jdoe", "rnd3.dat"), 0o600, size=796, mtime=1633243020), ] @@ -378,17 +389,69 @@ def test_noop_incr_backup(self, env): env.add_test_data(('excl',), [s_parent, s_file, u_parent, u_file]) setup_testdata(env.root, [s_file, u_file]) + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_noop_incr_backup"], scope='class') + def test_content_incr_backup(self, env): + """Modify a file's content, but make sure all filesystem metadata + remain unchanged. 
+ """ + u_path = Path("home", "jdoe", "rnd2.dat") + u_orig_file = env.test_data[u_path] + with gettestdata("rnd2bis.dat").open("rb") as f: + u_file = DataContentFile(u_path, f.read(), + mode=u_orig_file.mode, + mtime=u_orig_file.mtime) + u_parent = env.test_data[u_path.parent] + env.add_test_data(('user',), [u_file]) + setup_testdata(env.root, [u_parent, u_file]) + env.set_hostname("serv") env.set_datetime(datetime.datetime(2021, 10, 8, 3, 0)) env.run_backup_tool("backup-tool --verbose create --policy sys") env.set_datetime(datetime.datetime(2021, 10, 8, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211008-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() env.flush_test_data(('user',), 'incr') - @pytest.mark.dependency(depends=["test_noop_incr_backup"], scope='class') + @pytest.mark.dependency(depends=["test_content_incr_backup"], scope='class') + def test_meta_incr_backup(self, env): + """Modify a file's metadata, but keep the content unchanged. 
+ """ + u_path = Path("home", "jdoe", "rnd3.dat") + u_file = env.test_data[u_path] + u_parent = env.test_data[u_path.parent] + u_file.mode = 0o644 + env.add_test_data(('user',), [u_file]) + (env.root / u_path).chmod(u_file.mode) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 9, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 9, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211009-incr.tar.bz2" + env.check_archive(archive_name, 'user', 'incr') + env.add_index(archive_name, 'serv', 'incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'incr') + + @pytest.mark.dependency(depends=["test_meta_incr_backup"], scope='class') def test_simple_cumu_backup(self, env): """Add some more files, both in sys and in user directories. According to schedule, a cumulative backup for user and From 5379d655a9af7300cf53caf0cb476b16370cf00d Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 23 Nov 2021 22:43:20 +0100 Subject: [PATCH 119/138] Simplify test names, dropping needless suffix --- tests/test_06_backup-tool.py | 58 ++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 7e9530c..2176a56 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -160,53 +160,53 @@ class TestBackupTool: Tests: - + test_initial_full_backup: full backup of initial test data. + + test_initial_full: full backup of initial test data. 2021-10-03: host=desk, policy=sys, schedule=full 2021-10-04: host=serv, policy=sys, schedule=full 2021-10-04: host=serv, policy=user, user=jdoe, schedule=full - + test_simple_incr_backup: add a few files, both in sys and in + + test_simple_incr: add a few files, both in sys and in user directories. 
According to schedule, only incremental user backup will be made. 2021-10-06: host=serv, policy=user, user=jdoe, schedule=incr - + test_noop_incr_backup: add only files in directories that being + + test_noop_incr: add only files in directories that being excluded. Since there is nothing to backup, no backup should be created at all. 2021-10-07: - - + test_content_incr_backup: modify a file's content, but make sure + + test_content_incr: modify a file's content, but make sure all filesystem metadata remain unchanged. 2021-10-08: host=serv, policy=user, user=jdoe, schedule=incr - + test_meta_incr_backup: modify a file's metadata, but keep the + + test_meta_incr: modify a file's metadata, but keep the content unchanged. 2021-10-09: host=serv, policy=user, user=jdoe, schedule=incr - + test_simple_cumu_backup: add some more files, both in sys and in + + test_simple_cumu: add some more files, both in sys and in user directories. According to schedule, a cumulative backup for user and incremental backups for sys are made. 2021-10-10: host=desk, policy=sys, schedule=incr 2021-10-11: host=serv, policy=sys, schedule=incr 2021-10-11: host=serv, policy=user, user=jdoe, schedule=cumu - + test_incr_backup: add another files in a user directory. + + test_incr: add another files in a user directory. 2021-10-13: host=serv, policy=user, user=jdoe, schedule=incr - + test_del_incr_backup: delete the file created for the last test + + test_del_incr: delete the file created for the last test again. Only the parent directory will be added to the incremental backup for it has a changed file modification time, but not its content. 2021-10-15: host=serv, policy=user, user=jdoe, schedule=incr - + test_cumu_backup: nothing has changed in sys directories, no + + test_cumu: nothing has changed in sys directories, no backups will be created for sys. The cumulative backup for user will essentially have the same content as the last one. 
2021-10-17: - 2021-10-18: - 2021-10-18: host=serv, policy=user, user=jdoe, schedule=cumu - + test_full_backup: the next regular full backup. + + test_full: the next regular full backup. 2021-11-07: host=desk, policy=sys, schedule=full 2021-11-08: host=serv, policy=sys, schedule=full 2021-11-08: host=serv, policy=user, user=jdoe, schedule=full @@ -307,7 +307,7 @@ def init_data(self, env): env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) @pytest.mark.dependency() - def test_initial_full_backup(self, env): + def test_initial_full(self, env): """Full backup of initial test data. """ self.init_data(env) @@ -336,8 +336,8 @@ def test_initial_full_backup(self, env): env.check_index() env.flush_test_data(('desk', 'serv', 'user'), 'cumu') - @pytest.mark.dependency(depends=["test_initial_full_backup"], scope='class') - def test_simple_incr_backup(self, env): + @pytest.mark.dependency(depends=["test_initial_full"], scope='class') + def test_simple_incr(self, env): """Add a few files, both in sys and in user directories. According to schedule, only incremental user backup will be made. @@ -371,8 +371,8 @@ def test_simple_incr_backup(self, env): env.check_index() env.flush_test_data(('user',), 'incr') - @pytest.mark.dependency(depends=["test_simple_incr_backup"], scope='class') - def test_noop_incr_backup(self, env): + @pytest.mark.dependency(depends=["test_simple_incr"], scope='class') + def test_noop_incr(self, env): """Add only files in directories that being excluded. Since there is nothing to backup, no backup should be created at all. """ @@ -399,8 +399,8 @@ def test_noop_incr_backup(self, env): env.check_index() env.flush_test_data(('user',), 'incr') - @pytest.mark.dependency(depends=["test_noop_incr_backup"], scope='class') - def test_content_incr_backup(self, env): + @pytest.mark.dependency(depends=["test_noop_incr"], scope='class') + def test_content_incr(self, env): """Modify a file's content, but make sure all filesystem metadata remain unchanged. 
""" @@ -427,8 +427,8 @@ def test_content_incr_backup(self, env): env.check_index() env.flush_test_data(('user',), 'incr') - @pytest.mark.dependency(depends=["test_content_incr_backup"], scope='class') - def test_meta_incr_backup(self, env): + @pytest.mark.dependency(depends=["test_content_incr"], scope='class') + def test_meta_incr(self, env): """Modify a file's metadata, but keep the content unchanged. """ u_path = Path("home", "jdoe", "rnd3.dat") @@ -451,8 +451,8 @@ def test_meta_incr_backup(self, env): env.check_index() env.flush_test_data(('user',), 'incr') - @pytest.mark.dependency(depends=["test_meta_incr_backup"], scope='class') - def test_simple_cumu_backup(self, env): + @pytest.mark.dependency(depends=["test_meta_incr"], scope='class') + def test_simple_cumu(self, env): """Add some more files, both in sys and in user directories. According to schedule, a cumulative backup for user and incremental backups for sys are made. @@ -501,8 +501,8 @@ def test_simple_cumu_backup(self, env): env.check_index() env.flush_test_data(('desk', 'serv', 'user'), 'incr') - @pytest.mark.dependency(depends=["test_simple_cumu_backup"], scope='class') - def test_incr_backup(self, env): + @pytest.mark.dependency(depends=["test_simple_cumu"], scope='class') + def test_incr(self, env): """Add another files in a user directory. """ mtime = 1634067525 @@ -526,8 +526,8 @@ def test_incr_backup(self, env): env.check_index() env.flush_test_data(('user',), 'incr') - @pytest.mark.dependency(depends=["test_incr_backup"], scope='class') - def test_del_incr_backup(self, env): + @pytest.mark.dependency(depends=["test_incr"], scope='class') + def test_del_incr(self, env): """Delete the file created for the last test again. 
Only the parent directory will be added to the incremental backup for it has a changed file modification time, but not @@ -555,8 +555,8 @@ def test_del_incr_backup(self, env): env.check_index() env.flush_test_data(('user',), 'incr') - @pytest.mark.dependency(depends=["test_del_incr_backup"], scope='class') - def test_cumu_backup(self, env): + @pytest.mark.dependency(depends=["test_del_incr"], scope='class') + def test_cumu(self, env): """Do the next weekly backup. Nothing has changed in sys directories, no backups will be created for sys. The cumulative backup for user will @@ -579,8 +579,8 @@ def test_cumu_backup(self, env): env.check_index() env.flush_test_data(('desk', 'serv', 'user'), 'incr') - @pytest.mark.dependency(depends=["test_cumu_backup"], scope='class') - def test_full_backup(self, env): + @pytest.mark.dependency(depends=["test_cumu"], scope='class') + def test_full(self, env): """Do the next monthly backup. """ env.set_hostname("desk") From 09067d2d601733e26af5bad007376ea8fc3d1d48 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Thu, 25 Nov 2021 21:44:02 +0100 Subject: [PATCH 120/138] Make the schedules configurable in class BTTestEnv --- tests/test_06_backup-tool.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 2176a56..8295cb3 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -35,10 +35,12 @@ def __init__(self, root): self.index = ArchiveIndex() self.backupdir = None self.tmptarget = None + self.schedules = None - def config(self, backupdir, tmptarget): + def config(self, backupdir, tmptarget, schedules=('full', 'cumu', 'incr')): self.backupdir = self.root / backupdir self.tmptarget = self.root / tmptarget + self.schedules = schedules def __enter__(self): self.monkeypatch.setattr(datetime, "datetime", self._datetime) @@ -60,7 +62,7 @@ def add_test_data(self, tags, items): for i in items: self.test_data[i.path] = i for t 
in tags: - for s in ('full', 'cumu', 'incr'): + for s in self.schedules: k = (t,s) self.test_data_tags.setdefault(k, set()) self.test_data_tags[k].add(i.path) @@ -69,18 +71,15 @@ def remove_test_data(self, tags, items): for i in items: del self.test_data[i.path] for t in tags: - for s in ('full', 'cumu', 'incr'): + for s in self.schedules: k = (t,s) self.test_data_tags.setdefault(k, set()) self.test_data_tags[k].discard(i.path) def flush_test_data(self, tags, schedule): - if schedule == 'cumu': - schedules = ('cumu', 'incr') - else: - schedules = ('incr',) + idx = self.schedules.index(schedule) for t in tags: - for s in schedules: + for s in self.schedules[idx:]: self.test_data_tags[t,s] = set() def setup_test_data(self): From fae071897a8796d26b7defbf97f1ee9b7fcb948a Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 14:55:24 +0100 Subject: [PATCH 121/138] Add tests for named schedules --- tests/test_06_backup-tool.py | 335 +++++++++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 8295cb3..231dcff 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -605,3 +605,338 @@ def test_full(self, env): env.run_backup_tool("backup-tool --verbose index") env.check_index() env.flush_test_data(('desk', 'serv', 'user'), 'cumu') + + +class TestBackupToolNamedSchedule: + """Use named schedules in the config file. + + Otherwise this is mostly a simplified version of class + TestBackupTool. The focus of the tests is on proper functioning + of the schedule, full vs. cumulative vs. incremental. + + (Named schedules are not yet implemented, thus the first test + xfails and subsequent tests are going to be skipped.) + """ + + cfg = """# Configuration file for backup-tool. +# All paths are within a root directory that need to be substituted. 
+ +[DEFAULT] +backupdir = $root/net/backup + +[serv] + +[desk] +targetdir = $root/var/backup + +[sys] +dirs = + $root/etc + $root/root +excludes = + $root/root/.cache +schedules = monthly:full/weekly:incr + +[desk/sys] +schedule.monthly.date = Sun *-*-1..7 +schedule.weekly.date = Sun * + +[serv/sys] +dirs = + $root/etc + $root/root + $root/usr/local +excludes = + $root/root/.cache +schedule.monthly.date = Mon *-*-2..8 +schedule.weekly.date = Mon * + +[user] +name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +dirs = $root/%(home)s +excludes = + $root/%(home)s/.cache + $root/%(home)s/.thumbnails + $root/%(home)s/tmp +schedules = monthly:full/weekly:cumu/daily:incr +schedule.monthly.date = Mon *-*-2..8 +schedule.weekly.date = Mon * +schedule.daily.date = * +""" + + def init_data(self, env): + env.config("net/backup", "var/backup", + schedules=('monthly', 'weekly', 'daily')) + subst = dict(root=env.root) + cfg_data = string.Template(self.cfg).substitute(subst).encode('ascii') + cfg_path = Path("etc", "backup.cfg") + sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1633129414), + DataContentFile(cfg_path, cfg_data, 0o644, mtime=1632596683), + DataContentFile(Path("etc", "foo.cfg"), + b"[foo]\nbar = baz\n", 0o644, mtime=1632672000), + DataDir(Path("root"), 0o700, mtime=1633274230), + DataRandomFile(Path("root", "rnd5.dat"), + 0o600, size=85, mtime=1633243020), + DataSymLink(Path("root", "rnd.dat"), Path("rnd5.dat"), + mtime=1633243020), + DataDir(Path("usr", "local"), 0o755, mtime=1616490893), + DataRandomFile(Path("usr", "local", "rnd6.dat"), + 0o644, size=607, mtime=1633275272), + ] + env.add_test_data(('sys',), sys_data) + user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd.dat"), + 0o600, size=7964, mtime=1626052455), + DataFile(Path("home", "jdoe", "rnd2.dat"), 0o640, mtime=1633050855), + DataRandomFile(Path("home", "jdoe", "rnd3.dat"), + 0o600, size=796, mtime=1633243020), + ] + env.add_test_data(('user',), 
user_data) + excl_data = [ + DataDir(Path("home", "jdoe", ".cache"), 0o700, mtime=1608491257), + DataRandomFile(Path("home", "jdoe", ".cache", "rnd2.dat"), + 0o600, size=385, mtime=1633275272), + DataDir(Path("home", "jdoe", "tmp"), 0o755, mtime=1631130997), + DataDir(Path("root", ".cache"), 0o700, mtime=1603009887), + DataRandomFile(Path("root", ".cache", "rnd4.dat"), + 0o600, size=665, mtime=1633275272), + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + DataDir(Path("var", "backup"), 0o755, mtime=1632704400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + @pytest.mark.dependency() + @pytest.mark.xfail(reason="named schedules not yet implemented") + def test_initial_monthly(self, env): + """Full backup of initial test data. + """ + self.init_data(env) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211004-monthly.tar.bz2" + env.check_archive(archive_name, 'sys', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211004-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys', 'user'), 'weekly') + + @pytest.mark.dependency(depends=["test_initial_monthly"], scope='class') + def test_first_daily(self, env): + """First incremental backup in the first week. 
+ """ + mtime = 1633451717 + u_path = Path("home", "jdoe", "misc") + u_dir = DataDir(u_path, 0o755, mtime=mtime) + u_file = DataRandomFile(u_path / "rnd7.dat", + 0o644, size=473, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + mtime = 1633464305 + s_path = Path("root", "rnd8.dat") + s_file = DataRandomFile(s_path, 0o600, size=42, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_dir, u_file]) + env.add_test_data(('sys',), [s_parent, s_file]) + setup_testdata(env.root, [u_dir, u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 6, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211006-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_first_daily"], scope='class') + def test_second_daily(self, env): + """Second incremental backup in the first week. 
+ """ + mtime = 1633500600 + u_path = Path("home", "jdoe", "misc", "rnd9.dat") + u_file = DataRandomFile(u_path, 0o640, size=582, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 7, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211007-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_second_daily"], scope='class') + def test_first_weekly(self, env): + """First cumulative backup. + """ + mtime = 1633837020 + s0_path = Path("usr", "local", "rnd11.dat") + s0_file = DataRandomFile(s0_path, 0o644, size=528, mtime=mtime) + s0_parent = env.test_data[s0_path.parent] + s0_parent.mtime = mtime + mtime = 1633843260 + s1_path = Path("root", "rnd12.dat") + s1_file = DataRandomFile(s1_path, 0o600, size=17, mtime=mtime) + s1_parent = env.test_data[s1_path.parent] + s1_parent.mtime = mtime + mtime = 1633876920 + u_path = Path("home", "jdoe", "misc", "rnd13.dat") + u_file = DataRandomFile(u_path, 0o644, size=378, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('sys',), [s0_parent, s0_file, s1_parent, s1_file]) + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [s0_file, s1_file, u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211011-weekly.tar.bz2" + 
env.check_archive(archive_name, 'sys', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 11, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211011-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys',), 'weekly') + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_first_weekly"], scope='class') + def test_third_daily(self, env): + """First incremental backup in the second week. + """ + mtime = 1634053507 + u_path = Path("home", "jdoe", "misc", "rnd14.dat") + u_file = DataRandomFile(u_path, 0o644, size=763, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + mtime = 1634083500 + s_path = Path("root", "rnd15.dat") + s_file = DataRandomFile(s_path, 0o600, size=165, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + env.add_test_data(('sys',), [s_parent, s_file]) + setup_testdata(env.root, [u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 13, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211013-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_third_daily"], scope='class') + def test_second_weekly(self, env): + """Second cumulative backup. 
+ """ + mtime = 1634509129 + u_path = Path("home", "jdoe", "misc", "rnd16.dat") + u_file = DataRandomFile(u_path, 0o644, size=834, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211018-weekly.tar.bz2" + env.check_archive(archive_name, 'sys', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', policy='sys') + env.set_datetime(datetime.datetime(2021, 10, 18, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211018-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys',), 'weekly') + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_second_weekly"], scope='class') + def test_fourth_daily(self, env): + """First incremental backup in the third week. 
+ """ + mtime = 1634605839 + s_path = Path("root", "rnd18.dat") + s_file = DataRandomFile(s_path, 0o600, size=589, mtime=mtime) + s_parent = env.test_data[s_path.parent] + s_parent.mtime = mtime + mtime = 1634631969 + u_path = Path("home", "jdoe", "misc", "rnd17.dat") + u_file = DataRandomFile(u_path, 0o644, size=568, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + env.add_test_data(('sys',), [s_parent, s_file]) + setup_testdata(env.root, [u_file, s_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 20, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + env.set_datetime(datetime.datetime(2021, 10, 20, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211020-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_fourth_daily"], scope='class') + def test_second_monthly(self, env): + """Do the next monthly backup. 
+ """ + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211108-monthly.tar.bz2" + env.check_archive(archive_name, 'sys', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', policy='sys') + env.set_datetime(datetime.datetime(2021, 11, 8, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211108-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('sys', 'user'), 'weekly') From 62d9268b709bf017273849f3d77f146308a18358 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 15:03:10 +0100 Subject: [PATCH 122/138] Implement named schedules --- archive/bt/create.py | 10 +++++++--- tests/test_06_backup-tool.py | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/archive/bt/create.py b/archive/bt/create.py index 9f0476f..01ed9a7 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -41,9 +41,13 @@ def get_schedule(config): last_schedule = None schedules = [] for s in config.schedules: - sd_str = config.get('schedule.%s.date' % s, required=True) - cls = BaseSchedule.SubClasses[s] - last_schedule = cls(s, ScheduleDate(sd_str), last_schedule) + try: + n, t = s.split(':') + except ValueError: + n = t = s + cls = BaseSchedule.SubClasses[t] + sd_str = config.get('schedule.%s.date' % n, required=True) + last_schedule = cls(n, ScheduleDate(sd_str), last_schedule) schedules.append(last_schedule) now = datetime.datetime.now() for s in schedules: diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 231dcff..26de27d 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -710,7 +710,6 @@ def init_data(self, env): env.monkeypatch.setenv("BACKUP_CFG", 
str(env.root / cfg_path)) @pytest.mark.dependency() - @pytest.mark.xfail(reason="named schedules not yet implemented") def test_initial_monthly(self, env): """Full backup of initial test data. """ From 0830ec91c5641bc8e1e13a543aba08e9fc4732ce Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 15:50:07 +0100 Subject: [PATCH 123/138] Make mtime argument optional in DataItem.unlink() --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3ce8bdf..9cc2697 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -175,7 +175,7 @@ def st_mode(self): def create(self, main_dir): raise NotImplementedError - def unlink(self, main_dir, mtime): + def unlink(self, main_dir, mtime=None): path = main_dir / self.path path.unlink() if mtime: From caddb7673a916c47b2acaeb0c2e82068411734b3 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 16:17:55 +0100 Subject: [PATCH 124/138] Update the example backup.cfg file --- etc/backup.cfg | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/etc/backup.cfg b/etc/backup.cfg index 0ba5a91..2f59d97 100644 --- a/etc/backup.cfg +++ b/etc/backup.cfg @@ -7,7 +7,14 @@ # each configuration option, the first occurrence in any of these # sections will be used. +# Default settings that are effectively included in all other sections. +[DEFAULT] +! backupdir = /proj/backup/auto + # The default policy sys +# In this example, we schedule a monthly full backup for the Monday +# after the first Sunday of the month and a weekly incremental backup +# each other Monday. [sys] ! dirs = ! /etc @@ -15,9 +22,15 @@ ! /usr/local ! excludes = ! /root/.cache -! backupdir = /proj/backup/auto +! schedules = full/incr +! schedule.full.date = Mon *-*-2..8 +! schedule.incr.date = Mon * -# The special policy user is used when the --user command line option is used. 
+# The special policy user is used when the --user command line option +# is used. +# In this example, we schedule a monthly full backup for the Monday +# after the first Sunday of the month, a weekly cumulative backup each +# other Monday and a daily incremental backup for any other day. [user] ! name = %(user)s-%(date)s-%(schedule)s.tar.bz2 ! dirs = %(home)s @@ -25,7 +38,10 @@ ! %(home)s/.cache ! %(home)s/.thumbnails ! %(home)s/tmp -! backupdir = /proj/backup/auto +! schedules = full/cumu/incr +! schedule.full.date = Mon *-*-2..8 +! schedule.cumu.date = Mon * +! schedule.incr.date = * # Override settings on a particular host ! [db-host] From b72ec3c5e85d53731397a0bc84dafbd06aa7cf7a Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 15:45:31 +0100 Subject: [PATCH 125/138] Remove comment on a completed TODO item --- archive/bt/__init__.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/archive/bt/__init__.py b/archive/bt/__init__.py index 7723c5b..643d0b7 100644 --- a/archive/bt/__init__.py +++ b/archive/bt/__init__.py @@ -11,14 +11,6 @@ # TODO: # -# - in the long run, we want to select the schedule (e.g. set the -# conditions, when to choose which schedule) in the configuration -# file, and even put the definition and semantics (e.g. which -# schedules exist and what do they mean) there. But this seem to be -# most tricky part of the whole project. We want to get the basics -# working first. So for the moment, we hard code definition and -# semantics here and select the schedule as a command line argument. -# # - consider add configuration options for dedup mode and for checksum # algorithm. 
# From 72cf71709c81065af1e5ebb6b3bac73093f57cf0 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 16:35:56 +0100 Subject: [PATCH 126/138] Fix: must include test data rnd2bis.dat into source distribution --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index bf5a022..7526730 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -11,5 +11,6 @@ include tests/data/manifest.yaml include tests/data/msg.txt include tests/data/rnd.dat include tests/data/rnd2.dat +include tests/data/rnd2bis.dat include tests/pytest.ini include tests/test_*.py From d200de2191bb980660c5c657536be75a95cddf45 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 17:10:29 +0100 Subject: [PATCH 127/138] Mark the backup.cfg file as %config(noreplace) in the spec file --- python-archive-tools.spec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-archive-tools.spec b/python-archive-tools.spec index 992d8aa..ab503e0 100644 --- a/python-archive-tools.spec +++ b/python-archive-tools.spec @@ -55,7 +55,7 @@ python3 setup.py test %files %defattr(-,root,root) %doc README.rst -%config %{_sysconfdir}/backup.cfg +%config(noreplace) %{_sysconfdir}/backup.cfg %{python3_sitelib}/* %{_bindir}/* From 27e224d375d2426bf587d35b08c8e6d34b4004c2 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 21:28:42 +0100 Subject: [PATCH 128/138] Remove an obsolete note from a docstring --- tests/test_06_backup-tool.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 26de27d..0c06de4 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -613,9 +613,6 @@ class TestBackupToolNamedSchedule: Otherwise this is mostly a simplified version of class TestBackupTool. The focus of the tests is on proper functioning of the schedule, full vs. cumulative vs. incremental. 
- - (Named schedules are not yet implemented, thus the first test - xfails and subsequent tests are going to be skipped.) """ cfg = """# Configuration file for backup-tool. From 2f94f641f27f2587d569c167c90a7d7a4d98b907 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Mon, 29 Nov 2021 23:15:35 +0100 Subject: [PATCH 129/138] Fix missing import --- archive/bt/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/archive/bt/config.py b/archive/bt/config.py index 07452f4..65ebecf 100644 --- a/archive/bt/config.py +++ b/archive/bt/config.py @@ -7,6 +7,7 @@ import pwd import socket import archive.config +from archive.exception import ConfigError def get_config_file(): From cd039307567bd13abbd8ec4f35962a138331aaab Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 28 Nov 2021 20:52:35 +0100 Subject: [PATCH 130/138] Add a configuration option for dedup mode --- archive/bt/__init__.py | 3 --- archive/bt/config.py | 6 ++++++ archive/bt/create.py | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/archive/bt/__init__.py b/archive/bt/__init__.py index 643d0b7..b9ea188 100644 --- a/archive/bt/__init__.py +++ b/archive/bt/__init__.py @@ -11,9 +11,6 @@ # TODO: # -# - consider add configuration options for dedup mode and for checksum -# algorithm. -# # - consider adding more log messages and logging configuration. 
log = logging.getLogger(__name__) diff --git a/archive/bt/config.py b/archive/bt/config.py index 65ebecf..76218c9 100644 --- a/archive/bt/config.py +++ b/archive/bt/config.py @@ -6,6 +6,7 @@ from pathlib import Path import pwd import socket +from archive.archive import DedupMode import archive.config from archive.exception import ConfigError @@ -25,6 +26,7 @@ class Config(archive.config.Config): 'targetdir': "%(backupdir)s", 'name': "%(host)s-%(date)s-%(schedule)s.tar.bz2", 'schedules': None, + 'dedup': 'link', } args_options = ('policy', 'user') @@ -88,6 +90,10 @@ def backupdir(self): def targetdir(self): return self.get('targetdir', required=True, type=Path) + @property + def dedup(self): + return self.get('dedup', required=True, type=DedupMode) + @property def path(self): return self.targetdir / self.name diff --git a/archive/bt/create.py b/archive/bt/create.py index 01ed9a7..e63b978 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -103,7 +103,8 @@ def create(args, config): if config.user: tags.append("user:%s" % config.user) with tmp_umask(0o277): - arch = Archive().create(config.path, fileinfos=fileinfos, tags=tags) + arch = Archive().create(config.path, fileinfos=fileinfos, tags=tags, + dedup=config.dedup) if config.user: chown(arch.path, config.user) return 0 From 6fc8f49461672650c3faf85e0890f4a2c9df1f25 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Tue, 30 Nov 2021 00:06:11 +0100 Subject: [PATCH 131/138] Add test for configuration option for dedup mode --- tests/test_06_backup-tool.py | 90 ++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index 0c06de4..b1cebfc 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -6,6 +6,7 @@ import os from pathlib import Path import pwd +import shutil import socket import string import sys @@ -936,3 +937,92 @@ def test_second_monthly(self, env): env.run_backup_tool("backup-tool --verbose 
index") env.check_index() env.flush_test_data(('sys', 'user'), 'weekly') + + +class TestBackupToolDedup: + """Test the dedup configration option. + """ + + src_dir = Path("root") + src_path = Path("root", "rnd.dat") + lnk_path = Path("root", "rnd_lnk.dat") + cp_path = Path("root", "rnd_cp.dat") + + cfg = """# Configuration file for backup-tool. +# All paths are within a root directory that need to be substituted. + +[sys] +name = %(host)s-%(date)s-%(schedule)s-$suffix.tar.bz2 +dirs = + $root/root +backupdir = $root/net/backup +schedules = full/incr +schedule.full.date = Mon *-*-2..8 +schedule.incr.date = Mon * +""" + + def init_data(self, env, dedup): + env.config("net/backup", "var/backup") + subst = dict(root=env.root, suffix=str(dedup)) + cfg = string.Template(self.cfg).substitute(subst) + if dedup: + cfg_path = env.root / "etc" / ("backup-%s.cfg" % dedup) + cfg += "dedup = %s\n" % dedup + else: + cfg_path = env.root / "etc" / "backup.cfg" + cfg_path.parent.mkdir(parents=True, exist_ok=True) + with cfg_path.open("wt") as f: + f.write(cfg) + if not (env.root / self.src_dir).is_dir(): + sys_data = [ + DataDir(self.src_dir, 0o700, mtime=1633274230), + DataFile(self.src_path, 0o600, mtime=1633243020), + ] + env.add_test_data(('sys',), sys_data) + excl_data = [ + DataDir(Path("net", "backup"), 0o755, mtime=1632704400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + src_file = env.test_data[self.src_path] + os.link(env.root / self.src_path, env.root / self.lnk_path) + shutil.copy2(env.root / self.src_path, env.root / self.cp_path) + extra_data = [ + DataFile(self.lnk_path, src_file.mode, + mtime=src_file.mtime, checksum=src_file.checksum), + DataFile(self.cp_path, src_file.mode, + mtime=src_file.mtime, checksum=src_file.checksum), + ] + env.add_test_data(('sys',), extra_data) + env.test_data[self.src_dir].create(env.root) + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + @pytest.mark.parametrize("dedup", [None, 'never', 
'link', 'content']) + def test_full(self, env, dedup): + """Full backup of initial test data. + """ + self.init_data(env, dedup) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 0)) + env.run_backup_tool("backup-tool --verbose create --policy sys") + archive_name = "serv-211004-full-%s.tar.bz2" % str(dedup) + env.check_archive(archive_name, 'sys', 'full') + with Archive().open(env.backupdir / archive_name) as archive: + src_path = archive._arcname(env.root / self.src_path) + lnk_path = archive._arcname(env.root / self.lnk_path) + cp_path = archive._arcname(env.root / self.cp_path) + ti_lnk = archive._file.getmember(lnk_path) + ti_cp = archive._file.getmember(cp_path) + if dedup == 'never': + assert ti_lnk.isfile() + assert ti_cp.isfile() + elif dedup is None or dedup == 'link': + assert ti_lnk.islnk() + assert ti_lnk.linkname == src_path + assert ti_cp.isfile() + elif dedup == 'content': + assert ti_lnk.islnk() + assert ti_lnk.linkname == src_path + assert ti_cp.islnk() + assert ti_cp.linkname == src_path From dd42188be3e32715aaa052e8fc53a33378f67d00 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 4 Dec 2021 22:42:04 +0100 Subject: [PATCH 132/138] Remove comment on a completed TODO item --- archive/bt/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/archive/bt/__init__.py b/archive/bt/__init__.py index b9ea188..d9da441 100644 --- a/archive/bt/__init__.py +++ b/archive/bt/__init__.py @@ -8,11 +8,6 @@ from archive.exception import ArchiveError, ConfigError from archive.bt.config import Config - -# TODO: -# -# - consider adding more log messages and logging configuration. 
- log = logging.getLogger(__name__) subcmds = ( "create", "index", ) From db7277202bb7c9005a657deac098d7c35415330c Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 5 Dec 2021 12:47:04 +0100 Subject: [PATCH 133/138] Add a tag 'type' indicating the schedule class to backups --- archive/bt/create.py | 1 + archive/index.py | 4 +++- tests/test_06_backup-tool.py | 40 ++++++++++++++++++++++++------------ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/archive/bt/create.py b/archive/bt/create.py index e63b978..b3ae931 100644 --- a/archive/bt/create.py +++ b/archive/bt/create.py @@ -99,6 +99,7 @@ def create(args, config): "host:%s" % config.host, "policy:%s" % config.policy, "schedule:%s" % schedule.name, + "type:%s" % schedule.ClsName, ] if config.user: tags.append("user:%s" % config.user) diff --git a/archive/index.py b/archive/index.py index 11521a1..9a4f485 100644 --- a/archive/index.py +++ b/archive/index.py @@ -19,6 +19,7 @@ def __init__(self, data=None, archive=None): self.policy = data.get('policy') self.user = data.get('user') self.schedule = data.get('schedule') + self.type = data.get('type') elif archive is not None: self.date = parse_date(archive.manifest.head['Date']) self.path = archive.path @@ -38,6 +39,7 @@ def __init__(self, data=None, archive=None): self.policy = tagmap.get('policy') self.user = tagmap.get('user') self.schedule = tagmap.get('schedule') + self.type = tagmap.get('type') else: raise TypeError("Either data or archive must be provided") @@ -48,7 +50,7 @@ def as_dict(self): 'date': self.date.isoformat(sep=' '), 'path': str(self.path), } - for k in ('host', 'policy', 'user', 'schedule'): + for k in ('host', 'policy', 'user', 'schedule', 'type'): v = getattr(self, k, None) if v: d[k] = v diff --git a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index b1cebfc..efdd936 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -107,7 +107,8 @@ def check_index(self): assert 
set(self.backupdir.iterdir()) == backupdir_content assert set(self.tmptarget.iterdir()) == set() - def add_index(self, name, host, schedule, policy=None, user=None): + def add_index(self, name, host, schedule, + type=None, policy=None, user=None): if user: policy = 'user' idx_data = { @@ -117,6 +118,7 @@ def add_index(self, name, host, schedule, policy=None, user=None): 'policy': policy, 'user': user, 'schedule': schedule, + 'type': type or schedule, } self.index.append(IndexItem(idx_data)) @@ -718,12 +720,14 @@ def test_initial_monthly(self, env): env.run_backup_tool("backup-tool --verbose create --policy sys") archive_name = "serv-211004-monthly.tar.bz2" env.check_archive(archive_name, 'sys', 'monthly') - env.add_index(archive_name, 'serv', 'monthly', policy='sys') + env.add_index(archive_name, 'serv', 'monthly', + type='full', policy='sys') env.set_datetime(datetime.datetime(2021, 10, 4, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211004-monthly.tar.bz2" env.check_archive(archive_name, 'user', 'monthly') - env.add_index(archive_name, 'serv', 'monthly', user='jdoe') + env.add_index(archive_name, 'serv', 'monthly', + type='full', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() @@ -756,7 +760,8 @@ def test_first_daily(self, env): env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211006-daily.tar.bz2" env.check_archive(archive_name, 'user', 'daily') - env.add_index(archive_name, 'serv', 'daily', user='jdoe') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() @@ -781,7 +786,8 @@ def test_second_daily(self, env): env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211007-daily.tar.bz2" env.check_archive(archive_name, 'user', 'daily') - env.add_index(archive_name, 'serv', 'daily', user='jdoe') + env.add_index(archive_name, 
'serv', 'daily', + type='incr', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() @@ -815,12 +821,14 @@ def test_first_weekly(self, env): env.run_backup_tool("backup-tool --verbose create --policy sys") archive_name = "serv-211011-weekly.tar.bz2" env.check_archive(archive_name, 'sys', 'weekly') - env.add_index(archive_name, 'serv', 'weekly', policy='sys') + env.add_index(archive_name, 'serv', 'weekly', + type='incr', policy='sys') env.set_datetime(datetime.datetime(2021, 10, 11, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211011-weekly.tar.bz2" env.check_archive(archive_name, 'user', 'weekly') - env.add_index(archive_name, 'serv', 'weekly', user='jdoe') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() @@ -852,7 +860,8 @@ def test_third_daily(self, env): env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211013-daily.tar.bz2" env.check_archive(archive_name, 'user', 'daily') - env.add_index(archive_name, 'serv', 'daily', user='jdoe') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() @@ -875,12 +884,14 @@ def test_second_weekly(self, env): env.run_backup_tool("backup-tool --verbose create --policy sys") archive_name = "serv-211018-weekly.tar.bz2" env.check_archive(archive_name, 'sys', 'weekly') - env.add_index(archive_name, 'serv', 'weekly', policy='sys') + env.add_index(archive_name, 'serv', 'weekly', + type='incr', policy='sys') env.set_datetime(datetime.datetime(2021, 10, 18, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211018-weekly.tar.bz2" env.check_archive(archive_name, 'user', 'weekly') - env.add_index(archive_name, 'serv', 'weekly', user='jdoe') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', 
user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() @@ -912,7 +923,8 @@ def test_fourth_daily(self, env): env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211020-daily.tar.bz2" env.check_archive(archive_name, 'user', 'daily') - env.add_index(archive_name, 'serv', 'daily', user='jdoe') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() @@ -927,12 +939,14 @@ def test_second_monthly(self, env): env.run_backup_tool("backup-tool --verbose create --policy sys") archive_name = "serv-211108-monthly.tar.bz2" env.check_archive(archive_name, 'sys', 'monthly') - env.add_index(archive_name, 'serv', 'monthly', policy='sys') + env.add_index(archive_name, 'serv', 'monthly', + type='full', policy='sys') env.set_datetime(datetime.datetime(2021, 11, 8, 3, 10)) env.run_backup_tool("backup-tool --verbose create --user jdoe") archive_name = "jdoe-211108-monthly.tar.bz2" env.check_archive(archive_name, 'user', 'monthly') - env.add_index(archive_name, 'serv', 'monthly', user='jdoe') + env.add_index(archive_name, 'serv', 'monthly', + type='full', user='jdoe') env.run_backup_tool("backup-tool --verbose index") env.check_index() From 9481766b7174ddc0f062a764f7fd78f7fbd03858 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 5 Dec 2021 15:48:24 +0100 Subject: [PATCH 134/138] Update changelog --- CHANGES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 34fc27a..506cca0 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,8 @@ Changelog New features ------------ ++ `#52`_, `#70`_: Add a `backup-tool` script. + + `#54`_: Add command line flags `--directory ` to `archive-tool create`. The script will change into this directory prior creating the archive if provided. @@ -84,6 +86,7 @@ Internal changes .. _#48: https://github.com/RKrahl/archive-tools/pull/48 .. 
_#50: https://github.com/RKrahl/archive-tools/issues/50 .. _#51: https://github.com/RKrahl/archive-tools/pull/51 +.. _#52: https://github.com/RKrahl/archive-tools/issues/52 .. _#53: https://github.com/RKrahl/archive-tools/issues/53 .. _#54: https://github.com/RKrahl/archive-tools/pull/54 .. _#55: https://github.com/RKrahl/archive-tools/issues/55 @@ -100,6 +103,7 @@ Internal changes .. _#66: https://github.com/RKrahl/archive-tools/pull/66 .. _#67: https://github.com/RKrahl/archive-tools/pull/67 .. _#68: https://github.com/RKrahl/archive-tools/pull/68 +.. _#70: https://github.com/RKrahl/archive-tools/pull/70 0.5.1 (2020-12-12) From b178d53648bf0b540aadde8fcbe984cd05f47c2b Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 5 Dec 2021 20:00:04 +0100 Subject: [PATCH 135/138] Now tested with Python 3.10 --- .github/workflows/run-tests.yaml | 2 +- setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yaml b/.github/workflows/run-tests.yaml index d9e7509..5e05c4c 100644 --- a/.github/workflows/run-tests.yaml +++ b/.github/workflows/run-tests.yaml @@ -10,7 +10,7 @@ jobs: - '3.7' - '3.8' - '3.9' - - '3.10.0-beta - 3.10.0' + - '3.10' os: [ubuntu-latest] steps: - name: Check out repository code diff --git a/setup.py b/setup.py index a9d2c6e..c9925ff 100644 --- a/setup.py +++ b/setup.py @@ -130,6 +130,7 @@ def run(self): "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "Topic :: System :: Archiving", ], cmdclass = {'build_py': build_py, 'sdist': sdist, 'init_py': init_py}, From 6b18ff638fe63daa280cb910d6f4615c55b0cfcf Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sun, 5 Dec 2021 22:33:12 +0100 Subject: [PATCH 136/138] Add a backup-tool test for freely mixed schedule types --- tests/test_06_backup-tool.py | 465 +++++++++++++++++++++++++++++++++++ 1 file changed, 465 insertions(+) diff --git 
a/tests/test_06_backup-tool.py b/tests/test_06_backup-tool.py index efdd936..d9535da 100644 --- a/tests/test_06_backup-tool.py +++ b/tests/test_06_backup-tool.py @@ -953,6 +953,471 @@ def test_second_monthly(self, env): env.flush_test_data(('sys', 'user'), 'weekly') +class TestBackupToolMixedScheduleTypes: + """The schedule types may be freely mixed. + + The backup-tool supports a hierarchy of the schedule types 'full', + 'cumu', and 'incr'. It is not required to use them in that order. + Only the root of the hierarchy must have the type 'full', + otherwise the types may be freely mixed. + + The scenario considered in this test: + - quarterly: full, + - monthly: incr, + - weekly: cumu, + - daily: incr. + """ + + cfg = """# Configuration file for backup-tool. +# All paths are within a root directory that need to be substituted. + +[serv] +backupdir = $root/net/backup + +[user] +name = %(user)s-%(date)s-%(schedule)s.tar.bz2 +dirs = $root/%(home)s +excludes = + $root/%(home)s/.cache + $root/%(home)s/.thumbnails + $root/%(home)s/tmp +schedules = quarterly:full/monthly:incr/weekly:cumu/daily:incr +schedule.quarterly.date = Mon *-1,4,7,10-2..8 +schedule.monthly.date = Mon *-*-2..8 +schedule.weekly.date = Mon * +schedule.daily.date = * +""" + + def init_data(self, env): + env.config("net/backup", "var", + schedules=('quarterly', 'monthly', 'weekly', 'daily')) + subst = dict(root=env.root) + cfg_data = string.Template(self.cfg).substitute(subst).encode('ascii') + cfg_path = Path("etc", "backup.cfg") + sys_data = [ + DataDir(Path("etc"), 0o755, mtime=1625363657), + DataContentFile(cfg_path, cfg_data, 0o644, mtime=1625243298), + ] + env.add_test_data(('sys',), sys_data) + user_data = [ + DataDir(Path("home", "jdoe"), 0o700, mtime=1633263300), + DataRandomFile(Path("home", "jdoe", "rnd00.dat"), + 0o600, size=7964, mtime=1612908655), + DataRandomFile(Path("home", "jdoe", "rnd01.dat"), + 0o600, size=39, mtime=1614947739), + ] + env.add_test_data(('user',), user_data) + 
excl_data = [ + DataDir(Path("net", "backup"), 0o755, mtime=1625360400), + DataDir(Path("var"), 0o755, mtime=1625360400), + ] + env.add_test_data(('excl',), excl_data) + env.setup_test_data() + env.monkeypatch.setenv("BACKUP_CFG", str(env.root / cfg_path)) + + @pytest.mark.dependency() + def test_20210705(self, env): + """Full backup of initial test data. + """ + self.init_data(env) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 5, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210705-quarterly.tar.bz2" + env.check_archive(archive_name, 'user', 'quarterly') + env.add_index(archive_name, 'serv', 'quarterly', + type='full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'monthly') + + @pytest.mark.dependency(depends=["test_20210705"], scope='class') + def test_20210707(self, env): + """Daily incremental backup in the first week. + """ + mtime = 1625562697 + u_path = Path("home", "jdoe", "rnd02.dat") + u_file = DataRandomFile(u_path, 0o600, size=446, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 7, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210707-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210707"], scope='class') + def test_20210709(self, env): + """Second daily incremental backup in the first week. 
+ """ + mtime = 1625743947 + u_path = Path("home", "jdoe", "rnd03.dat") + u_file = DataRandomFile(u_path, 0o600, size=55, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 9, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210709-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210709"], scope='class') + def test_20210712(self, env): + """Weekly cumulative backup. + """ + mtime = 1626043402 + u_path = Path("home", "jdoe", "rnd04.dat") + u_file = DataRandomFile(u_path, 0o600, size=228, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 12, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210712-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210712"], scope='class') + def test_20210714(self, env): + """Daily incremental backup in the second week. 
+ """ + mtime = 1626167376 + u_path = Path("home", "jdoe", "rnd05.dat") + u_file = DataRandomFile(u_path, 0o600, size=263, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 14, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210714-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210714"], scope='class') + def test_20210719(self, env): + """Weekly cumulative backup. + """ + mtime = 1626575481 + u_path = Path("home", "jdoe", "rnd06.dat") + u_file = DataRandomFile(u_path, 0o600, size=287, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 19, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210719-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210719"], scope='class') + def test_20210721(self, env): + """Daily incremental backup in the third week. 
+ """ + mtime = 1626826403 + u_path = Path("home", "jdoe", "rnd07.dat") + u_file = DataRandomFile(u_path, 0o600, size=318, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 7, 21, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210721-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210721"], scope='class') + def test_20210802(self, env): + """Monthly backup. + """ + mtime = 1627806186 + u_path = Path("home", "jdoe", "rnd08.dat") + u_file = DataRandomFile(u_path, 0o600, size=334, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 2, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210802-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'monthly') + + @pytest.mark.dependency(depends=["test_20210802"], scope='class') + def test_20210804(self, env): + """Daily incremental backup. 
+ """ + mtime = 1628026098 + u_path = Path("home", "jdoe", "rnd09.dat") + u_file = DataRandomFile(u_path, 0o600, size=404, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 4, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210804-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210804"], scope='class') + def test_20210809(self, env): + """Weekly cumulative backup. + """ + mtime = 1628460869 + u_path = Path("home", "jdoe", "rnd10.dat") + u_file = DataRandomFile(u_path, 0o600, size=453, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 9, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210809-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210809"], scope='class') + def test_20210811(self, env): + """Daily incremental backup. 
+ """ + mtime = 1628563138 + u_path = Path("home", "jdoe", "rnd11.dat") + u_file = DataRandomFile(u_path, 0o600, size=174, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 8, 11, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210811-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210811"], scope='class') + def test_20210906(self, env): + """Monthly backup. + """ + mtime = 1630827561 + u_path = Path("home", "jdoe", "rnd12.dat") + u_file = DataRandomFile(u_path, 0o600, size=225, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 6, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210906-monthly.tar.bz2" + env.check_archive(archive_name, 'user', 'monthly') + env.add_index(archive_name, 'serv', 'monthly', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'monthly') + + @pytest.mark.dependency(depends=["test_20210906"], scope='class') + def test_20210908(self, env): + """Daily incremental backup. 
+ """ + mtime = 1630986960 + u_path = Path("home", "jdoe", "rnd13.dat") + u_file = DataRandomFile(u_path, 0o600, size=317, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 8, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210908-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210908"], scope='class') + def test_20210913(self, env): + """Weekly cumulative backup. + """ + mtime = 1631419436 + u_path = Path("home", "jdoe", "rnd14.dat") + u_file = DataRandomFile(u_path, 0o600, size=159, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 13, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210913-weekly.tar.bz2" + env.check_archive(archive_name, 'user', 'weekly') + env.add_index(archive_name, 'serv', 'weekly', + type='cumu', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210913"], scope='class') + def test_20210915(self, env): + """Daily incremental backup. 
+ """ + mtime = 1631652957 + u_path = Path("home", "jdoe", "rnd15.dat") + u_file = DataRandomFile(u_path, 0o600, size=199, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 15, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210915-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210915"], scope='class') + def test_20210917(self, env): + """Daily incremental backup. + """ + mtime = 1631781786 + u_path = Path("home", "jdoe", "rnd16.dat") + u_file = DataRandomFile(u_path, 0o600, size=24, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 9, 17, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-210917-daily.tar.bz2" + env.check_archive(archive_name, 'user', 'daily') + env.add_index(archive_name, 'serv', 'daily', + type='incr', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'daily') + + @pytest.mark.dependency(depends=["test_20210917"], scope='class') + def test_20211004(self, env): + """Quarterly full backup. 
+ """ + mtime = 1633264335 + u_path = Path("home", "jdoe", "rnd17.dat") + u_file = DataRandomFile(u_path, 0o600, size=467, mtime=mtime) + u_parent = env.test_data[u_path.parent] + u_parent.mtime = mtime + env.add_test_data(('user',), [u_parent, u_file]) + setup_testdata(env.root, [u_file]) + + env.set_hostname("serv") + env.set_datetime(datetime.datetime(2021, 10, 4, 3, 10)) + env.run_backup_tool("backup-tool --verbose create --user jdoe") + archive_name = "jdoe-211004-quarterly.tar.bz2" + env.check_archive(archive_name, 'user', 'quarterly') + env.add_index(archive_name, 'serv', 'quarterly', + type='full', user='jdoe') + + env.run_backup_tool("backup-tool --verbose index") + env.check_index() + env.flush_test_data(('user',), 'monthly') + + class TestBackupToolDedup: """Test the dedup configration option. """ From 1326808c2f0da296967b436acbea320cd9c0f745 Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 11 Dec 2021 19:31:39 +0100 Subject: [PATCH 137/138] Add CHANGES.rst and LICENSE.txt to the files installed with the RPM --- MANIFEST.in | 1 + python-archive-tools.spec | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index 7526730..13dee76 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ include .version +include CHANGES.rst include LICENSE.txt include MANIFEST.in include README.rst diff --git a/python-archive-tools.spec b/python-archive-tools.spec index ab503e0..3534327 100644 --- a/python-archive-tools.spec +++ b/python-archive-tools.spec @@ -54,7 +54,8 @@ python3 setup.py test %files %defattr(-,root,root) -%doc README.rst +%doc README.rst CHANGES.rst +%license LICENSE.txt %config(noreplace) %{_sysconfdir}/backup.cfg %{python3_sitelib}/* %{_bindir}/* From 8c06ada439398f2f4b31c0cb2b43d81642b6fbda Mon Sep 17 00:00:00 2001 From: Rolf Krahl Date: Sat, 11 Dec 2021 19:32:56 +0100 Subject: [PATCH 138/138] Prepare release of 0.6, anticipated for tomorrow --- CHANGES.rst | 4 ++-- README.rst | 2 +- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 506cca0..8d13d3d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,8 +2,8 @@ Changelog ========= -0.6 (not yet released) -~~~~~~~~~~~~~~~~~~~~~~ +0.6 (2021-12-12) +~~~~~~~~~~~~~~~~ New features ------------ diff --git a/README.rst b/README.rst index 845217d..d682180 100644 --- a/README.rst +++ b/README.rst @@ -126,7 +126,7 @@ the site-packages directory of your Python installation. Copyright and License --------------------- -Copyright 2019–2020 Rolf Krahl +Copyright 2019–2021 Rolf Krahl Licensed under the `Apache License`_, Version 2.0 (the "License"); you may not use this file except in compliance with the License.