diff --git a/README.md b/README.md index 9bcd4a0..604a0e5 100644 --- a/README.md +++ b/README.md @@ -44,13 +44,33 @@ privateer2 configure replacing `` with the name of the machine within either the `servers` or `clients` section of your configuration. This sets up a special docker volume that will persist ssh keys and configurations so that communication between clients and servers is straightforward and secure. It also leaves a file `.privateer_identity` at the same location as the configuration file, which is used as the default identity for subsequent commands. Typically this is what you want. +Servers must be started before any backup is possible. To do this, run + +``` +privateer2 server start +``` + +Once started you can stop a server with `privateer2 server stop` (or just kill the container) and find out how it's getting on with `privateer2 server status` + ### Manual backup +To back up a volume onto one of your configured servers, run: + ``` privateer2 backup [--server=NAME] ``` -Add `--dry-run` to see the commands to run it yourself +Add `--dry-run` to see the commands to run it yourself. + +### Scheduled backups + +Each client can run a long-lived container to perform backups on some schedule using [`yacron`](https://github.com/gjcarneiro/yacron). If your client configuration contains a `schedule` section then you can run the command + +``` +privateer2 schedule start +``` + +to start the scheduled tasks. ### Restore @@ -68,6 +88,41 @@ For example, if you are on a "staging" machine, connecting to the "backup" serve privateer2 restore user_data --server=backup --source=production ``` +### Point-in-time backup and recovery + +Point-in-time backup is always taken on the server side, and converts a copy of a volume held on the server to a `tar` file, on the host machine and outside of any docker volume. These can then be manually copied around and use to initialise the contents of new volumes, in a way similar to the normal restore path. + +The command to export the volume is: + +``` +privateer2 export [--to-dir=PATH] [--source=NAME] +``` + +which will bring up a new container and create the tar file within the directory `PATH`. The name will be automatically generated and include the curent time, volume name and source. The `source` argument controls who backed the volume up in the first place, in the case where there are multiple clients. It can be omitted in the case where there is only one client performing backups, and **must** be ommitted in the case where you are exporting a local volume. + +You can point this command at any volume on any system where `privateer2` is installed to make a `tar` file; this might be useful for ad-hoc backup and recovery. If you have a volume called `redis_data`, then + +``` +privateer2 export redis_data +``` + +will create a new file `redis_data-.tar` in your working directory. + +Given a `tar` file, recovery looks like: + +``` +privateer2 [--dry-run] import +``` + +This does not need to be run anywhere with a `privateer.json` configuration, and indeed does not try and read one. It will fail if the volume exists already, making the command fairly safe. + +We could copy the file created in the `redis_data` example above to another machine and run + +``` +privateer2 import redis_data-.tar redis_data +``` + +to export the `tar` file into a new volume `redis_data`. ## What's the problem anyway? @@ -90,7 +145,7 @@ bob alice +-------------------+ +-----------------------+ ``` -so in this case `bob` runs a privateer client which sends data over ssh+rsync to a server running on `alice`, eventually meaning that the data in `volume1` on `bob` is replicated to `volume2` on `alice`. This process uses a set of ssh keys that each client and server will hold in a `keys` volume. This means that they do not interact with any ssh systems on the host. Note that if `alice` is also running sshd, this backup process will use a *second* ssh connection. +so in this case `bob` runs a `privateer2` client which sends data over ssh+rsync to a server running on `alice`, eventually meaning that the data in `volume1` on `bob` is replicated to `volume2` on `alice`. This process uses a set of ssh keys that each client and server will hold in a `keys` volume. This means that they do not interact with any ssh systems on the host. Note that if `alice` is also running sshd, this backup process will use a *second* ssh connection. In addition, we will support point-in-time backups on `alice`, creating `tar` files of the volume onto disk that can be easily restored onto any host. diff --git a/src/privateer2/cli.py b/src/privateer2/cli.py index 6326b4a..d76b7b5 100644 --- a/src/privateer2/cli.py +++ b/src/privateer2/cli.py @@ -6,6 +6,8 @@ privateer2 [options] check [--connection] privateer2 [options] backup [--server=NAME] privateer2 [options] restore [--server=NAME] [--source=NAME] + privateer2 [options] export [--to-dir=PATH] [--source=NAME] + privateer2 [options] import privateer2 [options] server (start | stop | status) privateer2 [options] schedule (start | stop | status) @@ -19,6 +21,14 @@ or server being acted on; the machine we are generating keys for, configuring, checking, serving, backing up from or restoring to. + Note that the 'import' subcommand is quite different and does not + interact with the configuration; it will reject options '--as' and + '--path'. If 'volume' exists already, it will fail, so this is + fairly safe. If running export with '--source=local' then the + configuration is not read - this can be used anywhere to create a + tar file of a local volume, which is suitable for importing with + 'import'. + The server and schedule commands start background containers that run forever (with the 'start' option). Check in on them with 'status' or stop them with 'stop'. @@ -38,6 +48,7 @@ from privateer2.restore import restore from privateer2.schedule import schedule_start, schedule_status, schedule_stop from privateer2.server import server_start, server_status, server_stop +from privateer2.tar import export_tar, export_tar_local, import_tar def pull(cfg): @@ -114,7 +125,25 @@ def _parse_opts(opts): return Call(_show_version) dry_run = opts["--dry-run"] - name = opts["--as"] + if opts["import"]: + _dont_use("--as", opts, "import") + _dont_use("--path", opts, "import") + return Call( + import_tar, + volume=opts[""], + tarfile=opts[""], + dry_run=dry_run, + ) + elif opts["export"] and opts["--source"] == "local": + _dont_use("--as", opts, "export --local") + _dont_use("--path", opts, "export --local") + return Call( + export_tar_local, + volume=opts[""], + to_dir=opts["--to-dir"], + dry_run=dry_run, + ) + path_config = _path_config(opts["--path"]) root_config = os.path.dirname(path_config) cfg = read_config(path_config) @@ -159,6 +188,16 @@ def _parse_opts(opts): source=opts["--source"], dry_run=dry_run, ) + elif opts["export"]: + return Call( + export_tar, + cfg=cfg, + name=name, + volume=opts[""], + to_dir=opts["--to-dir"], + source=opts["--source"], + dry_run=dry_run, + ) elif opts["server"]: if opts["start"]: return Call(server_start, cfg=cfg, name=name, dry_run=dry_run) diff --git a/src/privateer2/config.py b/src/privateer2/config.py index b63ebc4..510a73b 100644 --- a/src/privateer2/config.py +++ b/src/privateer2/config.py @@ -85,6 +85,9 @@ def machine_config(self, name): # this could be put elsewhere; we find the plausible sources (original # clients) that backed up a source to any server. def find_source(cfg, volume, source): + if volume not in cfg.list_volumes(): + msg = f"Unknown volume '{volume}'" + raise Exception(msg) for v in cfg.volumes: if v.name == volume and v.local: if source is not None: diff --git a/src/privateer2/tar.py b/src/privateer2/tar.py new file mode 100644 index 0000000..bef4587 --- /dev/null +++ b/src/privateer2/tar.py @@ -0,0 +1,129 @@ +import os + +import docker +from privateer2.check import check +from privateer2.config import find_source +from privateer2.util import ( + isotimestamp, + mounts_str, + run_container_with_command, + take_ownership, + volume_exists, +) + + +def export_tar(cfg, name, volume, *, to_dir=None, source=None, dry_run=False): + machine = check(cfg, name, quiet=True) + source = find_source(cfg, volume, source) + if not source: + return export_tar_local(volume, to_dir=to_dir, dry_run=dry_run) + + path = os.path.abspath(to_dir or "") + mounts = [ + docker.types.Mount("/export", path, type="bind"), + docker.types.Mount( + "/privateer", machine.data_volume, type="volume", read_only=True + ), + ] + tarfile = f"{source}-{volume}-{isotimestamp()}.tar" + src = f"/privateer/{source}/{volume}" + return _run_tar_create(mounts, src, path, tarfile, dry_run) + + +def export_tar_local(volume, *, to_dir=None, dry_run=False): + if not volume_exists(volume): + msg = f"Volume '{volume}' does not exist" + raise Exception(msg) + + path = os.path.abspath(to_dir or "") + mounts = [ + docker.types.Mount("/export", path, type="bind"), + docker.types.Mount("/privateer", volume, type="volume", read_only=True), + ] + tarfile = f"{volume}-{isotimestamp()}.tar" + src = "/privateer" + return _run_tar_create(mounts, src, path, tarfile, dry_run) + + +def import_tar(volume, tarfile, *, dry_run=False): + if volume_exists(volume): + msg = f"Volume '{volume}' already exists, please delete first" + raise Exception(msg) + if not os.path.exists(tarfile): + msg = f"Input file '{tarfile}' does not exist" + raise Exception(msg) + + # Use ubuntu (not alpine) because we will require the -p tag to + # preserve permissions on tar + image = "ubuntu" + tarfile = os.path.abspath(tarfile) + mounts = [ + docker.types.Mount("/src.tar", tarfile, type="bind", read_only=True), + docker.types.Mount("/privateer", volume, type="volume"), + ] + working_dir = "/privateer" + command = ["tar", "-xvpf", "/src.tar"] + if dry_run: + cmd = [ + "docker", + "run", + "--rm", + *mounts_str(mounts), + "-w", + working_dir, + image, + *command, + ] + print("Command to manually run import:") + print() + print(f" docker volume create {volume}") + print(f" {' '.join(cmd)}") + else: + docker.from_env().volumes.create(volume) + run_container_with_command( + "Import", + image, + command=command, + mounts=mounts, + working_dir=working_dir, + ) + + +def _run_tar_create(mounts, src, path, tarfile, dry_run): + image = "ubuntu" + command = ["tar", "-cpvf", f"/export/{tarfile}", "."] + if dry_run: + cmd = [ + "docker", + "run", + "--rm", + *mounts_str(mounts), + "-w", + src, + image, + *command, + ] + print("Command to manually run export:") + print() + print(f" {' '.join(cmd)}") + print() + print("(pay attention to the final '.' in the above command!)") + print() + print("Note that this file will have root ownership after creation") + print(f"You can fix that with 'sudo chown $(whoami) {tarfile}'") + print("or") + print() + cmd_own = take_ownership(tarfile, path, command_only=True) + print(f" {' '.join(cmd_own)}") + else: + run_container_with_command( + "Export", + image, + command=command, + mounts=mounts, + working_dir=src, + ) + print("Taking ownership of file") + take_ownership(tarfile, path) + print(f"Tar file ready at '{path}/{tarfile}'") + return os.path.join(path, tarfile) diff --git a/src/privateer2/util.py b/src/privateer2/util.py index e25e05b..e7b4fd7 100644 --- a/src/privateer2/util.py +++ b/src/privateer2/util.py @@ -232,6 +232,7 @@ def take_ownership(filename, directory, *, command_only=False): # tar return [ "docker", "run", + "--rm", *mounts_str(mounts), "-w", "/src", diff --git a/tests/test_cli.py b/tests/test_cli.py index cbc1ffb..00fe67d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -33,6 +33,17 @@ def test_can_parse_version(): assert res.kwargs == {} +def test_can_parse_import(): + res = _parse_argv(["import", "--dry-run", "f", "v"]) + assert res.target == privateer2.cli.import_tar + assert res.kwargs == {"volume": "v", "tarfile": "f", "dry_run": True} + assert not _parse_argv(["import", "f", "v"]).kwargs["dry_run"] + with pytest.raises(Exception, match="Don't use '--path' with 'import'"): + _parse_argv(["--path=privateer.json", "import", "f", "v"]) + with pytest.raises(Exception, match="Don't use '--as' with 'import'"): + _parse_argv(["--as=alice", "import", "f", "v"]) + + def test_can_parse_keygen_all(): res = _parse_argv(["keygen", "--path=example/simple.json", "--all"]) assert res.target == privateer2.cli.keygen_all @@ -212,6 +223,37 @@ def test_can_parse_complex_restore(tmp_path): } +def test_can_parse_export(tmp_path): + shutil.copy("example/simple.json", tmp_path / "privateer.json") + with open(tmp_path / ".privateer_identity", "w") as f: + f.write("alice\n") + with transient_working_directory(tmp_path): + res = _parse_argv(["export", "v"]) + assert res.target == privateer2.cli.export_tar + assert res.kwargs == { + "cfg": read_config("example/simple.json"), + "name": "alice", + "volume": "v", + "to_dir": None, + "source": None, + "dry_run": False, + } + + +def test_can_parse_local_export(): + res = _parse_argv(["export", "v", "--source=local"]) + assert res.target == privateer2.cli.export_tar_local + assert res.kwargs == { + "volume": "v", + "to_dir": None, + "dry_run": False, + } + with pytest.raises(Exception, match="Don't use '--as'"): + _parse_argv(["export", "v", "--source=local", "--as=bob"]) + with pytest.raises(Exception, match="Don't use '--path'"): + _parse_argv(["export", "v", "--source=local", "--path=p"]) + + def test_error_if_unknown_identity(tmp_path): shutil.copy("example/simple.json", tmp_path / "privateer.json") msg = "Can't determine identity; did you forget to configure" diff --git a/tests/test_config.py b/tests/test_config.py index 54b4e9b..1b6f85b 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -112,12 +112,14 @@ def test_can_find_appropriate_source(): msg = "Invalid source 'alice': valid options: 'bob', 'carol'" with pytest.raises(Exception, match=msg): find_source(cfg, "data", "alice") + with pytest.raises(Exception, match="Unknown volume 'unknown'"): + find_source(cfg, "unknown", "alice") def test_can_find_appropriate_source_if_local(): cfg = read_config("example/simple.json") cfg.volumes[0].local = True - find_source(cfg, "data", None) + assert find_source(cfg, "data", None) is None msg = "'data' is a local source, so 'source' must be empty" with pytest.raises(Exception, match=msg): find_source(cfg, "data", "bob") diff --git a/tests/test_tar.py b/tests/test_tar.py new file mode 100644 index 0000000..418d0d5 --- /dev/null +++ b/tests/test_tar.py @@ -0,0 +1,185 @@ +import os +import tarfile +from unittest.mock import MagicMock, call + +import pytest +import vault_dev + +import docker +import privateer2.tar +import privateer2.util +from privateer2.config import read_config +from privateer2.configure import configure +from privateer2.keys import keygen_all +from privateer2.tar import export_tar, export_tar_local, import_tar + + +def test_can_print_instructions_for_exporting_local_vol(managed_docker, capsys): + vol = managed_docker("volume") + privateer2.util.string_to_volume("hello", vol, "test") + path = export_tar_local(vol, dry_run=True) + out = capsys.readouterr() + lines = out.out.strip().split("\n") + assert "Command to manually run export:" in lines + assert "(pay attention to the final '.' in the above command!)" in lines + cmd = ( + f" docker run --rm " + f"-v {os.getcwd()}:/export -v {vol}:/privateer:ro " + f"-w /privateer ubuntu tar -cpvf /export/{os.path.basename(path)} ." + ) + assert cmd in lines + + +def test_can_export_local_volume(tmp_path, managed_docker): + vol = managed_docker("volume") + privateer2.util.string_to_volume("hello", vol, "test") + path = export_tar_local(vol, to_dir=tmp_path) + assert len(os.listdir(tmp_path)) == 1 + assert os.listdir(tmp_path)[0] == os.path.basename(path) + with tarfile.open(path, "r") as f: + assert f.getnames() == [".", "./test"] + + +def test_can_print_instructions_for_export_volume(managed_docker, capsys): + with vault_dev.Server(export_token=True) as server: + cfg = read_config("example/simple.json") + cfg.vault.url = server.url() + vol_keys = managed_docker("volume") + vol_data = managed_docker("volume") + name = managed_docker("container") + cfg.servers[0].key_volume = vol_keys + cfg.servers[0].data_volume = vol_data + cfg.servers[0].container = name + keygen_all(cfg) + configure(cfg, "alice") + capsys.readouterr() + path = export_tar(cfg, "alice", "data", dry_run=True) + out = capsys.readouterr() + lines = out.out.strip().split("\n") + assert "Command to manually run export:" in lines + assert "(pay attention to the final '.' in the above command!)" in lines + cmd = ( + f" docker run --rm " + f"-v {os.getcwd()}:/export -v {vol_data}:/privateer:ro " + "-w /privateer/bob/data " + f"ubuntu tar -cpvf /export/{os.path.basename(path)} ." + ) + assert cmd in lines + + +def test_can_export_managed_volume(monkeypatch, managed_docker): + mock_tar_create = MagicMock() + monkeypatch.setattr(privateer2.tar, "_run_tar_create", mock_tar_create) + with vault_dev.Server(export_token=True) as server: + cfg = read_config("example/simple.json") + cfg.vault.url = server.url() + vol = managed_docker("volume") + cfg.servers[0].key_volume = managed_docker("volume") + cfg.servers[0].data_volume = vol + cfg.servers[0].container = managed_docker("container") + keygen_all(cfg) + configure(cfg, "alice") + path = export_tar(cfg, "alice", "data") + assert path == mock_tar_create.return_value + assert mock_tar_create.call_count == 1 + call_args = mock_tar_create.call_args + path = os.path.abspath("") + mounts = [ + docker.types.Mount("/export", path, type="bind"), + docker.types.Mount("/privateer", vol, type="volume", read_only=True), + ] + tarfile = call_args[0][3] + src = "/privateer/bob/data" + assert call_args == call(mounts, src, path, tarfile, False) + + +def test_can_export_local_managed_volume(monkeypatch, managed_docker): + mock_tar_local = MagicMock() + mock_tar_create = MagicMock() + monkeypatch.setattr(privateer2.tar, "_run_tar_create", mock_tar_create) + monkeypatch.setattr(privateer2.tar, "export_tar_local", mock_tar_local) + with vault_dev.Server(export_token=True) as server: + cfg = read_config("example/local.json") + cfg.vault.url = server.url() + vol_keys = managed_docker("volume") + vol_data = managed_docker("volume") + vol_other = managed_docker("volume") + name = managed_docker("container") + cfg.servers[0].key_volume = vol_keys + cfg.servers[0].data_volume = vol_data + cfg.servers[0].container = name + cfg.volumes[1].name = vol_other + keygen_all(cfg) + configure(cfg, "alice") + path = export_tar(cfg, "alice", vol_other) + assert mock_tar_create.call_count == 0 + assert mock_tar_local.call_count == 1 + assert mock_tar_local.call_args == call( + vol_other, to_dir=None, dry_run=False + ) + assert path == mock_tar_local.return_value + + +def test_throw_if_local_volume_does_not_exist(managed_docker): + vol = managed_docker("volume") + msg = f"Volume '{vol}' does not exist" + with pytest.raises(Exception, match=msg): + export_tar_local(vol) + + +def test_throw_if_volume_does_not_exist(managed_docker): + with vault_dev.Server(export_token=True) as server: + cfg = read_config("example/simple.json") + cfg.vault.url = server.url() + vol = managed_docker("volume") + cfg.servers[0].key_volume = managed_docker("volume") + cfg.servers[0].data_volume = vol + cfg.servers[0].container = managed_docker("container") + keygen_all(cfg) + configure(cfg, "alice") + with pytest.raises(Exception, match="Unknown volume 'unknown'"): + export_tar(cfg, "alice", "unknown") + + +def test_import_volume(managed_docker, tmp_path): + src = managed_docker("volume") + dest = managed_docker("volume") + privateer2.util.string_to_volume("hello", src, "test") + path = export_tar_local(src, to_dir=tmp_path) + import_tar(dest, path) + assert privateer2.util.string_from_volume(dest, "test") == "hello" + + +def test_instructions_to_import_volume(managed_docker, tmp_path, capsys): + src = managed_docker("volume") + dest = managed_docker("volume") + privateer2.util.string_to_volume("hello", src, "test") + path = export_tar_local(src, to_dir=tmp_path) + capsys.readouterr() + import_tar(dest, path, dry_run=True) + out = capsys.readouterr() + lines = out.out.strip().split("\n") + cmd = ( + f" docker run --rm -v {path}:/src.tar:ro " + f"-v {dest}:/privateer -w /privateer ubuntu tar -xvpf /src.tar" + ) + assert "Command to manually run import:" in lines + assert f" docker volume create {dest}" in lines + assert cmd in lines + + +def test_dont_import_into_existing_volume(tmp_path, managed_docker): + dest = managed_docker("volume") + docker.from_env().volumes.create(dest) + path = str(tmp_path / "foo.tar") + msg = f"Volume '{dest}' already exists, please delete first" + with pytest.raises(Exception, match=msg): + import_tar(dest, path) + + +def test_throw_if_tarfile_does_not_exist(tmp_path, managed_docker): + dest = managed_docker("volume") + path = str(tmp_path / "foo.tar") + msg = f"Input file '{path}' does not exist" + with pytest.raises(Exception, match=msg): + import_tar(dest, path) diff --git a/tests/test_util.py b/tests/test_util.py index 912fcea..5ccc757 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -148,6 +148,7 @@ def test_can_take_ownership_of_a_file(tmp_path, managed_docker): expected = [ "docker", "run", + "--rm", *privateer2.util.mounts_str(mounts), "-w", "/src",