diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e8d131ae..28b94e9c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,6 +30,9 @@ jobs: with: packages: ${{ env.SYSTEM_PACKAGES }} + - name: Install bubblewrap + run: sudo apt update -y && sudo apt install bubblewrap -y + - uses: actions/cache@v3 with: path: ~/.cache/pip diff --git a/damnit/backend/extract_data.py b/damnit/backend/extract_data.py index 6dd91bd6..18da962f 100644 --- a/damnit/backend/extract_data.py +++ b/damnit/backend/extract_data.py @@ -1,4 +1,5 @@ import argparse +import contextlib import getpass import os import logging @@ -27,6 +28,7 @@ from ..ctxsupport.ctxrunner import get_user_variables from ..definitions import UPDATE_BROKERS, UPDATE_TOPIC from .db import DamnitDB +from .sandboxing import Bubblewrap @@ -92,7 +94,7 @@ def loop(): def extract_in_subprocess( proposal, run, out_path, cluster=False, run_data=RunData.ALL, match=(), - python_exe=None, mock=False, tee_output=None + python_exe=None, mock=False, tee_output=None, sandbox=True, ): if not python_exe: python_exe = sys.executable @@ -106,6 +108,24 @@ def extract_in_subprocess( for m in match: args.extend(['--match', m]) + if sandbox: + bubblewrap = Bubblewrap() + with contextlib.suppress(Exception): + bubblewrap.add_bind_proposal(proposal) + bubblewrap.add_bind_venv(Path(sys.executable)) + if python_exe and Path(sys.executable) != Path(python_exe): + bubblewrap.add_bind_venv(Path(python_exe)) + bubblewrap.add_bind(Path(__file__).parents[1] / 'ctxsupport') # ctxsupport_dir + bubblewrap.add_bind(out_path.parent.absolute()) + # NOTE: do not bind mount in the file path, this mount is done via inodes so if + # the file is updated by being overwritten then the mounted version will not be + # in sync + bubblewrap.add_bind(Path.cwd().absolute()) + # TODO: done to get both the context file and the DB into the container, should + # be changed to work off of their actual paths instead of assuming it's in cwd + + args = bubblewrap.build_command(args) + with TemporaryDirectory() as td: # Save a separate copy of the reduced data, so we can send an update # with only the variables that we've extracted. @@ -217,7 +237,8 @@ def add_to_db(reduced_data, db: sqlite3.Connection, proposal, run): class Extractor: _proposal = None - def __init__(self): + def __init__(self, sandbox=True): + self.sandbox = sandbox self.db = DamnitDB() self.kafka_prd = KafkaProducer( bootstrap_servers=UPDATE_BROKERS, @@ -266,6 +287,7 @@ def extract_and_ingest(self, proposal, run, cluster=False, reduced_data = extract_in_subprocess( proposal, run, out_path, cluster=cluster, run_data=run_data, match=match, python_exe=python_exe, mock=mock, tee_output=tee_output, + sandbox=self.sandbox ) log.info("Reduced data has %d fields", len(reduced_data)) add_to_db(reduced_data, self.db.conn, proposal, run) @@ -285,6 +307,9 @@ def extract_and_ingest(self, proposal, run, cluster=False, for m in match: python_cmd.extend(["--match", m]) + if not self.sandbox: + python_cmd.append("--no-sandbox") + res = subprocess.run([ 'sbatch', '--parsable', *self.slurm_options(), @@ -305,9 +330,9 @@ def proposal_runs(proposal): return set(int(p.stem[1:]) for p in raw_dir.glob("*")) -def reprocess(runs, proposal=None, match=(), mock=False): +def reprocess(runs, proposal=None, match=(), mock=False, sandbox=True): """Called by the 'amore-proto reprocess' subcommand""" - extr = Extractor() + extr = Extractor(sandbox=sandbox) if proposal is None: proposal = extr.proposal @@ -376,6 +401,7 @@ def reprocess(runs, proposal=None, match=(), mock=False): ap.add_argument('run', type=int) ap.add_argument('run_data', choices=('raw', 'proc', 'all')) ap.add_argument('--cluster-job', action="store_true") + ap.add_argument('--no-sandbox', action="store_true") ap.add_argument('--match', action="append", default=[]) args = ap.parse_args() logging.basicConfig(level=logging.INFO, @@ -389,7 +415,7 @@ def reprocess(runs, proposal=None, match=(), mock=False): log.info("Extracting cluster variables in Slurm job %s on %s", os.environ.get('SLURM_JOB_ID', '?'), socket.gethostname()) - Extractor().extract_and_ingest(args.proposal, args.run, + Extractor(sandbox=not args.no_sandbox).extract_and_ingest(args.proposal, args.run, cluster=args.cluster_job, run_data=RunData(args.run_data), match=args.match) diff --git a/damnit/backend/listener.py b/damnit/backend/listener.py index bd4199e4..c7347b4d 100644 --- a/damnit/backend/listener.py +++ b/damnit/backend/listener.py @@ -67,7 +67,8 @@ def watch_processes_finish(q: queue.Queue): class EventProcessor: - def __init__(self, context_dir=Path('.')): + def __init__(self, sandbox: bool, context_dir=Path('.')): + self.sandbox = sandbox self.context_dir = context_dir self.db = DamnitDB.from_dir(context_dir) # Fail fast if read-only - https://stackoverflow.com/a/44707371/434217 @@ -147,13 +148,18 @@ def handle_event(self, record, msg: dict, run_data: RunData): with log_path.open('ab') as logf: # Create subprocess to process the run - extract_proc = subprocess.Popen([ - sys.executable, '-m', 'damnit.backend.extract_data', - str(proposal), str(run), run_data.value - ], cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT) + cmd = [sys.executable, '-m', 'damnit.backend.extract_data', str(proposal), + str(run), run_data.value] + + if not self.sandbox: + cmd.append('--no-sandbox') + + extract_proc = subprocess.Popen( + cmd, cwd=self.context_dir, stdout=logf, stderr=subprocess.STDOUT + ) self.extract_procs_queue.put((proposal, run, extract_proc)) -def listen(): +def listen(sandbox: bool): # Set up logging to a file file_handler = logging.FileHandler("amore.log") formatter = logging.root.handlers[0].formatter @@ -161,8 +167,14 @@ def listen(): logging.root.addHandler(file_handler) log.info(f"Running on {platform.node()} under user {getpass.getuser()}, PID {os.getpid()}") + + if sandbox: + log.info("Sandboxing of processes enabled") + else: + log.warning("Sandboxing disabled") + try: - with EventProcessor() as processor: + with EventProcessor(sandbox=sandbox) as processor: processor.run() except KeyboardInterrupt: log.error("Stopping on Ctrl + C") @@ -176,6 +188,3 @@ def listen(): # can start the backend). if os.stat("amore.log").st_uid == os.getuid(): os.chmod("amore.log", 0o666) - -if __name__ == '__main__': - listen() diff --git a/damnit/backend/sandboxing.py b/damnit/backend/sandboxing.py new file mode 100644 index 00000000..aa5c8597 --- /dev/null +++ b/damnit/backend/sandboxing.py @@ -0,0 +1,149 @@ +from __future__ import annotations + +from pathlib import Path +import shlex +import subprocess + +from extra_data.read_machinery import find_proposal + + +class Bubblewrap: + """A class representing a sandbox environment using Bubblewrap. + + Bubblewrap is a sandboxing tool that creates a restricted environment for processes, + this class provides methods to configure and build a bubblewrap sandbox for running + a context file such that it only has access to data from the relevant proposal. + + Attributes: + command (list[str]): The base command for running in the sandbox. + command_binds (list[tuple[str, str]]): List of bind mounts commands. + """ + + def __init__(self): + self.command = [ + "bwrap", + "--die-with-parent", # Kill sandbox if parent process dies + "--unshare-all", # Unshare all namespaces + "--share-net", # Share network namespace + "--dev", "/dev", # Bind mount /dev + "--bind", "/tmp", "/tmp", # Bind mount tmp in + "--dir", "/gpfs", # Create empty directory at /gpfs + ] + + self.command_binds: list[tuple[str, str, str]] = [] + + for path in ( + "/bin", + "/etc/resolv.conf", + "/lib", + "/lib64", + "/sbin", + "/usr", + ): + self.add_bind(Path(path), ro=True) + + if Path("/gpfs/exfel/sw/software").exists(): + self.add_bind(Path("/gpfs/exfel/sw/software"), ro=True) + + def add_bind( + self, source: Path, dest: Path | None = None, ro: bool = False + ) -> None: + """Adds a bind mount to the sandbox. + + !!! warning + + Bind mounts are done on inodes, if a program updates a file mounted into the + sandbox by overwriting it then the inode changes and the file will not be + in sync between host/sandbox. To avoid this mount the parent directory. + + Args: + source (Path): The source path to be bind mounted. + dest (Path, optional): The destination path in the sandbox. If not provided, the source path is used. + ro (bool, optional): Whether the bind mount should be read-only. Defaults to False. + + Raises: + ValueError: If the source path is not absolute. + """ + if not source.is_absolute(): + raise ValueError("Source path must be absolute") + + if dest is None: + dest = source + + self.command_binds.append( + ( + f"--{'ro-' if ro else ''}bind", + shlex.quote(str(source)), + shlex.quote(str(dest)), + ) + ) + + def add_bind_proposal(self, proposal_id: int) -> None: + """Adds bind mounts for a proposal directory and its contents. + + Args: + proposal_id (int): The ID of the proposal. + + Raises: + FileNotFoundError: If the proposal directory is not found. + """ + proposal_dir = Path(find_proposal(f"p{proposal_id:06d}")) + + self.add_bind(proposal_dir) + + for path in proposal_dir.iterdir(): + self.add_bind(path.resolve()) + + def add_bind_venv(self, python_exec: Path) -> None: + """Adds all paths required by a virtual environment to the sandbox. + + This function will use the given python executable to first call `sys.prefix` to + check if the executable is in a venv, if it is then `sysconfig.get_paths()` is + used to find required paths and add them paths as read-only binds. + + Args: + python_exec (Path): The path to the Python executable. + + Raises: + subprocess.CalledProcessError: If the command to get the virtual environment paths fails. + """ + venv = subprocess.check_output( + [python_exec, "-c", "import sys; print(sys.prefix != sys.base_prefix)"] + ).decode("utf-8") + + if venv == "False": + return + + paths = subprocess.check_output( + [ + python_exec, + "-c", + 'import sysconfig; print(" ".join(v for v in sysconfig.get_paths().values()))', + ] + ).decode("utf-8") + + for path in paths.split(): + path = Path(path) + self.add_bind(path, ro=True) + if path.is_symlink(): + self.add_bind(path.resolve(), ro=True) + + def build_command(self, command: str | list[str]) -> list[str]: + """Builds the final command for running in the sandbox. + + Args: + command (str or list[str]): The command to be executed in the sandbox. + + Returns: + list[str]: The final command for running in the sandbox. + """ + _command = self.command.copy() + + for bind in self.command_binds: + _command.extend(bind) + + _command.append("--") # End of bubblewrap arguments + + _command.extend(command if isinstance(command, list) else [command]) + + return _command diff --git a/damnit/backend/supervisord.py b/damnit/backend/supervisord.py index fca0846d..bc8db9c8 100644 --- a/damnit/backend/supervisord.py +++ b/damnit/backend/supervisord.py @@ -80,7 +80,7 @@ def write_supervisord_conf(root_path): if config_path.stat().st_uid == os.getuid(): os.chmod(config_path, 0o666) -def start_backend(root_path: Path, try_again=True): +def start_backend(root_path: Path, sandbox: bool, try_again=True): config_path = root_path / "supervisord.conf" if not config_path.is_file(): write_supervisord_conf(root_path) @@ -106,10 +106,10 @@ def start_backend(root_path: Path, try_again=True): return False if try_again: - return start_backend(root_path, try_again=False) + return start_backend(root_path, sandbox, try_again=False) elif rc == 3: # 3 means it's stopped and we need to start the program - cmd = subprocess.run([*supervisorctl, "start", "damnit"]) + cmd = subprocess.run([*supervisorctl, "start", "damnit", "" if sandbox else "--no-sandbox"]) if cmd.returncode != 0: log.error(f"Couldn't start supervisord, tried to run command: {' '.join(cmd)}\n" f"Return code: {cmd.returncode}" @@ -134,7 +134,7 @@ def start_backend(root_path: Path, try_again=True): return True -def initialize_and_start_backend(root_path, proposal=None): +def initialize_and_start_backend(root_path, sandbox = True, proposal=None): # Ensure the directory exists root_path.mkdir(parents=True, exist_ok=True) if root_path.stat().st_uid == os.getuid(): @@ -160,4 +160,4 @@ def initialize_and_start_backend(root_path, proposal=None): os.chmod(context_path, 0o666) # Start backend - return start_backend(root_path) + return start_backend(root_path, sandbox) diff --git a/damnit/backend/test_listener.py b/damnit/backend/test_listener.py index 6a5a7ed9..5c80d2c4 100644 --- a/damnit/backend/test_listener.py +++ b/damnit/backend/test_listener.py @@ -55,9 +55,9 @@ def run(self): log.error("Error processing event", exc_info=True) -def listen(): +def listen(sandbox: bool): try: - with TestEventProcessor() as processor: + with TestEventProcessor(sandbox) as processor: processor.run() except KeyboardInterrupt: print("Stopping on Ctrl-C") diff --git a/damnit/cli.py b/damnit/cli.py index 58851093..182f1eba 100644 --- a/damnit/cli.py +++ b/damnit/cli.py @@ -1,6 +1,7 @@ import inspect import logging import os +import shutil import sys import textwrap import traceback @@ -73,6 +74,10 @@ def main(): '--daemonize', action='store_true', help="Start the listener under a separate process managed by supervisord." ) + listen_ap.add_argument( + '--no-sandbox', action='store_true', + help="Start with sandboxing disabled." + ) listen_ap.add_argument( 'context_dir', type=Path, nargs='?', default='.', help="Directory to store summarised results" @@ -82,6 +87,9 @@ def main(): 'reprocess', help="Extract data from specified runs. This does not send live updates yet." ) + reprocess_ap.add_argument( + "--no-sandbox", action="store_true", + ) reprocess_ap.add_argument( "--mock", action="store_true", help="Use a fake run object instead of loading one from disk." @@ -164,11 +172,19 @@ def main(): from .backend.db import db_path from .backend import initialize_and_start_backend + sandbox = not args.no_sandbox + + if sandbox and not shutil.which("bwrap"): + raise RuntimeError( + "`bwrap` executable not found in $PATH, add it to $PATH or start " + "backend/listener with `--no-sandbox`" + ) + if args.daemonize: if not db_path(args.context_dir).is_file(): sys.exit("You must create a database with `amore-proto proposal` before starting the listener.") - return initialize_and_start_backend(args.context_dir) + return initialize_and_start_backend(args.context_dir, sandbox=sandbox) else: if args.test: from .backend.test_listener import listen @@ -176,14 +192,14 @@ def main(): from .backend.listener import listen os.chdir(args.context_dir) - return listen() + return listen(sandbox=sandbox) elif args.subcmd == 'reprocess': # Hide some logging from Kafka to make things more readable logging.getLogger('kafka').setLevel(logging.WARNING) from .backend.extract_data import reprocess - reprocess(args.run, args.proposal, args.match, args.mock) + reprocess(args.run, args.proposal, args.match, args.mock, not args.no_sandbox) elif args.subcmd == 'proposal': from .backend.db import DamnitDB diff --git a/damnit/gui/main_window.py b/damnit/gui/main_window.py index 2b07937d..dbab20ef 100644 --- a/damnit/gui/main_window.py +++ b/damnit/gui/main_window.py @@ -1137,7 +1137,7 @@ def prompt_setup_db_and_backend(context_dir: Path, prop_no=None, parent=None): ) if not ok: return False - initialize_and_start_backend(context_dir, prop_no) + initialize_and_start_backend(context_dir, proposal=prop_no) # Check if the backend is running elif not backend_is_running(context_dir): @@ -1147,7 +1147,7 @@ def prompt_setup_db_and_backend(context_dir: Path, prop_no=None, parent=None): "This is only necessary if new runs are expected." ) if button == QMessageBox.Yes: - initialize_and_start_backend(context_dir, prop_no) + initialize_and_start_backend(context_dir, proposal=prop_no) return True diff --git a/tests/conftest.py b/tests/conftest.py index 4db8cfbc..4d2cc81a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -26,7 +26,7 @@ def make_mock_db(ctx, mock_db): os.chdir(db_dir) reduced_data = extract_in_subprocess(1234, 42, extracted_data_dir / "p1234_r42.h5", - mock=True) + mock=True, sandbox=False) add_to_db(reduced_data, db.conn, 1234, 42) finally: os.chdir(cwd) diff --git a/tests/test_backend.py b/tests/test_backend.py index 1fc41d60..d1b89dcf 100644 --- a/tests/test_backend.py +++ b/tests/test_backend.py @@ -354,7 +354,8 @@ def test_add_to_db(mock_db): np.testing.assert_array_equal(pickle.loads(row["image"]), reduced_data["image"]) assert row["none"] == reduced_data["none"] -def test_extractor(mock_ctx, mock_db, mock_run, monkeypatch): +@pytest.mark.parametrize("sandbox", [True, False]) +def test_extractor(mock_ctx, mock_db, mock_run, monkeypatch, sandbox): # Change to the DB directory db_dir, db = mock_db db.metameta["proposal"] = 1234 @@ -382,7 +383,7 @@ def slurm_scalar(run): # Create Extractor with a mocked KafkaProducer with patch(f"{pkg}.KafkaProducer") as _: - extractor = Extractor() + extractor = Extractor(sandbox=sandbox) # Test regular variables and slurm variables are executed reduced_data = { "array": np.arange(10) } @@ -391,9 +392,14 @@ def slurm_scalar(run): extractor.extract_and_ingest(1234, 42, cluster=False, run_data=RunData.ALL) extract_in_subprocess.assert_called_once() + assert extract_in_subprocess.call_args[1].get("sandbox") == sandbox extractor.kafka_prd.send.assert_called_once() subprocess_run.assert_called_once() + if not sandbox: + # checks flag in value of sbatch wrap + assert "--no-sandbox" in subprocess_run.call_args[0][0][-1] + # This works because we loaded damnit.context above from ctxrunner import main @@ -545,7 +551,7 @@ def subprocess_runner(): pkg = "damnit.backend.supervisord" with patch(f"{pkg}.write_supervisord_conf", side_effect=mock_write_supervisord_conf): - assert initialize_and_start_backend(db_dir, 1234) + assert initialize_and_start_backend(db_dir, proposal=1234) # The directory should be created if it doesn't exist assert db_dir.is_dir() diff --git a/tests/test_cli.py b/tests/test_cli.py index 709c6178..da2d89c2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -113,6 +113,18 @@ def amore_proto(args): main() listen.assert_called_once() + with (amore_proto(["listen", "--no-sandbox"]), + patch(f"{pkg}.listener.listen") as listen): + main() + listen.assert_called_once_with(sandbox=False) + + with (amore_proto(["listen"]), + patch("shutil.which") as shutil): + shutil.return_value = None + # Check error raised when sandbox executable not found + with pytest.raises(RuntimeError): + main() + # Should fail without an existing database with (amore_proto(["listen", "--daemonize"]) as initialize_and_start_backend, pytest.raises(SystemExit)): diff --git a/tests/test_gui.py b/tests/test_gui.py index 85065222..728bfa0c 100644 --- a/tests/test_gui.py +++ b/tests/test_gui.py @@ -324,7 +324,7 @@ def helper_patch(): # We expect the database to be initialized and the backend started win.autoconfigure.assert_called_once_with(db_dir, proposal=1234) - initialize_and_start_backend.assert_called_once_with(db_dir, 1234) + initialize_and_start_backend.assert_called_once_with(db_dir, proposal=1234) # Create the directory and database file to fake the database already existing db_dir.mkdir(parents=True) @@ -346,7 +346,7 @@ def helper_patch(): # This time the database is already initialized win.autoconfigure.assert_called_once_with(db_dir, proposal=1234) - initialize_and_start_backend.assert_called_once_with(db_dir, 1234) + initialize_and_start_backend.assert_called_once_with(db_dir, proposal=1234) def test_user_vars(mock_ctx_user, mock_user_vars, mock_db, qtbot): @@ -522,10 +522,10 @@ def get_value_from_db(field_name): raise ValueError(f"Error in field_name: the variable name '{field_name}' is not of the form '[a-zA-Z_]\\w+'") return db.conn.execute(f"SELECT {field_name} FROM runs WHERE runnr = ?", (run_number,)).fetchone()[0] - # Check that editing is prevented when trying to modfiy a non-editable column + # Check that editing is prevented when trying to modfiy a non-editable column assert open_editor_and_get_delegate("dep_number").widget is None - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_number").widget is not None change_to_value_and_close("15.4") @@ -536,7 +536,7 @@ def get_value_from_db(field_name): # Check that the value in the db matches what was typed in the table assert abs(get_value_from_db("user_number") - 15.4) < 1e-5 - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_number").widget is not None # Try to assign a value of the wrong type @@ -544,7 +544,7 @@ def get_value_from_db(field_name): # Check that the value is still the same as before assert abs(get_value_from_field("user_number") - 15.4) < 1e-5 - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_number").widget is not None # Try to assign an empty value (i.e. deletes the cell) @@ -554,7 +554,7 @@ def get_value_from_db(field_name): # Check that the value in the db matches what was typed in the table assert get_value_from_db("user_number") is None - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_integer").widget is not None change_to_value_and_close("42") @@ -565,7 +565,7 @@ def get_value_from_db(field_name): # Check that the value in the db matches what was typed in the table assert get_value_from_db("user_integer") == 42 - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_integer").widget is not None # Try to assign an empty value (i.e. deletes the cell) @@ -575,7 +575,7 @@ def get_value_from_db(field_name): # Check that the value in the db matches what was typed in the table assert get_value_from_db("user_integer") is None - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_string").widget is not None change_to_value_and_close("Cool string") @@ -585,7 +585,7 @@ def get_value_from_db(field_name): # Check that the value in the db matches what was typed in the table assert get_value_from_db("user_string") == "Cool string" - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_string").widget is not None # Try to assign an empty value (i.e. deletes the cell) @@ -595,7 +595,7 @@ def get_value_from_db(field_name): # Check that the value in the db matches what was typed in the table assert get_value_from_db("user_string") is None - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_boolean").widget is not None change_to_value_and_close("T") @@ -605,14 +605,14 @@ def get_value_from_db(field_name): # Check that the value in the db matches what was typed in the table assert get_value_from_db("user_boolean") - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_boolean").widget is not None change_to_value_and_close("no") # Check that the value in the table is of the correct type and value assert not get_value_from_field("user_boolean") - # Check that editing is allowed when trying to modfiy a user editable column + # Check that editing is allowed when trying to modfiy a user editable column assert open_editor_and_get_delegate("user_boolean").widget is not None # Try to assign an empty value (i.e. deletes the cell) diff --git a/tests/test_sandboxing.py b/tests/test_sandboxing.py new file mode 100644 index 00000000..4c1ac44e --- /dev/null +++ b/tests/test_sandboxing.py @@ -0,0 +1,153 @@ +import os +import subprocess +import sysconfig +from pathlib import Path +from unittest.mock import MagicMock +import sys +from mock import patch + +import pytest +from damnit.backend.extract_data import extract_in_subprocess + +from damnit.backend.sandboxing import Bubblewrap + + +@pytest.fixture +def bubblewrap(): + return Bubblewrap() + + +@pytest.fixture(scope="session") +def mock_proposal_1111(tmp_path_factory): + proposal_str = "p0001111" + root: Path = tmp_path_factory.mktemp("root") + + # 'real' directories + usr = root / "u" / "usr" / proposal_str + raw = root / "pnfs" / "archive" / proposal_str + + for d in (usr, raw): + d.mkdir(parents=True) + + # fake gpfs structure - proposal dir with symlinks to usr and raw + gpfs = root / "gpfs" + p = gpfs / "p0001111" + + p.mkdir(parents=True) + + usr_link = p / "usr" + usr_link.symlink_to(usr) + + raw_link = p / "raw" + raw_link.symlink_to(raw) + + return p + + +@pytest.mark.parametrize( + "src,dest,ro,expected", + [ + (Path("/source"), None, False, ("--bind", "/source", "/source")), + (Path("/source"), None, True, ("--ro-bind", "/source", "/source")), + (Path("/source"), Path("/dest"), False, ("--bind", "/source", "/dest")), + ], +) +def test_add_bind(bubblewrap, src, dest, ro, expected): + bubblewrap.add_bind(src, dest, ro) + + assert expected in bubblewrap.command_binds + + +def test_add_bind_proposal(bubblewrap, monkeypatch, mock_proposal_1111): + proposal_id = 1111 + + find_proposal = MagicMock(return_value=str(mock_proposal_1111)) + monkeypatch.setattr("damnit.backend.sandboxing.find_proposal", find_proposal) + + bubblewrap.add_bind_proposal(proposal_id) + + assert find_proposal.call_args == ((f"p{proposal_id:06d}",),) + + binds = [b[1] for b in bubblewrap.command_binds] + + assert str(mock_proposal_1111) in binds + + assert any("u/usr" in b for b in binds) + assert any("pnfs/archive" in b for b in binds) + + +def test_add_bind_venv(bubblewrap, monkeypatch): + python_exec = Path("/path/to/python") + + paths = [ + "/path/venv/lib", + "/path/venv/include", + "/path/venv/bin", + ] + + monkeypatch.setattr( + "subprocess.check_output", + MagicMock(return_value="\n".join(paths).encode("utf-8")), + ) + + bubblewrap.add_bind_venv(python_exec) + + assert ("--ro-bind", "/path/venv/lib", "/path/venv/lib") in bubblewrap.command_binds + assert ( + "--ro-bind", + "/path/venv/include", + "/path/venv/include", + ) in bubblewrap.command_binds + assert ("--ro-bind", "/path/venv/bin", "/path/venv/bin") in bubblewrap.command_binds + + +def test_add_bind_venv_with_subprocess(bubblewrap): + python_exec = Path(sys.executable) + bubblewrap.add_bind_venv(python_exec) + + for path in sysconfig.get_paths().values(): + assert ("--ro-bind", str(path), str(path)) in bubblewrap.command_binds + + +def test_write_in_bwrap(bubblewrap, tmp_path): + bubblewrap.add_bind(tmp_path) + + out = tmp_path / "text" + cmd = ["touch", str(out.absolute())] + cmd = bubblewrap.build_command(cmd) + + subprocess.check_call(cmd) + + assert out.exists() + + assert out.stat().st_uid == os.getuid() + + +def test_write_in_bwrap_ro_fail(bubblewrap, tmp_path): + bubblewrap.add_bind(tmp_path, ro=True) + + res = subprocess.call( + bubblewrap.build_command(["touch", str(tmp_path / "hi")]), + stderr=subprocess.STDOUT, + ) + + assert res != 0 + + +@patch("damnit.backend.extract_data.load_reduced_data") +def test_extract_data_call(bubblewrap, tmp_path): + out_path = tmp_path / "out" + out_path.mkdir(parents=True) + + with patch("subprocess.run") as subprocess_run: + extract_in_subprocess(0, 0, out_path) + subprocess_run.assert_called() + args = subprocess_run.call_args[0][0] + bwrap = args[: args.index("--")] + + out_file = out_path / "touch" + subprocess.check_call([*bwrap, "--", "touch", str(out_file)]) + assert out_file.exists() + + root = Path(__file__).parent.parent + subprocess.check_call([*bwrap, "--", "stat", str(root)])