diff --git a/README.md b/README.md index de8b715..dda0cfe 100644 --- a/README.md +++ b/README.md @@ -143,6 +143,10 @@ Options: metadata.json --unsigned-metadata FILE JSON file to be copied to unsigned- metadata.json + --signed-metadata-json TEXT JSON string to be written to data/signed- + metadata.json + --unsigned-metadata-json TEXT JSON string to be written to unsigned- + metadata.json -s, --sign : Sign using certificate chain and private key files (can be repeated) diff --git a/src/nabit/bin/cli.py b/src/nabit/bin/cli.py index 1bae9b8..d90ad22 100644 --- a/src/nabit/bin/cli.py +++ b/src/nabit/bin/cli.py @@ -20,10 +20,14 @@ def main(): @click.option('--path', '-p', 'paths', multiple=True, type=click.Path(exists=True, path_type=Path), help='File or directory to archive (can be repeated)') @click.option('--hard-link', is_flag=True, help='Use hard links when copying files (when possible)') @click.option('--info', '-i', multiple=True, help='bag-info.txt metadata in key:value format (can be repeated)') -@click.option('--signed-metadata', type=click.Path(exists=True, path_type=Path, dir_okay=False), +@click.option('--signed-metadata', 'signed_metadata_path', type=click.Path(exists=True, path_type=Path, dir_okay=False), help='JSON file to be copied to data/signed-metadata.json') -@click.option('--unsigned-metadata', type=click.Path(exists=True, path_type=Path, dir_okay=False), +@click.option('--unsigned-metadata', 'unsigned_metadata_path', type=click.Path(exists=True, path_type=Path, dir_okay=False), help='JSON file to be copied to unsigned-metadata.json') +@click.option('--signed-metadata-json', type=str, + help='JSON string to be written to data/signed-metadata.json') +@click.option('--unsigned-metadata-json', type=str, + help='JSON string to be written to unsigned-metadata.json') @click.option('--sign', '-s', 'signature_args', multiple=True, help='Sign using certificate chain and private key files (can be repeated)', metavar=':', @@ -33,21 +37,44 @@ def main(): metavar=' | :', ) @click.pass_context -def archive(ctx, bag_path, amend, urls, paths, hard_link, info, signed_metadata, unsigned_metadata, signature_args): +def archive( + ctx, + bag_path, + amend, + urls, + paths, + hard_link, + info, + signed_metadata_path, + unsigned_metadata_path, + signed_metadata_json, + unsigned_metadata_json, + signature_args +): """ Archive files and URLs into a BagIt package. bag_path is the destination directory for the package. """ - # Validate JSON files if provided - for metadata_path in (signed_metadata, unsigned_metadata): - if not metadata_path: - continue - if not metadata_path.suffix.lower() == '.json': - raise click.BadParameter(f'Metadata file must be a .json file, got "{metadata_path}"') - try: - json.loads(metadata_path.read_text()) - except json.JSONDecodeError as e: - raise click.BadParameter(f'Metadata file must be valid JSON, got "{metadata_path}": {e}') + # Process metadata from files and JSON strings + metadata = {'signed': None, 'unsigned': None} + for prefix in ('signed', 'unsigned'): + metadata_path = ctx.params[f'{prefix}_metadata_path'] + metadata_json = ctx.params[f'{prefix}_metadata_json'] + + if metadata_path and metadata_json: + raise click.BadParameter(f"Cannot specify both --{prefix}-metadata and --{prefix}-metadata-json") + if metadata_path: + if not metadata_path.suffix.lower() == '.json': + raise click.BadParameter(f'Metadata file must be a .json file, got "{metadata_path}"') + try: + metadata[prefix] = json.loads(metadata_path.read_text()) + except json.JSONDecodeError as e: + raise click.BadParameter(f'Metadata file must be valid JSON, got "{metadata_path}": {e}') + elif metadata_json: + try: + metadata[prefix] = json.loads(metadata_json) + except json.JSONDecodeError as e: + raise click.BadParameter(f'Invalid JSON string for --{prefix}-metadata-json: {e}') # Check if output directory exists and is not empty if bag_path.exists() and any(bag_path.iterdir()): @@ -116,8 +143,8 @@ def archive(ctx, bag_path, amend, urls, paths, hard_link, info, signed_metadata, urls=urls, bag_info=bag_info, signatures=signatures, - signed_metadata=signed_metadata, - unsigned_metadata=unsigned_metadata, + signed_metadata=metadata['signed'], + unsigned_metadata=metadata['unsigned'], amend=amend, use_hard_links=hard_link, ) diff --git a/src/nabit/lib/archive.py b/src/nabit/lib/archive.py index 4163f11..f5214eb 100644 --- a/src/nabit/lib/archive.py +++ b/src/nabit/lib/archive.py @@ -8,6 +8,7 @@ from .sign import validate_signatures, KNOWN_TSAS, add_signatures from .. import __version__ import hashlib +import json # files to ignore when copying directories IGNORE_PATTERNS = ['.DS_Store'] @@ -83,8 +84,8 @@ def package( paths: list[Path | str] | None = None, bag_info: dict | None = None, signatures: list[dict] | None = None, - signed_metadata: Path | str | None = None, - unsigned_metadata: Path | str | None = None, + signed_metadata: dict | None = None, + unsigned_metadata: dict | None = None, use_hard_links: bool = False, ) -> None: """ @@ -93,8 +94,8 @@ def package( Copy all paths, using hard links, into data/files/. Include bag_info in bag-info.txt. If signatures are provided, add them to tagmanifest-sha256.txt. - Copy signed_metadata to data/signed-metadata.json. - Copy unsigned_metadata to unsigned-metadata.json. + Write signed_metadata to data/signed-metadata.json. + Write unsigned_metadata to unsigned-metadata.json. """ bag_info = bag_info or {} @@ -111,9 +112,9 @@ def package( # Add metadata files if signed_metadata is not None: - os.link(signed_metadata, data_path / "signed-metadata.json") + (data_path / "signed-metadata.json").write_text(json.dumps(signed_metadata, indent=2)) if unsigned_metadata is not None: - os.link(unsigned_metadata, output_path / "unsigned-metadata.json") + (output_path / "unsigned-metadata.json").write_text(json.dumps(unsigned_metadata, indent=2)) ## add bag files bag_changed = not amend diff --git a/tests/conftest.py b/tests/conftest.py index 9fa6962..4002b41 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -27,8 +27,8 @@ def test_bag(tmp_path, test_files): package( output_path=bag_path, paths=test_files["payload"], - signed_metadata=test_files["signed_metadata"], - unsigned_metadata=test_files["unsigned_metadata"], + signed_metadata=test_files["signed_metadata"].read_text(), + unsigned_metadata=test_files["unsigned_metadata"].read_text(), bag_info={"Source-Organization": "Test Org"} ) return bag_path @@ -60,8 +60,8 @@ def signed_bag(tmp_path, test_files, root_ca): package( output_path=bag_path, paths=test_files["payload"], - signed_metadata=test_files["signed_metadata"], - unsigned_metadata=test_files["unsigned_metadata"], + signed_metadata=test_files["signed_metadata"].read_text(), + unsigned_metadata=test_files["unsigned_metadata"].read_text(), bag_info={"Source-Organization": "Test Org"}, signatures=[ { diff --git a/tests/test_cli.py b/tests/test_cli.py index 2c0930f..949ed76 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,6 +3,7 @@ from inline_snapshot import snapshot import json import re +import pytest from tests.utils import validate_passing from .utils import validate_passing, validate_failing @@ -80,8 +81,19 @@ def test_url_payload(runner, tmp_path, server): assert (bag_path / 'data/files/another.html').read_text() == 'another content' assert (bag_path / 'data/files/test.txt').read_text() == 'test content' -def test_metadata(runner, tmp_path, test_files): +@pytest.mark.parametrize('metadata_format', ['file', 'json']) +def test_metadata(runner, tmp_path, test_files, metadata_format): bag_path = tmp_path / 'bag' + if metadata_format == 'file': + extra_args = [ + '--unsigned-metadata', str(test_files["unsigned_metadata"]), + '--signed-metadata', str(test_files["signed_metadata"]), + ] + else: + extra_args = [ + '--unsigned-metadata-json', '{"metadata": "unsigned"}', + '--signed-metadata-json', '{"metadata": "signed"}', + ] run(runner, [ 'archive', str(bag_path), @@ -89,8 +101,7 @@ def test_metadata(runner, tmp_path, test_files): '-i', 'Source-Organization:Test Org', '-i', 'Contact-Email:test1@example.com', '-i', 'Contact-Email:test2@example.com', - '--unsigned-metadata', str(test_files["unsigned_metadata"]), - '--signed-metadata', str(test_files["signed_metadata"]), + *extra_args, ]) assert validate_passing(bag_path) == snapshot("""\ WARNING: No headers.warc found; archive lacks request and response metadata @@ -313,6 +324,21 @@ def test_invalid_metadata_file_contents(runner, tmp_path, test_files): '--signed-metadata', str(tmp_path / 'metadata.json'), ], exit_code=2, output='Metadata file must be valid JSON') +def test_invalid_metadata_json_string(runner, tmp_path, test_files): + run(runner, [ + 'archive', + str(tmp_path / 'bag'), + '--signed-metadata-json', 'invalid json', + ], exit_code=2, output='Invalid JSON') + +def test_cannot_combine_metadata_file_and_json(runner, tmp_path, test_files): + run(runner, [ + 'archive', + str(tmp_path / 'bag'), + '--signed-metadata', str(test_files["signed_metadata"]), + '--signed-metadata-json', '{"metadata": "signed"}', + ], exit_code=2, output='Cannot specify both --signed-metadata and --signed-metadata-json') + def test_invalid_info_format(runner, tmp_path): run(runner, [ 'archive',