Skip to content

Commit

Permalink
Add json metadata option
Browse files Browse the repository at this point in the history
  • Loading branch information
jcushman committed Dec 3, 2024
1 parent f253dc1 commit b4ce501
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 28 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ Options:
metadata.json
--unsigned-metadata FILE JSON file to be copied to unsigned-
metadata.json
--signed-metadata-json TEXT JSON string to be written to data/signed-
metadata.json
--unsigned-metadata-json TEXT JSON string to be written to unsigned-
metadata.json
-s, --sign <cert_chain>:<key_file>
Sign using certificate chain and private key
files (can be repeated)
Expand Down
57 changes: 42 additions & 15 deletions src/nabit/bin/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@ def main():
@click.option('--path', '-p', 'paths', multiple=True, type=click.Path(exists=True, path_type=Path), help='File or directory to archive (can be repeated)')
@click.option('--hard-link', is_flag=True, help='Use hard links when copying files (when possible)')
@click.option('--info', '-i', multiple=True, help='bag-info.txt metadata in key:value format (can be repeated)')
@click.option('--signed-metadata', type=click.Path(exists=True, path_type=Path, dir_okay=False),
@click.option('--signed-metadata', 'signed_metadata_path', type=click.Path(exists=True, path_type=Path, dir_okay=False),
help='JSON file to be copied to data/signed-metadata.json')
@click.option('--unsigned-metadata', type=click.Path(exists=True, path_type=Path, dir_okay=False),
@click.option('--unsigned-metadata', 'unsigned_metadata_path', type=click.Path(exists=True, path_type=Path, dir_okay=False),
help='JSON file to be copied to unsigned-metadata.json')
@click.option('--signed-metadata-json', type=str,
help='JSON string to be written to data/signed-metadata.json')
@click.option('--unsigned-metadata-json', type=str,
help='JSON string to be written to unsigned-metadata.json')
@click.option('--sign', '-s', 'signature_args', multiple=True,
help='Sign using certificate chain and private key files (can be repeated)',
metavar='<cert_chain>:<key_file>',
Expand All @@ -33,21 +37,44 @@ def main():
metavar='<tsa_keyword> | <cert_chain>:<url>',
)
@click.pass_context
def archive(ctx, bag_path, amend, urls, paths, hard_link, info, signed_metadata, unsigned_metadata, signature_args):
def archive(
ctx,
bag_path,
amend,
urls,
paths,
hard_link,
info,
signed_metadata_path,
unsigned_metadata_path,
signed_metadata_json,
unsigned_metadata_json,
signature_args
):
"""
Archive files and URLs into a BagIt package.
bag_path is the destination directory for the package.
"""
# Validate JSON files if provided
for metadata_path in (signed_metadata, unsigned_metadata):
if not metadata_path:
continue
if not metadata_path.suffix.lower() == '.json':
raise click.BadParameter(f'Metadata file must be a .json file, got "{metadata_path}"')
try:
json.loads(metadata_path.read_text())
except json.JSONDecodeError as e:
raise click.BadParameter(f'Metadata file must be valid JSON, got "{metadata_path}": {e}')
# Process metadata from files and JSON strings
metadata = {'signed': None, 'unsigned': None}
for prefix in ('signed', 'unsigned'):
metadata_path = ctx.params[f'{prefix}_metadata_path']
metadata_json = ctx.params[f'{prefix}_metadata_json']

if metadata_path and metadata_json:
raise click.BadParameter(f"Cannot specify both --{prefix}-metadata and --{prefix}-metadata-json")
if metadata_path:
if not metadata_path.suffix.lower() == '.json':
raise click.BadParameter(f'Metadata file must be a .json file, got "{metadata_path}"')
try:
metadata[prefix] = json.loads(metadata_path.read_text())
except json.JSONDecodeError as e:
raise click.BadParameter(f'Metadata file must be valid JSON, got "{metadata_path}": {e}')
elif metadata_json:
try:
metadata[prefix] = json.loads(metadata_json)
except json.JSONDecodeError as e:
raise click.BadParameter(f'Invalid JSON string for --{prefix}-metadata-json: {e}')

# Check if output directory exists and is not empty
if bag_path.exists() and any(bag_path.iterdir()):
Expand Down Expand Up @@ -116,8 +143,8 @@ def archive(ctx, bag_path, amend, urls, paths, hard_link, info, signed_metadata,
urls=urls,
bag_info=bag_info,
signatures=signatures,
signed_metadata=signed_metadata,
unsigned_metadata=unsigned_metadata,
signed_metadata=metadata['signed'],
unsigned_metadata=metadata['unsigned'],
amend=amend,
use_hard_links=hard_link,
)
Expand Down
13 changes: 7 additions & 6 deletions src/nabit/lib/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .sign import validate_signatures, KNOWN_TSAS, add_signatures
from .. import __version__
import hashlib
import json

# files to ignore when copying directories
IGNORE_PATTERNS = ['.DS_Store']
Expand Down Expand Up @@ -83,8 +84,8 @@ def package(
paths: list[Path | str] | None = None,
bag_info: dict | None = None,
signatures: list[dict] | None = None,
signed_metadata: Path | str | None = None,
unsigned_metadata: Path | str | None = None,
signed_metadata: dict | None = None,
unsigned_metadata: dict | None = None,
use_hard_links: bool = False,
) -> None:
"""
Expand All @@ -93,8 +94,8 @@ def package(
Copy all paths, using hard links, into data/files/.
Include bag_info in bag-info.txt.
If signatures are provided, add them to tagmanifest-sha256.txt.
Copy signed_metadata to data/signed-metadata.json.
Copy unsigned_metadata to unsigned-metadata.json.
Write signed_metadata to data/signed-metadata.json.
Write unsigned_metadata to unsigned-metadata.json.
"""
bag_info = bag_info or {}

Expand All @@ -111,9 +112,9 @@ def package(

# Add metadata files
if signed_metadata is not None:
os.link(signed_metadata, data_path / "signed-metadata.json")
(data_path / "signed-metadata.json").write_text(json.dumps(signed_metadata, indent=2))
if unsigned_metadata is not None:
os.link(unsigned_metadata, output_path / "unsigned-metadata.json")
(output_path / "unsigned-metadata.json").write_text(json.dumps(unsigned_metadata, indent=2))

## add bag files
bag_changed = not amend
Expand Down
8 changes: 4 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def test_bag(tmp_path, test_files):
package(
output_path=bag_path,
paths=test_files["payload"],
signed_metadata=test_files["signed_metadata"],
unsigned_metadata=test_files["unsigned_metadata"],
signed_metadata=test_files["signed_metadata"].read_text(),
unsigned_metadata=test_files["unsigned_metadata"].read_text(),
bag_info={"Source-Organization": "Test Org"}
)
return bag_path
Expand Down Expand Up @@ -60,8 +60,8 @@ def signed_bag(tmp_path, test_files, root_ca):
package(
output_path=bag_path,
paths=test_files["payload"],
signed_metadata=test_files["signed_metadata"],
unsigned_metadata=test_files["unsigned_metadata"],
signed_metadata=test_files["signed_metadata"].read_text(),
unsigned_metadata=test_files["unsigned_metadata"].read_text(),
bag_info={"Source-Organization": "Test Org"},
signatures=[
{
Expand Down
32 changes: 29 additions & 3 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from inline_snapshot import snapshot
import json
import re
import pytest

from tests.utils import validate_passing
from .utils import validate_passing, validate_failing
Expand Down Expand Up @@ -80,17 +81,27 @@ def test_url_payload(runner, tmp_path, server):
assert (bag_path / 'data/files/another.html').read_text() == 'another content'
assert (bag_path / 'data/files/test.txt').read_text() == 'test content'

def test_metadata(runner, tmp_path, test_files):
@pytest.mark.parametrize('metadata_format', ['file', 'json'])
def test_metadata(runner, tmp_path, test_files, metadata_format):
bag_path = tmp_path / 'bag'
if metadata_format == 'file':
extra_args = [
'--unsigned-metadata', str(test_files["unsigned_metadata"]),
'--signed-metadata', str(test_files["signed_metadata"]),
]
else:
extra_args = [
'--unsigned-metadata-json', '{"metadata": "unsigned"}',
'--signed-metadata-json', '{"metadata": "signed"}',
]
run(runner, [
'archive',
str(bag_path),
'-p', str(test_files["payload"][0]),
'-i', 'Source-Organization:Test Org',
'-i', 'Contact-Email:[email protected]',
'-i', 'Contact-Email:[email protected]',
'--unsigned-metadata', str(test_files["unsigned_metadata"]),
'--signed-metadata', str(test_files["signed_metadata"]),
*extra_args,
])
assert validate_passing(bag_path) == snapshot("""\
WARNING: No headers.warc found; archive lacks request and response metadata
Expand Down Expand Up @@ -313,6 +324,21 @@ def test_invalid_metadata_file_contents(runner, tmp_path, test_files):
'--signed-metadata', str(tmp_path / 'metadata.json'),
], exit_code=2, output='Metadata file must be valid JSON')

def test_invalid_metadata_json_string(runner, tmp_path, test_files):
    """Invoke `archive` with a non-JSON value for --signed-metadata-json.

    The expectations handed to `run` are exit_code=2 and the output
    text 'Invalid JSON'.
    """
    bag_dir = tmp_path / 'bag'
    cli_args = [
        'archive',
        str(bag_dir),
        '--signed-metadata-json',
        'invalid json',
    ]
    run(runner, cli_args, exit_code=2, output='Invalid JSON')

def test_cannot_combine_metadata_file_and_json(runner, tmp_path, test_files):
    """Invoke `archive` with both --signed-metadata and --signed-metadata-json.

    The expectations handed to `run` are exit_code=2 and the
    "Cannot specify both ..." message for the signed options.
    """
    bag_dir = tmp_path / 'bag'
    metadata_file = test_files["signed_metadata"]
    cli_args = ['archive', str(bag_dir)]
    # Supply the same metadata slot twice: once as a file, once as inline JSON.
    cli_args += ['--signed-metadata', str(metadata_file)]
    cli_args += ['--signed-metadata-json', '{"metadata": "signed"}']
    run(
        runner,
        cli_args,
        exit_code=2,
        output='Cannot specify both --signed-metadata and --signed-metadata-json',
    )

def test_invalid_info_format(runner, tmp_path):
run(runner, [
'archive',
Expand Down

0 comments on commit b4ce501

Please sign in to comment.