From 13c7b241efe9550f83ea46cebe175325890a6ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20Lindstr=C3=B6m?= Date: Wed, 27 Sep 2023 17:53:59 +0200 Subject: [PATCH] Add min- and max-created filters to lddb_json_shape.py --- librisxl-tools/scripts/lddb_json_shape.py | 46 +++++++++++++++++------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/librisxl-tools/scripts/lddb_json_shape.py b/librisxl-tools/scripts/lddb_json_shape.py index 78ceeefa86..f809f02452 100644 --- a/librisxl-tools/scripts/lddb_json_shape.py +++ b/librisxl-tools/scripts/lddb_json_shape.py @@ -1,3 +1,4 @@ +from __future__ import annotations import json import os @@ -69,32 +70,45 @@ def count_value(k, v, shape): if __name__ == '__main__': + from datetime import datetime + from pathlib import Path from time import time + import argparse import sys - from pathlib import Path - args = sys.argv[:] - cmd = args.pop(0) - if not args: - print(f'USAGE: {cmd} OUT_DIR', file=sys.stderr) - sys.exit(1) + argp = argparse.ArgumentParser() + argp.add_argument('-d', '--debug', action='store_true', default=False) + argp.add_argument('-c', '--min-created') # inclusive + argp.add_argument('-C', '--max-created') # exclusive + argp.add_argument('outdir', metavar='OUT_DIR') + + args = argp.parse_args() - outpath = Path(args.pop(0)) SUFFIX = '.json' + + outpath: Path|None = Path(args.outdir) + assert outpath + if outpath.suffix == SUFFIX: outdir = outpath.parent else: outdir = outpath outpath = None + if not outdir.is_dir(): outdir.mkdir(parents=True, exist_ok=True) - index = {} - work_by_type_index = {} - instance_index = {} - work_index = {} + # NOTE: fromisoformat with timezones requires Python 3.11 + min_inc_created: datetime|None = datetime.fromisoformat(args.min_created) if args.min_created else None + max_ex_created: datetime|None = datetime.fromisoformat(args.max_created) if args.max_created else None + print(min_inc_created, max_ex_created, file=sys.stderr) + + index: dict = {} + work_by_type_index: dict = {} + instance_index: dict = {} + work_index: dict = {} - t_last = 0 + t_last = 0.0 cr = '\r' for i, l in enumerate(sys.stdin): if not l.rstrip(): @@ -109,6 +123,14 @@ def count_value(k, v, shape): try: data = json.loads(l) + + if '@graph' in data: + created = datetime.fromisoformat(data['@graph'][0]['created']) + if min_inc_created and created < min_inc_created: + continue + if max_ex_created and created >= max_ex_created: + continue + thing, work = reshape(data) compute_shape(thing, index) if work: