Patch summary (free text before the first "diff --git" header; patch tools
are expected to skip it):

  * Makefile: always upgrade pip, then require ocrd>=2.13 instead of only
    installing ocrd when the CLI is missing.
  * ocrd-olena-binarize: require bashlib 2.13.0; per pageId, prefer the
    PAGE-XML file over (derived) images found in the same input fileGrp, and
    raise an error when a page has several images but no PAGE-XML.
  * ocrd-tool.json: bump tool version to 1.2.1.

NOTE(review): indentation of context lines (and the whitespace-only
difference between the removed and the added `fi`) was reconstructed from a
whitespace-collapsed copy of this patch -- verify against the target files
before applying.

diff --git a/Makefile b/Makefile
index 0b9f4ac..e615fa9 100644
--- a/Makefile
+++ b/Makefile
@@ -65,8 +65,8 @@ deps: #deps-ubuntu
 	test -x $(BINDIR)/scribo-cli && \
 	$(BINDIR)/scribo-cli sauvola --help >/dev/null 2>&1 || \
 	$(MAKE) build-olena
-	ocrd ocrd-tool --help >/dev/null 2>&1 || \
-	$(PIP) install ocrd # needed for ocrd CLI (and bashlib)
+	$(PIP) install -U pip
+	$(PIP) install "ocrd>=2.13" # needed for ocrd CLI (and bashlib)
 
 # Install
 install: deps
diff --git a/ocrd-olena-binarize b/ocrd-olena-binarize
index 56e4020..b2a1cd3 100755
--- a/ocrd-olena-binarize
+++ b/ocrd-olena-binarize
@@ -349,7 +349,7 @@ function main {
     # shellcheck source=../core/ocrd/bashlib/lib.bash
     source $(ocrd bashlib filename)
     ocrd__wrap "$SHAREDIR/ocrd-tool.json" "ocrd-olena-binarize" "$@"
-    ocrd__minversion 2.11.0
+    ocrd__minversion 2.13.0
 
     scribo_options=(--enable-negate-output)
     case ${params[impl]} in
@@ -393,6 +393,23 @@ function main {
                      -k mimetype \
                      -k pageId \
                      --download))
+    # pageIds that have a PAGE-XML file in the input fileGrp
+    page_pages=($(ocrd workspace find \
+                      -G "$in_file_grp" \
+                      -m "$MIMETYPE_PAGE" \
+                      -k pageId))
+    # pageIds that occur more than once among the image files of the input fileGrp
+    multi_pages=($(ocrd workspace find \
+                      -G "$in_file_grp" \
+                      -m '//image/.*' \
+                      -k pageId | sort | uniq -d))
+    declare -A is_page is_multi
+    for page in "${page_pages[@]}"; do
+        is_page[$page]=1
+    done
+    for page in "${multi_pages[@]}"; do
+        is_multi[$page]=1
+    done
     local IFS=$' \t\n'
     local n=0 zeros=0000
     for csv in "${files[@]}"; do
@@ -410,8 +427,15 @@ function main {
         if ! test -f "${in_fpath#file://}"; then
             ocrd log error "input file ID=${in_id} (pageId=${in_pageId} MIME=${in_mimetype}) is not on disk"
             continue
-	fi
-	
+        fi
+        # A fileGrp may contain both PAGE-XML and (derived) image files.
+        # If this pageId has a PAGE-XML file, skip every non-PAGE file for it;
+        # otherwise, if it has more than one image, the input is ambiguous: raise.
+        if ((${is_page[$in_pageId]:-0})); then
+            test "x$in_mimetype" != "x$MIMETYPE_PAGE" && continue
+        elif ((${is_multi[$in_pageId]:-0})); then
+            ocrd__raise "No PAGE-XML for page $in_pageId in fileGrp '$in_file_grp' but multiple images."
+        fi
         local out_id="${in_id//$in_file_grp/$out_file_grp}"
         if [ "x$out_id" = "x$in_id" ]; then
             out_id=${out_file_grp}_${zeros:0:$((4-${#n}))}$n
diff --git a/ocrd-tool.json b/ocrd-tool.json
index 10636c2..49ece16 100644
--- a/ocrd-tool.json
+++ b/ocrd-tool.json
@@ -1,5 +1,5 @@
 {
-  "version": "1.2.0",
+  "version": "1.2.1",
   "git_url": "https://github.com/OCR-D/ocrd_olena",
   "tools": {
     "ocrd-olena-binarize": {