From f957fd9ffb52d943c5f876f64937fb40d21f6924 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 13 Oct 2020 18:35:02 +0200 Subject: [PATCH 1/4] fix niblack k option passing --- ocrd-olena-binarize | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd-olena-binarize b/ocrd-olena-binarize index 4dbb468..1f687c5 100755 --- a/ocrd-olena-binarize +++ b/ocrd-olena-binarize @@ -360,7 +360,7 @@ function main { scribo_options+=(--disable-negate-input) # has default -0.2 not 0.34 scribo_options+=(--k $($PYTHON -c "print(${params[k]}/-1.7)")) - ;& # fall through + ;;& # get more sauvola|kim|wolf) scribo_options+=(--k ${params[k]}) ;;& # get more From 5fdaa10f487e194a28ec2149b511ce26c5878d67 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 13 Oct 2020 21:44:59 +0200 Subject: [PATCH 2/4] always ensure cropping (also with existing AlternativeImage) --- ocrd-olena-binarize | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/ocrd-olena-binarize b/ocrd-olena-binarize index 1f687c5..90d46c3 100755 --- a/ocrd-olena-binarize +++ b/ocrd-olena-binarize @@ -243,27 +243,29 @@ EOF "$out_fpath" ) image_in_fpath=$(xmlstarlet "${options[@]}") ocrd log info "found imageFilename '${image_in_fpath}' for input file ID=${in_id} (pageId=${in_pageId})" - image_in_id=$(image_id_from_fpath "$image_in_fpath" "$in_id" "$in_pageId") - image_in_fpath="${image_in_fpath#file://}" - if options=( --no-doc-namespace sel - -N "pc=${namespace}" -t - -v '/pc:PcGts/pc:Page/pc:Border/pc:Coords/@points' - "$out_fpath" ) - border=$(xmlstarlet "${options[@]}"); then - ocrd log debug "Using explicitly set page border '$border' for input file ID=${in_id} (pageId=${in_pageId})" - local tmpfile - tmpfile=$(mktemp --tmpdir ocrd-olena-binarize-cropped.XXXXXX) - xywh_from_points $border | { - read width height left top - convert "${image_in_fpath}[0]" -crop ${width}x${height}+${left}+${top} "$tmpfile" - } - image_in_fpath="$tmpfile" - comments="cropped,binarized" - else - comments="binarized" - fi + comments=binarized fi - + image_in_id=$(image_id_from_fpath "$image_in_fpath" "$in_id" "$in_pageId") + image_in_fpath="${image_in_fpath#file://}" + local tmpfile= + if [[ "$comments" =~ cropped ]]; then + ocrd log debug "Using page border in input file ID=${in_id} (pageId=${in_pageId})" + elif options=( --no-doc-namespace sel + -N "pc=${namespace}" -t + -v '/pc:PcGts/pc:Page/pc:Border/pc:Coords/@points' + "$out_fpath" ) + # FIXME: add a bashlib wrapper for workspace.image_from_page to use here + border=$(xmlstarlet "${options[@]}"); then + ocrd log debug "Cropping to page border '$border' in input file ID=${in_id} (pageId=${in_pageId})" + tmpfile=$(mktemp --tmpdir ocrd-olena-binarize-cropped.XXXXXX) + xywh_from_points $border | { + read width height left top + convert "${image_in_fpath}[0]" -crop ${width}x${height}+${left}+${top} "$tmpfile" + } + image_in_fpath="$tmpfile" + comments="${comments%binarized}cropped,binarized" + fi + # set output names image_out_id="${image_in_id}-BIN_${params[impl]}" image_out_fpath="${out_file_grp}/${image_out_id}.png" From ce31995605000cea0d6f821a75e6ea7f21d23607 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 13 Oct 2020 21:45:58 +0200 Subject: [PATCH 3/4] remove temporary images from cropping afterwards --- ocrd-olena-binarize | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ocrd-olena-binarize b/ocrd-olena-binarize index 90d46c3..15388d9 100755 --- a/ocrd-olena-binarize +++ b/ocrd-olena-binarize @@ -281,6 +281,11 @@ EOF local scribo_extra=$(auto_winsize "${original_image_in_fpath}" "${in_pageId}") scribo-cli "${params[impl]}" "${image_in_fpath}" "${image_out_fpath}" "${scribo_options[@]}" ${scribo_extra} + + # Remove temporary image file, if any + if [ -n "$tmpfile" ]; then + rm "$tmpfile" + fi # Add image file to METS ocrd workspace add \ From 2e2197b6d5b9d56775d2b8a6cb5705c630652592 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Tue, 13 Oct 2020 22:36:00 +0200 Subject: [PATCH 4/4] :package: 1.2.3 --- CHANGELOG.md | 40 +++++++++++++++++++++++++++------------- ocrd-tool.json | 6 +++--- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3333187..1889d07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,17 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +## [1.2.3] - 2020-10-13 + +Fixed: + + - pass correct `k` value for `niblack` + +Changed: + + - also crop AlternativeImage to page border if not already + - remove temporary images from cropping afterwards + ## [1.2.2] - 2020-09-30 Fixed: @@ -142,17 +153,20 @@ Changed First release -[1.2.0]: ../../compare/v1.2.0...v1.1.10 -[1.1.10]: ../../compare/v1.1.10...v1.1.9 -[1.1.9]: ../../compare/v1.1.9...v1.1.8 -[1.1.8]: ../../compare/v1.1.8...v1.1.7 -[1.1.7]: ../../compare/v1.1.7...v1.1.6 -[1.1.6]: ../../compare/v1.1.6...v1.1.5 -[1.1.5]: ../../compare/v1.1.5...v1.1.4 -[1.1.4]: ../../compare/v1.1.4...v1.1.3 -[1.1.3]: ../../compare/v1.1.3...v1.1.2 -[1.1.2]: ../../compare/v1.1.2...v1.1.1 -[1.1.1]: ../../compare/v1.1.1...v1.1.0 -[1.1.0]: ../../compare/v1.1.0...v1.0.0 -[1.0.0]: ../../compare/v1.0.0...v0.0.2 +[1.2.3]: ../../compare/v1.2.2...v1.2.3 +[1.2.2]: ../../compare/v1.2.1...v1.2.2 +[1.2.1]: ../../compare/v1.2.0...v1.2.1 +[1.2.0]: ../../compare/v1.1.10...v1.2.0 +[1.1.10]: ../../compare/v1.1.9...v1.1.10 +[1.1.9]: ../../compare/v1.1.8...v1.1.9 +[1.1.8]: ../../compare/v1.1.7...v1.1.8 +[1.1.7]: ../../compare/v1.1.6...v1.1.7 +[1.1.6]: ../../compare/v1.1.5...v1.1.6 +[1.1.5]: ../../compare/v1.1.4...v1.1.5 +[1.1.4]: ../../compare/v1.1.3...v1.1.4 +[1.1.3]: ../../compare/v1.1.2...v1.1.3 +[1.1.2]: ../../compare/v1.1.1...v1.1.2 +[1.1.1]: ../../compare/v1.1.0...v1.1.1 +[1.1.0]: ../../compare/v1.0.0...v1.1.0 +[1.0.0]: ../../compare/v0.0.2...v1.0.0 [0.0.2]: ../../compare/HEAD...v0.0.2 diff --git a/ocrd-tool.json b/ocrd-tool.json index 52181ac..63757fb 100644 --- a/ocrd-tool.json +++ b/ocrd-tool.json @@ -1,10 +1,10 @@ { - "version": "1.2.2", + "version": "1.2.3", "git_url": "https://github.com/OCR-D/ocrd_olena", "tools": { "ocrd-olena-binarize": { "executable": "ocrd-olena-binarize", - "description": "OLENA's binarization algos for OCR-D (on page-level)", + "description": "popular binarization algorithms implemented by Olena/SCRIBO, wrapped for OCR-D (on page level only)", "categories": [ "Image preprocessing" ], @@ -36,7 +36,7 @@ "default": 0.34 }, "win-size": { - "description": "The (odd) window size in pixels; when zero (default), set to DPI; for Otsu, does not apply", + "description": "The (odd) window size in pixels; when zero (default), set to DPI (or 301); for Otsu, does not apply", "type": "number", "format": "integer", "default": 0