From e6828cb8696079b70f35a11441b6910ea49aa4ba Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 18 Oct 2023 21:34:51 +0200 Subject: [PATCH 01/10] =?UTF-8?q?=F0=9F=8E=A8=20(Mostly)=20sort=20import?= =?UTF-8?q?=20blocks=20using=20ruff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd_calamari/cli.py | 2 +- ocrd_calamari/config.py | 1 + ocrd_calamari/fix_calamari1_model.py | 9 +++++---- ocrd_calamari/recognize.py | 29 +++++++++++++++------------- setup.py | 4 ++-- test/base.py | 2 +- test/test_recognize.py | 5 +++-- 7 files changed, 29 insertions(+), 23 deletions(-) diff --git a/ocrd_calamari/cli.py b/ocrd_calamari/cli.py index 068b065..9228acb 100644 --- a/ocrd_calamari/cli.py +++ b/ocrd_calamari/cli.py @@ -1,6 +1,6 @@ import click - from ocrd.decorators import ocrd_cli_options, ocrd_cli_wrap_processor + from ocrd_calamari.recognize import CalamariRecognize diff --git a/ocrd_calamari/config.py b/ocrd_calamari/config.py index a514169..1729f8c 100644 --- a/ocrd_calamari/config.py +++ b/ocrd_calamari/config.py @@ -1,4 +1,5 @@ import json + from pkg_resources import resource_string OCRD_TOOL = json.loads(resource_string(__name__, "ocrd-tool.json").decode("utf8")) diff --git a/ocrd_calamari/fix_calamari1_model.py b/ocrd_calamari/fix_calamari1_model.py index e26a53e..7427abf 100644 --- a/ocrd_calamari/fix_calamari1_model.py +++ b/ocrd_calamari/fix_calamari1_model.py @@ -1,8 +1,9 @@ -import re import json -import click -from glob import glob +import re from copy import deepcopy +from glob import glob + +import click from ocrd_calamari.util import working_directory @@ -23,7 +24,7 @@ def fix_calamari1_model(checkpoint_dir): old_j = deepcopy(j) for v in j["model"].values(): - if type(v) != dict: + if isinstance(v, dict): continue for child in v.get("children", []): for replacement in child.get("replacements", []): diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index a7195be..a8803ba 100644 --- 
a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -1,42 +1,45 @@ from __future__ import absolute_import -import os import itertools +import os from glob import glob import numpy as np +from ocrd import Processor +from ocrd_modelfactory import page_from_file +from ocrd_models.ocrd_page import ( + CoordsType, + GlyphType, + TextEquivType, + WordType, + to_xml, +) from ocrd_utils import ( + MIMETYPE_PAGE, assert_file_grp_cardinality, coordinates_for_segment, getLogger, make_file_id, points_from_polygon, polygon_from_x0y0x1y1, - MIMETYPE_PAGE, tf_disable_interactive_logs, ) # Disable tensorflow/keras logging via print before importing calamari +# (and disable ruff's import checks and sorting here) +# ruff: noqa: E402 +# ruff: isort: off tf_disable_interactive_logs() +from tensorflow import __version__ as tensorflow_version from calamari_ocr import __version__ as calamari_version from calamari_ocr.ocr import MultiPredictor from calamari_ocr.ocr.voting import voter_from_proto from calamari_ocr.proto import VoterParams -from ocrd import Processor -from ocrd_modelfactory import page_from_file -from ocrd_models.ocrd_page import ( - TextEquivType, - WordType, - GlyphType, - CoordsType, - to_xml, -) +# ruff: isort: on from ocrd_calamari.config import OCRD_TOOL -from tensorflow import __version__ as tensorflow_version - TOOL = "ocrd-calamari-recognize" diff --git a/setup.py b/setup.py index 1bbdd50..85540bc 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- -from pathlib import Path import json +from pathlib import Path -from setuptools import setup, find_packages +from setuptools import find_packages, setup with open("./ocrd-tool.json", "r") as f: version = json.load(f)["version"] diff --git a/test/base.py b/test/base.py index f98f459..329a8f6 100644 --- a/test/base.py +++ b/test/base.py @@ -4,10 +4,10 @@ import sys from test.assets import assets -from ocrd_utils import initLogging PWD = os.path.dirname(os.path.realpath(__file__)) 
sys.path.append(PWD + "/../ocrd") +from ocrd_utils import initLogging initLogging() diff --git a/test/test_recognize.py b/test/test_recognize.py index c5d4b61..f539593 100644 --- a/test/test_recognize.py +++ b/test/test_recognize.py @@ -1,14 +1,15 @@ +import logging import os import shutil import subprocess import tempfile -from lxml import etree import pytest -import logging +from lxml import etree from ocrd.resolver import Resolver from ocrd_calamari import CalamariRecognize + from .base import assets METS_KANT = assets.url_of( From 83570015850f6dbed492c20ea984ec598835362e Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 18 Oct 2023 21:35:31 +0200 Subject: [PATCH 02/10] =?UTF-8?q?=E2=9A=99=20ruff:=20enable=20"I"=20checks?= =?UTF-8?q?=20(e.g.=20import=20block=20sorting)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ruff does not by default enable the "I" checks, which includes the wanted import sorting. The ruff configuration goes to ruff.toml for now, as we have not switched to pyproject.toml yet. 
--- ruff.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 ruff.toml diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..5ffb100 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,2 @@ +# TODO: This should go to pyproject.toml once we have one +select = ["E", "F", "I"] From dca61c08c364acab2ddd16ee48a8fd9485c28ecb Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 18 Oct 2023 21:39:05 +0200 Subject: [PATCH 03/10] =?UTF-8?q?=F0=9F=8E=A8=20Sort=20remaining=20import?= =?UTF-8?q?=20blocks=20using=20ruff?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/base.py b/test/base.py index 329a8f6..e5d26ad 100644 --- a/test/base.py +++ b/test/base.py @@ -2,12 +2,12 @@ import os import sys - from test.assets import assets +from ocrd_utils import initLogging + PWD = os.path.dirname(os.path.realpath(__file__)) sys.path.append(PWD + "/../ocrd") -from ocrd_utils import initLogging initLogging() From d1d6efe1df0a8e5212ef03ec0034cc48f1519cd0 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 18 Oct 2023 21:44:59 +0200 Subject: [PATCH 04/10] =?UTF-8?q?=E2=9A=99=20vim:=20use=20.editorconfig=20?= =?UTF-8?q?instead=20of=20modelines?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .editorconfig | 29 +++++++++++++++++++++++++++++ ocrd_calamari/recognize.py | 3 --- test/test_recognize.py | 3 --- 3 files changed, 29 insertions(+), 6 deletions(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..6959d70 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,29 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true +max_line_length = 88 +tab_width = 4 + +[{*.cfg, *.ini, *.html, *.yaml, *.yml}] +indent_size = 2 + +[*.json] +indent_size = 
2 +insert_final_newline = true + +# trailing spaces in markdown indicate word wrap +[*.md] +trim_trailing_whitespace = false + +[*.py] +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +use_parentheses = True +ensure_newline_before_comments = True diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index a8803ba..ca91a74 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -393,6 +393,3 @@ def _page_update_higher_textequiv_levels(level, pcgts): for line in lines ) region.set_TextEquiv([TextEquivType(Unicode=region_unicode)]) # remove old - - -# vim:tw=120: diff --git a/test/test_recognize.py b/test/test_recognize.py index f539593..126ba57 100644 --- a/test/test_recognize.py +++ b/test/test_recognize.py @@ -189,6 +189,3 @@ def test_glyphs(workspace): # The result should contain a lot of glyphs glyphs = tree.xpath("//pc:Glyph", namespaces=nsmap) assert len(glyphs) >= 100 - - -# vim:tw=120: From badda381ca9ac792b828e2ba264dd28810aba2ac Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Wed, 18 Oct 2023 21:58:33 +0200 Subject: [PATCH 05/10] =?UTF-8?q?=F0=9F=8E=A8=20Shorten=20long=20lines?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Black does not shorten comments if they exceed the wanted line length. ruff complains about them anyway. As the long lines are actually undesired (makes it hard to read comments in e.g. my terminal editor), shorten them manually. 
--- ocrd_calamari/recognize.py | 45 ++++++++++++++++++++++++-------------- test/test_recognize.py | 25 +++++++++++++-------- 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index ca91a74..831baa1 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -67,8 +67,14 @@ def setup(self): self.network_input_channels = self.predictor.predictors[ 0 ].network.input_channels - # self.network_input_channels = self.predictor.predictors[0].network_params.channels # not used! - # binarization = self.predictor.predictors[0].model_params.data_preprocessor.binarization # not used! + + # not used: + # self.network_input_channels = \ + # self.predictor.predictors[0].network_params.channels + # not used: + # binarization = \ + # self.predictor.predictors[0].model_params\ + # .data_preprocessor.binarization # self.features = ('' if self.network_input_channels != 1 else # 'binarized' if binarization != 'GRAY' else # 'grayscale_normalized') @@ -82,9 +88,10 @@ def process(self): """ Perform text recognition with Calamari on the workspace. - If ``texequiv_level`` is ``word`` or ``glyph``, then additionally create word / glyph level segments by - splitting at white space characters / glyph boundaries. In the case of ``glyph``, add all alternative character - hypotheses down to ``glyph_conf_cutoff`` confidence threshold. + If ``texequiv_level`` is ``word`` or ``glyph``, then additionally create word / + glyph level segments by splitting at white space characters / glyph boundaries. + In the case of ``glyph``, add all alternative character hypotheses down to + ``glyph_conf_cutoff`` confidence threshold. """ log = getLogger("processor.CalamariRecognize") @@ -174,10 +181,12 @@ def process(self): # Build line text on our own # - # Calamari does whitespace post-processing on prediction.sentence, while it does not do the same - # on prediction.positions. Do it on our own to have consistency. 
+ # Calamari does whitespace post-processing on prediction.sentence, + # while it does not do the same on prediction.positions. Do it on + # our own to have consistency. # - # XXX Check Calamari's built-in post-processing on prediction.sentence + # XXX Check Calamari's built-in post-processing on + # prediction.sentence def _sort_chars(p): """Filter and sort chars of prediction p""" @@ -249,8 +258,9 @@ def _drop_double_spaces_generator(positions): # Save word results # - # Calamari OCR does not provide word positions, so we infer word positions from a. text segmentation - # and b. the glyph positions. This is necessary because the PAGE XML format enforces a strict + # Calamari OCR does not provide word positions, so we infer word + # positions from a. text segmentation and b. the glyph positions. + # This is necessary because the PAGE XML format enforces a strict # hierarchy of lines > words > glyphs. def _words(s): @@ -319,7 +329,9 @@ def _words(s): ) # Add predictions (= TextEquivs) - char_index_start = 1 # Must start with 1, see https://ocr-d.github.io/page#multiple-textequivs + char_index_start = 1 + # Index must start with 1, see + # https://ocr-d.github.io/page#multiple-textequivs for char_index, char in enumerate( _sort_chars(p), start=char_index_start ): @@ -354,13 +366,14 @@ def _words(s): ) -# TODO: This is a copy of ocrd_tesserocr's function, and should probably be moved to a ocrd lib +# TODO: This is a copy of ocrd_tesserocr's function, and should probably be moved to a +# ocrd lib def _page_update_higher_textequiv_levels(level, pcgts): - """Update the TextEquivs of all PAGE-XML hierarchy levels above `level` for consistency. + """Update the TextEquivs of all higher PAGE-XML hierarchy levels for consistency. - Starting with the hierarchy level chosen for processing, - join all first TextEquiv (by the rules governing the respective level) - into TextEquiv of the next higher level, replacing them. 
+ Starting with the hierarchy level `level` chosen for processing, join all first + TextEquiv (by the rules governing the respective level) into TextEquiv of the next + higher level, replacing them. """ regions = pcgts.get_Page().get_TextRegion() if level != "region": diff --git a/test/test_recognize.py b/test/test_recognize.py index 126ba57..f4e3587 100644 --- a/test/test_recognize.py +++ b/test/test_recognize.py @@ -23,8 +23,9 @@ def page_namespace(tree): """Return the PAGE content namespace used in the given ElementTree. - This relies on the assumption that, in any given PAGE content file, the root element has the local name "PcGts". We - do not check if the files uses any valid PAGE namespace. + This relies on the assumption that, in any given PAGE content file, the root element + has the local name "PcGts". We do not check if the files uses any valid PAGE + namespace. """ root_name = etree.QName(tree.getroot().tag) if root_name.localname == "PcGts": @@ -61,9 +62,10 @@ def workspace(): # The binarization options I have are: # - # a. ocrd_kraken which tries to install cltsm, whose installation is borken on my machine (protobuf) - # b. ocrd_olena which 1. I cannot fully install via pip and 2. whose dependency olena doesn't compile on my - # machine + # a. ocrd_kraken which tries to install cltsm, whose installation is borken on my + # machine (protobuf) + # b. ocrd_olena which 1. I cannot fully install via pip and 2. whose dependency + # olena doesn't compile on my machine # c. just fumble with the original files # # So I'm going for option c. 
@@ -72,7 +74,8 @@ def workspace(): path = os.path.join(workspace.directory, imgf.local_filename) subprocess.call(["mogrify", "-threshold", "50%", path]) - # Remove GT Words and TextEquivs, to not accidently check GT text instead of the OCR text + # Remove GT Words and TextEquivs, to not accidently check GT text instead of the + # OCR text # XXX Review data again for of in workspace.mets.find_files(fileGrp="OCR-D-GT-SEG-WORD-GLYPH"): workspace.download_file(of) @@ -152,7 +155,9 @@ def test_word_segmentation(workspace): )[0] assert line is not None - # The textline should a. contain multiple words and b. these should concatenate fine to produce the same line text + # The textline should + # a. contain multiple words and + # b. these should concatenate fine to produce the same line text words = line.xpath(".//pc:Word", namespaces=nsmap) assert len(words) >= 2 words_text = " ".join( @@ -162,7 +167,8 @@ def test_word_segmentation(workspace): line_text = line.xpath("pc:TextEquiv/pc:Unicode", namespaces=nsmap)[0].text assert words_text == line_text - # For extra measure, check that we're not seeing any glyphs, as we asked for textequiv_level == "word" + # For extra measure, check that we're not seeing any glyphs, as we asked for + # textequiv_level == "word" glyphs = tree.xpath("//pc:Glyph", namespaces=nsmap) assert len(glyphs) == 0 @@ -174,7 +180,8 @@ def test_glyphs(workspace): output_file_grp="OCR-D-OCR-CALAMARI", parameter={ "checkpoint_dir": CHECKPOINT_DIR, - "textequiv_level": "glyph", # Note that we're going down to glyph level here + # Note that we're going down to glyph level here + "textequiv_level": "glyph", }, ).process() workspace.save_mets() From 5cc13df53aa55b24172083761491433994b4ae9e Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 19 Oct 2023 17:13:46 +0200 Subject: [PATCH 06/10] =?UTF-8?q?=F0=9F=8E=A8=20Use=20f-string=20in=20log.?= =?UTF-8?q?warning()?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
ocrd_calamari/recognize.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 831baa1..6fe36ef 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -235,9 +235,8 @@ def _drop_double_spaces_generator(positions): line_text = "".join(_sort_chars(p)[0].char for p in positions) if line_text != prediction.sentence: log.warning( - "Our own line text is not the same as Calamari's: '%s' != '%s'", - line_text, - prediction.sentence, + f"Our own line text is not the same as Calamari's: " + f"'{line_text}' != '{prediction.sentence}'" ) # Delete existing results From 01aa75ce6b94e8241f743a2da2e83351f6be6346 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 19 Oct 2023 17:14:24 +0200 Subject: [PATCH 07/10] =?UTF-8?q?=F0=9F=8E=A8=20Minor=20reformat=20(Black)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ocrd_calamari/recognize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index 6fe36ef..f18578b 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -36,6 +36,7 @@ from calamari_ocr.ocr import MultiPredictor from calamari_ocr.ocr.voting import voter_from_proto from calamari_ocr.proto import VoterParams + # ruff: isort: on from ocrd_calamari.config import OCRD_TOOL From 8db9bc5d28b1477576f62980acaaa604138808b8 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 19 Oct 2023 17:24:14 +0200 Subject: [PATCH 08/10] =?UTF-8?q?=E2=9A=99=20pre-commit:=20update=20config?= =?UTF-8?q?=20(using=20pre-commit=20autoupdate)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 8 ++++---- ocrd_calamari/recognize.py | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cd67254..cc88c56 100644 --- 
a/.pre-commit-config.yaml 
+++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -13,18 +13,18 @@ repos: - id: check-ast - repo: https://github.com/psf/black - rev: 22.10.0 + rev: 23.10.0 hooks: - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.280 + rev: v0.1.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.4.1 + rev: v1.6.1 hooks: - id: mypy additional_dependencies: ['types-setuptools'] diff --git a/ocrd_calamari/recognize.py b/ocrd_calamari/recognize.py index f18578b..fb8b580 100644 --- a/ocrd_calamari/recognize.py +++ b/ocrd_calamari/recognize.py @@ -99,7 +99,7 @@ def process(self): assert_file_grp_cardinality(self.input_file_grp, 1) assert_file_grp_cardinality(self.output_file_grp, 1) - for (n, input_file) in enumerate(self.input_files): + for n, input_file in enumerate(self.input_files): page_id = input_file.pageId or input_file.ID log.info("INPUT FILE %i / %s", n, page_id) pcgts = page_from_file(self.workspace.download_file(input_file)) @@ -173,7 +173,6 @@ def process(self): for line, line_coords, raw_results in zip( textlines, line_coordss, raw_results_all ): - for i, p in enumerate(raw_results): p.prediction.id = "fold_{}".format(i) From ca5a95da75147ba9db4b73db697715c7ed9d54e3 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 19 Oct 2023 17:32:21 +0200 Subject: [PATCH 09/10] =?UTF-8?q?=E2=9A=99=20pre-commit:=20test=20pre-comm?= =?UTF-8?q?it-update?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc88c56..09b2e2a 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -1,9 +1,5 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 - hooks: +- hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-json @@ -11,20 +7,26 @@ repos: - id: check-yaml - id: check-added-large-files - id: check-ast - -- repo: https://github.com/psf/black - rev: 23.10.0 - hooks: + repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 +- hooks: - id: black - -- repo: https://github.com/astral-sh/ruff-pre-commit + repo: https://github.com/psf/black + rev: 23.10.0 +- hooks: + - args: + - --fix + - --exit-non-zero-on-fix + id: ruff + repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.1.0 - hooks: - - id: ruff - args: [--fix, --exit-non-zero-on-fix] - -- repo: https://github.com/pre-commit/mirrors-mypy +- hooks: + - additional_dependencies: + - types-setuptools + id: mypy + repo: https://github.com/pre-commit/mirrors-mypy rev: v1.6.1 - hooks: - - id: mypy - additional_dependencies: ['types-setuptools'] +- hooks: + - id: pre-commit-update + repo: https://gitlab.com/vojko.pribudic/pre-commit-update + rev: v0.1.0 From a1aa6abc730668bb3097b937c94d871589504e80 Mon Sep 17 00:00:00 2001 From: Mike Gerber Date: Thu, 19 Oct 2023 17:44:25 +0200 Subject: [PATCH 10/10] =?UTF-8?q?=F0=9F=90=9B=20Fix=20logic=20error=20in?= =?UTF-8?q?=20fix-calamari1-model?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An earlier change changed comparing types to using isinstance() but got the logic wrong. Correct it. 
--- ocrd_calamari/fix_calamari1_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_calamari/fix_calamari1_model.py b/ocrd_calamari/fix_calamari1_model.py index 7427abf..4989594 100644 --- a/ocrd_calamari/fix_calamari1_model.py +++ b/ocrd_calamari/fix_calamari1_model.py @@ -24,7 +24,7 @@ def fix_calamari1_model(checkpoint_dir): old_j = deepcopy(j) for v in j["model"].values(): - if isinstance(v, dict): + if not isinstance(v, dict): continue for child in v.get("children", []): for replacement in child.get("replacements", []):