Skip to content

Commit

Permalink
from-masks: pick default colordict from parameter, omit initial # sign from colorspecs
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Feb 6, 2021
1 parent b8ea41a commit 980b07d
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 10 deletions.
10 changes: 2 additions & 8 deletions ocrd_segment/import_image_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
from ocrd import Processor

from .config import OCRD_TOOL
from .extract_pages import CLASSES

TOOL = 'ocrd-segment-from-masks'

Expand Down Expand Up @@ -71,11 +70,6 @@ def process(self):
assert_file_grp_cardinality(self.output_file_grp, 1)

colordict = self.parameter['colordict']
if not colordict:
LOG.info('Using default PAGE colordict')
colordict = dict(('#' + col, name)
for name, col in CLASSES.items()
if name)
typedict = {"TextRegion": TextTypeSimpleType,
"GraphicRegion": GraphicsTypeSimpleType,
"ChartType": ChartTypeSimpleType}
Expand All @@ -96,9 +90,9 @@ def process(self):
segmentation_pil = Image.open(segmentation_filename)
has_alpha = segmentation_pil.mode == 'RGBA'
if has_alpha:
colorformat = "#%08X"
colorformat = "%08X"
else:
colorformat = "#%06X"
colorformat = "%06X"
if segmentation_pil.mode != 'RGB':
segmentation_pil = segmentation_pil.convert('RGB')
# convert to array
Expand Down
56 changes: 54 additions & 2 deletions ocrd_segment/ocrd-tool.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,60 @@
"parameters": {
"colordict": {
"type": "object",
"default": {},
"description": "Mapping from color values in the input masks to region types to annotate; color must be encoded hexadecimal (e.g. '#00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped colors will be ignored (i.e. treated as background)). Cf. output of ocrd-segment-extract-pages for an example (this is also the default)."
"default": {
"FFFFFF00": "",
"FFFFFFFF": "Border",
"8B4513FF": "TableRegion",
"4682B4FF": "AdvertRegion",
"FF8C00FF": "ChemRegion",
"9400D3FF": "MusicRegion",
"9ACDD2FF": "MapRegion",
"0000FFFF": "TextRegion",
"0000FFFA": "TextRegion:paragraph",
"0000FFF5": "TextRegion:heading",
"0000FFF0": "TextRegion:caption",
"0000FFEB": "TextRegion:header",
"0000FFE6": "TextRegion:footer",
"0000FFE1": "TextRegion:page-number",
"0000FFDC": "TextRegion:drop-capital",
"0000FFD7": "TextRegion:credit",
"0000FFD2": "TextRegion:floating",
"0000FFCD": "TextRegion:signature-mark",
"0000FFC8": "TextRegion:catch-word",
"0000FFC3": "TextRegion:marginalia",
"0000FFBE": "TextRegion:footnote",
"0000FFB9": "TextRegion:footnote-continued",
"0000FFB4": "TextRegion:endnote",
"0000FFAF": "TextRegion:TOC-entry",
"0000FFA5": "TextRegion:list-label",
"0000FFA0": "TextRegion:other",
"800080FF": "ChartRegion",
"800080FA": "ChartRegion:bar",
"800080F5": "ChartRegion:line",
"800080F0": "ChartRegion:pie",
"800080EB": "ChartRegion:scatter",
"800080E6": "ChartRegion:surface",
"800080E1": "ChartRegion:other",
"008000FF": "GraphicRegion",
"008000FA": "GraphicRegion:logo",
"008000F0": "GraphicRegion:letterhead",
"008000EB": "GraphicRegion:decoration",
"008000E6": "GraphicRegion:frame",
"008000E1": "GraphicRegion:handwritten-annotation",
"008000DC": "GraphicRegion:stamp",
"008000D7": "GraphicRegion:signature",
"008000D2": "GraphicRegion:barcode",
"008000CD": "GraphicRegion:paper-grow",
"008000C8": "GraphicRegion:punch-hole",
"008000C3": "GraphicRegion:other",
"00CED1FF": "ImageRegion",
"B8860BFF": "LineDrawingRegion",
"00BFFFFF": "MathsRegion",
"FF0000FF": "NoiseRegion",
"FF00FFFF": "SeparatorRegion",
"646464FF": "UnknownRegion",
"637C81FF": "CustomRegion"},
"description": "Mapping from color values in the input masks to region types to annotate; color must be encoded hexadecimal (e.g. '00FF00'); region type equals the element name in PAGE-XML, optionally followed by a colon and a subtype (e.g. 'TextRegion:paragraph'; unmapped colors will be ignored (i.e. treated as background)). Default is PageViewer color scheme. Cf. colordict.json output and colordict parameter of ocrd-segment-extract-pages."
}
}
},
Expand Down

0 comments on commit 980b07d

Please sign in to comment.