Skip to content

Commit

Permalink
extract-pages: multi-level mask output via plot_segmasks, optional …
Browse files Browse the repository at this point in the history
…alpha compositing via `plot_overlay`
  • Loading branch information
bertsky committed Feb 7, 2021
1 parent 980b07d commit b024b80
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 85 deletions.
247 changes: 164 additions & 83 deletions ocrd_segment/extract_pages.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import absolute_import

import json
import numpy as np
from collections import namedtuple
import os.path
from PIL import Image, ImageDraw
Expand All @@ -14,6 +15,7 @@
assert_file_grp_cardinality,
coordinates_of_segment,
xywh_from_polygon,
polygon_from_bbox,
MIME_TO_EXT
)
from ocrd_modelfactory import page_from_file
Expand All @@ -22,14 +24,17 @@
from .config import OCRD_TOOL

TOOL = 'ocrd-segment-extract-pages'
# region classes and their colours in mask (dbg) images:
# region classes and their colours in mask (pseg) images:
# (from prima-page-viewer/src/org/primaresearch/page/viewer/ui/render/PageContentColors,
# but added alpha channel to also discern subtype, if not visually;
# ordered such that overlaps still allows maximum separation)
# (Not used any more; now as default to ocrd-tool.json parameter.)
# pragma pylint: disable=bad-whitespace
CLASSES = {
'': 'FFFFFF00',
'Glyph': '2E8B08FF',
'Word': 'B22222FF',
'TextLine': '32CD32FF',
'Border': 'FFFFFFFF',
'TableRegion': '8B4513FF',
'AdvertRegion': '4682B4FF',
Expand Down Expand Up @@ -100,24 +105,36 @@ def process(self):
and coordinates relative to the page (which depending on the workflow could
already be cropped, deskewed, dewarped, binarized etc). Extract the image of
the (cropped, deskewed, dewarped) page, both in binarized form (if available)
and non-binarized form. In addition, create a new image with masks for all
regions, color-coded by type. Create two JSON files with region types and
coordinates: one (page-wise) in our custom format and one (global) in MS-COCO.
and non-binarized form. In addition, create a new image with masks for each
segment type in ``plot_segmasks``, color-coded by class according to ``colordict``.
Create two JSON files with region types and coordinates: one (page-wise) in
our custom format and one (global) in MS-COCO.
The output file group may be given as a comma-separated list to separate
these 3 page-level images. Write files as follows:
these 3 kinds of images. If fewer than 3 fileGrps are specified, they will
share the same fileGrp (and directory). In particular, write files as follows:
* in the first (or only) output file group (directory):
- ID + '.png': raw image of the (preprocessed) page
- ID + '.json': region coordinates/classes (custom format)
* in the second (or first) output file group (directory):
* in the second (or only) output file group (directory):
- ID + '.bin.png': binarized image of the (preprocessed) page, if available
* in the third (or first) output file group (directory):
- ID + '.dbg.png': debug image
* in the third (or second or only) output file group (directory):
- ID + '.pseg.png': mask image of page; contents depend on ``plot_segmasks``:
1. if it contains `page`, fill page frame,
2. if it contains `region`, fill region segmentation/classification,
3. if it contains `line`, fill text line segmentation,
4. if it contains `word`, fill word segmentation,
5. if it contains `glyph`, fill glyph segmentation,
where each follow-up layer and segment draws over the previous state, starting
with a blank (white) image - unless ``plot_overlay`` is true, in which case
each layer and segment is superimposed (alpha blended) onto the previous one,
starting with the raw image.
In addition, write a file for all pages at once:
* in the third (or first) output file group (directory):
- output_file_grp + '.coco.json': region coordinates/classes (MS-COCO format)
- output_file_grp + '.colordict.json': color definitions (as in PAGE viewer)
- output_file_grp + '.colordict.json': the used ``colordict``
(This is intended for training and evaluation of region segmentation models.)
"""
Expand All @@ -127,10 +144,10 @@ def process(self):
if len(file_groups) > 3:
raise Exception("at most 3 output file grps allowed (raw, [binarized, [mask]] image)")
if len(file_groups) > 2:
dbg_image_grp = file_groups[2]
mask_image_grp = file_groups[2]
else:
dbg_image_grp = file_groups[0]
LOG.info("No output file group for debug images specified, falling back to output filegrp '%s'", dbg_image_grp)
mask_image_grp = file_groups[0]
LOG.info("No output file group for mask images specified, falling back to output filegrp '%s'", mask_image_grp)
if len(file_groups) > 1:
bin_image_grp = file_groups[1]
else:
Expand Down Expand Up @@ -199,73 +216,78 @@ def process(self):
LOG.warning('Page "%s" has no binarized images, skipping .bin', page_id)
else:
raise
page_image_dbg = Image.new(mode='RGBA', size=page_image.size,
color='#' + classes[''])
if 'Border' not in classes:
pass
elif page.get_Border():
polygon = coordinates_of_segment(
page.get_Border(), page_image, page_coords).tolist()
ImageDraw.Draw(page_image_dbg).polygon(
list(map(tuple, polygon)),
fill='#' + classes['Border'])
# init multi-level mask output
if self.parameter['plot_overlay']:
page_image_segmask = page_image.convert('RGBA')
else:
page_image_segmask = Image.new(mode='RGBA',
size=page_image.size,
color='#FFFFFF00')
neighbors = dict()
for level in ['page', 'region', 'line', 'word', 'glyph']:
neighbors[level] = list()
# produce border mask plot, if necessary
if page.get_Border():
poly = segment_poly(page_id, page.get_Border(), page_coords)
else:
page_image_dbg.paste('#' + classes['Border'],
(0, 0, page_image.width, page_image.height))
poly = Polygon(polygon_from_bbox(0, 0, page_image.width, page_image.height))
if 'page' in self.parameter['plot_segmasks']:
plot_segment(page_id, page.get_Border(), poly, 'Border', classes,
page_image_segmask, [], self.parameter['plot_overlay'])
# get regions and aggregate masks on all hierarchy levels
description = {'angle': page.get_orientation()}
regions = dict()
for name in classes.keys():
if not name or name == 'Border' or ':' in name:
# no subtypes here
if not name or not name.endswith('Region'):
# no region subtypes or non-region types here
continue
regions[name] = getattr(page, 'get_' + name)()
description = {'angle': page.get_orientation()}
Neighbor = namedtuple('Neighbor', ['id', 'poly', 'type'])
neighbors = []
#regions[name] = getattr(page, 'get_' + name)()
regions[name] = page.get_AllRegions(classes=name[:-6], order='reading-order')
for rtype, rlist in regions.items():
for region in rlist:
if rtype in ['TextRegion', 'ChartRegion', 'GraphicRegion']:
subrtype = region.get_type()
else:
subrtype = None
polygon = coordinates_of_segment(
region, page_image, page_coords)
polygon2 = polygon.reshape(1, -1).tolist()
polygon = polygon.tolist()
xywh = xywh_from_polygon(polygon)
# validate coordinates and check intersection with neighbours
# (which would melt into another in the mask image):
try:
poly = Polygon(polygon)
reason = ''
except ValueError as err:
reason = err
if not poly.is_valid:
reason = explain_validity(poly)
elif poly.is_empty:
reason = 'is empty'
elif poly.bounds[0] < 0 or poly.bounds[1] < 0:
reason = 'is negative'
elif poly.length < 4:
reason = 'has too few points'
if reason:
LOG.error('Page "%s" region "%s" %s',
page_id, region.id, reason)
if subrtype:
rtype0 = rtype + ':' + subrtype
else:
rtype0 = rtype
poly = segment_poly(page_id, region, page_coords)
# produce region mask plot, if necessary
if 'region' in self.parameter['plot_segmasks']:
plot_segment(page_id, region, poly, rtype0, classes,
page_image_segmask, neighbors['region'],
self.parameter['plot_overlay'])
if rtype == 'TextRegion':
lines = region.get_TextLine()
for line in lines:
# produce line mask plot, if necessary
poly = segment_poly(page_id, line, page_coords)
if 'line' in self.parameter['plot_segmasks']:
plot_segment(page_id, line, poly, 'TextLine', classes,
page_image_segmask, neighbors['line'],
self.parameter['plot_overlay'])
words = line.get_Word()
for word in words:
# produce line mask plot, if necessary
poly = segment_poly(page_id, word, page_coords)
if 'word' in self.parameter['plot_segmasks']:
plot_segment(page_id, word, poly, 'Word', classes,
page_image_segmask, neighbors['word'],
self.parameter['plot_overlay'])
glyphs = word.get_Glyph()
for glyph in glyphs:
# produce line mask plot, if necessary
poly = segment_poly(page_id, glyph, page_coords)
if 'glyph' in self.parameter['plot_segmasks']:
plot_segment(page_id, glyph, poly, 'Glyph', classes,
page_image_segmask, neighbors['glyph'],
self.parameter['plot_overlay'])
if not poly:
continue
poly_prep = prep(poly)
for neighbor in neighbors:
if (rtype == neighbor.type and
poly_prep.intersects(neighbor.poly) and
poly.intersection(neighbor.poly).area > 0):
LOG.warning('Page "%s" region "%s" intersects neighbour "%s" (IoU: %.3f)',
page_id, region.id, neighbor.id,
poly.intersection(neighbor.poly).area / \
poly.union(neighbor.poly).area)
elif (rtype != neighbor.type and
poly_prep.within(neighbor.poly)):
LOG.warning('Page "%s" region "%s" within neighbour "%s" (IoU: %.3f)',
page_id, region.id, neighbor.id,
poly.area / neighbor.poly.area)
neighbors.append(Neighbor(region.id, poly, rtype))
polygon = np.array(poly.exterior, np.int)[:-1].tolist()
xywh = xywh_from_polygon(polygon)
area = poly.area
description.setdefault('regions', []).append(
{ 'type': rtype,
Expand All @@ -280,29 +302,25 @@ def process(self):
'file_grp': self.input_file_grp,
'METS.UID': self.workspace.mets.unique_identifier
})
# draw region:
ImageDraw.Draw(page_image_dbg).polygon(
list(map(tuple, polygon)),
fill='#' + classes[(rtype + ':' + subrtype) if subrtype else rtype])
# COCO: add annotations
i += 1
annotations.append(
{'id': i, 'image_id': num_page_id,
'category_id': next((cat['id'] for cat in categories if cat['name'] == subrtype),
next((cat['id'] for cat in categories if cat['name'] == rtype))),
'segmentation': polygon2,
'segmentation': np.array(poly.exterior, np.int)[:-1].reshape(1, -1).tolist(),
'area': area,
'bbox': [xywh['x'], xywh['y'], xywh['w'], xywh['h']],
'iscrowd': 0})
self.workspace.save_image_file(page_image_dbg,
file_id + '.dbg',
dbg_image_grp,

self.workspace.save_image_file(page_image_segmask,
file_id + '.pseg',
mask_image_grp,
page_id=page_id,
mimetype=self.parameter['mimetype'])
self.workspace.add_file(
ID=file_id + '.json',
file_grp=dbg_image_grp,
file_grp=mask_image_grp,
pageId=input_file.pageId,
local_filename=file_path.replace(MIME_TO_EXT[self.parameter['mimetype']], '.json'),
mimetype='application/json',
Expand All @@ -321,12 +339,12 @@ def process(self):
'height': page_image.height})

# COCO: write result
file_id = dbg_image_grp + '.coco.json'
LOG.info('Writing COCO result file "%s" in "%s"', file_id, dbg_image_grp)
file_id = mask_image_grp + '.coco.json'
LOG.info('Writing COCO result file "%s" in "%s"', file_id, mask_image_grp)
self.workspace.add_file(
ID=file_id,
file_grp=dbg_image_grp,
local_filename=os.path.join(dbg_image_grp, file_id),
file_grp=mask_image_grp,
local_filename=os.path.join(mask_image_grp, file_id),
mimetype='application/json',
pageId=None,
content=json.dumps(
Expand All @@ -335,10 +353,73 @@ def process(self):
'annotations': annotations}))

# write inverse colordict (for ocrd-segment-from-masks)
file_id = dbg_image_grp + '.colordict.json'
file_id = mask_image_grp + '.colordict.json'
LOG.info('Writing colordict file "%s" in .', file_id)
with open(file_id, 'w') as out:
json.dump(dict((col, name)
for name, col in classes.items()
if name),
out)

def segment_poly(page_id, segment, coords):
LOG = getLogger('processor.ExtractPages')
polygon = coordinates_of_segment(segment, None, coords)
# validate coordinates
try:
poly = Polygon(polygon)
reason = ''
if not poly.is_valid:
reason = explain_validity(poly)
elif poly.is_empty:
reason = 'is empty'
elif poly.bounds[0] < 0 or poly.bounds[1] < 0:
reason = 'is negative'
elif poly.length < 4:
reason = 'has too few points'
except ValueError as err:
reason = err
if reason:
tag = segment.__class__.__name__.replace('Type', '')
if tag != 'Border':
tag += ' "%s"' % segment.id
LOG.error('Page "%s" %s %s', page_id, tag, reason)
return None
return poly

def plot_segment(page_id, segment, poly, stype, classes, image, neighbors, alpha=False):
if not poly:
return
if stype not in classes:
LOG.error('mask plots requested, but "colordict" does not contain a "%s" mapping', stype)
return
color = classes[stype]
Neighbor = namedtuple('Neighbor', ['id', 'poly', 'type'])
LOG = getLogger('processor.ExtractPages')
# check intersection with neighbours
# (which would melt into another in the mask image)
if segment and hasattr(segment, 'id') and not alpha:
poly_prep = prep(poly)
for neighbor in neighbors:
if (stype == neighbor.type and
poly_prep.intersects(neighbor.poly) and
poly.intersection(neighbor.poly).area > 0):
inter = poly.intersection(neighbor.poly).area
union = poly.union(neighbor.poly).area
LOG.warning('Page "%s" segment "%s" intersects neighbour "%s" (IoU: %.3f)',
page_id, segment.id, neighbor.id, inter / union)
elif (stype != neighbor.type and
poly_prep.within(neighbor.poly)):
LOG.warning('Page "%s" segment "%s" within neighbour "%s" (IoU: %.3f)',
page_id, segment.id, neighbor.id,
poly.area / neighbor.poly.area)
neighbors.append(Neighbor(segment.id, poly, stype))
# draw segment
if alpha:
layer = Image.new(mode='RGBA', size=image.size, color='#FFFFFF00')
ImageDraw.Draw(layer).polygon(list(map(tuple, poly.exterior.coords[:-1])),
fill='#' + color[:6] + '1E',
outline='#' + color[:6] + '96')
image.alpha_composite(layer)
else:
ImageDraw.Draw(image).polygon(list(map(tuple, poly.exterior.coords[:-1])),
fill='#' + color)
Loading

0 comments on commit b024b80

Please sign in to comment.