Skip to content

Commit

Permalink
Changed StructureImageExtractor to throw IOException not all Exceptio…
Browse files Browse the repository at this point in the history
…n, moved some classes out of main package. Added some javadoc
  • Loading branch information
dkatzel-ncats committed Jun 14, 2019
1 parent 68a9632 commit b97d1df
Show file tree
Hide file tree
Showing 23 changed files with 579 additions and 547 deletions.
882 changes: 441 additions & 441 deletions reports/testSet1MolVec.txt

Large diffs are not rendered by default.

40 changes: 36 additions & 4 deletions src/main/java/gov/nih/ncats/molvec/Molvec.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,58 @@

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;

import gov.nih.ncats.molvec.algo.StructureImageExtractor;

public class Molvec {
/**
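* Static entry points that analyze an image and return the recognized
* molecular structure as a mol formatted String.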
*/
public final class Molvec {

public static String ocr(File image) throws Exception{
/**
* Analyze the given image and try to recognize a molecular structure.
* @param image the image to analyze, can not be null.
* @return a String encoded in mol format of the recognized molecular structure.
* @throws IOException if there are any problems parsing the images.
* @throws NullPointerException if image is null.
*/
public static String ocr(File image) throws IOException{
checkNotNull(image);
StructureImageExtractor sie = new StructureImageExtractor(image);
String mol = sie.getCtab().toMol();
return mol;

}

public static String ocr(byte[] image) throws Exception{
private static void checkNotNull(Object obj){
Objects.requireNonNull(obj, "image can not be null");
}
/**
* Analyze the given encoded image data, supplied as a byte array, and try to recognize a molecular structure.
* @param image the image to analyze, can not be null.
* @return a String encoded in mol format of the recognized molecular structure.
* @throws IOException if there are any problems parsing the images.
* @throws NullPointerException if image is null.
*/
public static String ocr(byte[] image) throws IOException{
checkNotNull(image);
StructureImageExtractor sie = new StructureImageExtractor(image);
return sie.getCtab().toMol();

}
public static String ocr(BufferedImage image) throws Exception{
/**
* Analyze the given image and try to recognize a molecular structure.
* @param image the image to analyze, can not be null.
* @return a String encoded in mol format of the recognized molecular structure.
* @throws IOException if there are any problems parsing the images.
* @throws NullPointerException if image is null.
*/
public static String ocr(BufferedImage image) throws IOException{
checkNotNull(image);
StructureImageExtractor sie = StructureImageExtractor.createFromImage(image);
return sie.getCtab().toMol();

Expand Down
4 changes: 1 addition & 3 deletions src/main/java/gov/nih/ncats/molvec/algo/BranchNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import javax.swing.tree.TreeNode;

import gov.nih.ncats.molvec.CachedSupplier;
import gov.nih.ncats.molvec.util.CachedSupplier;


class BranchNode{
Expand Down
142 changes: 82 additions & 60 deletions src/main/java/gov/nih/ncats/molvec/algo/StructureImageExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.Shape;
import java.awt.geom.AffineTransform;
import java.awt.geom.Line2D;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.awt.geom.*;
import java.awt.image.BufferedImage;
import java.awt.image.ColorConvertOp;
import java.awt.image.ColorModel;
Expand Down Expand Up @@ -42,9 +39,9 @@

import javax.imageio.ImageIO;

import gov.nih.ncats.molvec.Bitmap;
import gov.nih.ncats.molvec.Bitmap.WedgeInfo;
import gov.nih.ncats.molvec.CachedSupplier;
import gov.nih.ncats.molvec.image.Bitmap;
import gov.nih.ncats.molvec.image.Bitmap.WedgeInfo;
import gov.nih.ncats.molvec.util.CachedSupplier;
import gov.nih.ncats.molvec.image.Binarization;
import gov.nih.ncats.molvec.image.binarization.LeastPopulatedThreshold;
import gov.nih.ncats.molvec.image.binarization.SigmaThreshold;
Expand Down Expand Up @@ -280,12 +277,16 @@ public List<Shape> getRescueOCRShapes() {
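* @return a new {@link StructureImageExtractor} built from the given image
* @throws IOException if the image can not be processed, or if processing is interrupted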
*/
public static StructureImageExtractor createFromImage(BufferedImage bufferedImage)throws Exception{
public static StructureImageExtractor createFromImage(BufferedImage bufferedImage)throws IOException{
BufferedImage img = bufferedImage;
if(BufferedImage.TYPE_BYTE_GRAY != bufferedImage.getType()){
img = toGrayScale(bufferedImage);
}
return new StructureImageExtractor(img.getRaster());
try {
return new StructureImageExtractor(img.getRaster());
}catch(InterruptedException e){
throw new IOException("interrupted", e);
}

}

Expand All @@ -312,51 +313,62 @@ private static BufferedImage toGrayScale(BufferedImage image){

/**
* Create a new {@link StructureImageExtractor}, using a given {@link Raster}.
* @param the raster to be processed
* @param raster the raster to be processed
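* @throws IOException if the raster can not be processed, or if processing is interrupted
* @throws InterruptedException declared by the signature; the delegated constructor reports interruption as an IOException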
*/
public StructureImageExtractor(Raster raster)throws Exception{
public StructureImageExtractor(Raster raster)throws IOException, InterruptedException{
this(raster, false);
}

/**
* Create a new {@link StructureImageExtractor}, using a given {@link Raster}. If debug is specified,
* debug information will be printed to standard out, and the connection table steps will be preserved;
* they can be obtained via {@link #getCtabRaw()}.
* @param the raster to be processed
* @param if true, print debug information to standard out
* @param raster the raster to be processed
* @param debug if true, print debug information to standard out
* @throws IOException if the raster can not be processed, or if processing is interrupted
*/
public StructureImageExtractor(Raster raster, boolean debug )throws Exception{
public StructureImageExtractor(Raster raster, boolean debug )throws IOException{
this.DEBUG = debug;

try{
load(Bitmap.createBitmap(raster,DEF_BINARIZATION).clean(), true);
}catch(ImageTooSmallException e){
File bi= stdResize(raster,3);
load(bitmap = Bitmap.read(bi,RESIZE_BINARIZATION).clean(),false);
}catch(ImageTooSpottyException e){
try{
load(Bitmap.createBitmap(raster,TOO_WASHED_BINARIZATION).clean(), false);
}catch(ImageTooSmallException ex){
File bi= stdResize(raster,3);
load(bitmap = Bitmap.read(bi,RESIZE_BINARIZATION).clean(),false);
try {
try {
load(Bitmap.createBitmap(raster, DEF_BINARIZATION).clean(), true);
} catch (ImageTooSmallException e) {
File bi = stdResize(raster, 3);
load(bitmap = Bitmap.read(bi, RESIZE_BINARIZATION).clean(), false);
} catch (ImageTooSpottyException e) {
try {
load(Bitmap.createBitmap(raster, TOO_WASHED_BINARIZATION).clean(), false);
} catch (ImageTooSmallException ex) {
File bi = stdResize(raster, 3);
load(bitmap = Bitmap.read(bi, RESIZE_BINARIZATION).clean(), false);
}
}
}catch(InterruptedException e){
throw new IOException("interrupted", e);
}
}
public StructureImageExtractor(byte[] file, boolean debug) throws Exception{
public StructureImageExtractor(byte[] file, boolean debug) throws IOException{
this.DEBUG=debug;
load(file);
try {
load(file);
}catch(InterruptedException e){
throw new IOException("interrupted", e);
}
}
public StructureImageExtractor(File file, boolean debug) throws Exception{
public StructureImageExtractor(File file, boolean debug) throws IOException{
this.DEBUG=debug;
load(file);
try{
load(file);
}catch(InterruptedException e){
throw new IOException("interrupted", e);
}
}

public StructureImageExtractor(byte[] file) throws Exception{
public StructureImageExtractor(byte[] file) throws IOException{
this(file,false);
}
public StructureImageExtractor(File file) throws Exception{
public StructureImageExtractor(File file) throws IOException{
this(file,false);
}

Expand Down Expand Up @@ -435,7 +447,7 @@ private static CharType computeCharType(Tuple<Character,Number> tup){



private void processOCR(SCOCR socr, List<ShapeWrapper> polygons,Bitmap bitmap, Bitmap thin, BiConsumer<ShapeWrapper,List<Tuple<Character,Number>>> onFind) throws Exception{
private void processOCR(SCOCR socr, List<ShapeWrapper> polygons,Bitmap bitmap, Bitmap thin, BiConsumer<ShapeWrapper,List<Tuple<Character,Number>>> onFind) throws InterruptedException{

boolean[] interupt=new boolean[]{false};
/*
Expand Down Expand Up @@ -621,7 +633,9 @@ private void processOCR(SCOCR socr, List<ShapeWrapper> polygons,Bitmap bitmap, B
}
});

if(interupt[0])throw new InterruptedException();
if(interupt[0]){
throw new InterruptedException();
}

polygons.removeAll(toRemoveShapes);
polygons.addAll(toAddShapes);
Expand Down Expand Up @@ -818,19 +832,19 @@ private void processOCRShape(SCOCR socr, ShapeWrapper inputShape, Bitmap bitmap,
* very small structure images.
* @author tyler
*/
private class ImageTooSmallException extends Exception{}
private class ImageTooSmallException extends IOException{}

/**
* Thrown to signify that the supplied image has characteristics expected from
* split/spotty thresholding or a very washed-out image.
* @author tyler
*/
private class ImageTooSpottyException extends Exception{}
private class ImageTooSpottyException extends IOException{}




private void load(byte[] file) throws Exception{
private void load(byte[] file) throws IOException, InterruptedException{
try{
load(bitmap = Bitmap.read(file,DEF_BINARIZATION).clean(), true);
}catch(ImageTooSmallException e){
Expand All @@ -846,7 +860,7 @@ private void load(byte[] file) throws Exception{
}

}
private void load(File file) throws Exception{
private void load(File file) throws IOException, InterruptedException{
try{
load(bitmap = Bitmap.read(file,DEF_BINARIZATION).clean(),true);
}catch(ImageTooSmallException e){
Expand Down Expand Up @@ -1105,7 +1119,7 @@ private void rescueOCR(List<LineWrapper> lines, List<ShapeWrapper> polygons, Set
});
}

private void load(Bitmap aBitMap, boolean allowThresholdTooLowThrow) throws Exception{
private void load(Bitmap aBitMap, boolean allowThresholdTooLowThrow) throws IOException, InterruptedException{


List<Shape> realRescueOCRCandidates = Collections.synchronizedList(new ArrayList<>());
Expand All @@ -1128,17 +1142,17 @@ private void load(Bitmap aBitMap, boolean allowThresholdTooLowThrow) throws Exce
thin = bitmap.thin();
boolean blurred=false;

{
List<int[]> hollow =thin.findHollowPoints();
if(hollow.size()> 0.002*thin.fractionPixelsOn()*thin.width()*thin.height()){
bitmap=new Bitmap.BitmapBuilder(bitmap).boxBlur(1).threshold(2).build();
thin=bitmap.thin();
blurred=true;
}

List<int[]> hollow =thin.findHollowPoints();

if(hollow.size()> 0.002*thin.fractionPixelsOn()*thin.width()*thin.height()){
bitmap=new Bitmap.BitmapBuilder(bitmap).boxBlur(1).threshold(2).build();
thin=bitmap.thin();
blurred=true;
}


}

// Bitmap bitmap2=new Bitmap.BitmapBuilder(bitmap).boxBlur(1).threshold(1).build();
polygons = bitmap.connectedComponents(Bitmap.Bbox.DoublePolygon)
.stream()
Expand Down Expand Up @@ -1238,7 +1252,7 @@ private void load(Bitmap aBitMap, boolean allowThresholdTooLowThrow) throws Exce
// segments are generated for the thinned bitmap only, since
// it can be quite noisy on the normal bitmap!
if (isLarge) {
throw new IllegalStateException("Cannot support images with over 4000 polygons at this time");
throw new IOException("Cannot support images with over 4000 polygons at this time");
}

Set<ShapeWrapper> likelyOCR= Collections.synchronizedSet(new LinkedHashSet<>());
Expand Down Expand Up @@ -5762,22 +5776,30 @@ private void load(Bitmap aBitMap, boolean allowThresholdTooLowThrow) throws Exce
offX+=ddx;
offY+=ddy;
offsum++;
Point2D pp1=at.inverseTransform(new Point2D.Double(px, py), null);
updates.add(Tuple.of(n,pp1));
try {
Point2D pp1 = at.inverseTransform(new Point2D.Double(px, py), null);
updates.add(Tuple.of(n, pp1));
}catch (NoninvertibleTransformException nit){
throw new IOException("error inverting point", nit);
}
}
}


if(updates.size()>5){
Point2D poff1=at.inverseTransform(new Point2D.Double(0, 0), null);
Point2D poff2=at.inverseTransform(new Point2D.Double(offX/offsum, offY/offsum), null);
double fudgex = poff2.getX()-poff1.getX();
double fudgey = poff2.getY()-poff1.getY();

changeList.add(updates.stream()
.map(Tuple.vmap(p1->(Point2D)new Point2D.Double(p1.getX()+fudgex, p1.getY()+fudgey)))
.collect(Collectors.toList())
);
try{
Point2D poff1=at.inverseTransform(new Point2D.Double(0, 0), null);
Point2D poff2=at.inverseTransform(new Point2D.Double(offX/offsum, offY/offsum), null);
double fudgex = poff2.getX()-poff1.getX();
double fudgey = poff2.getY()-poff1.getY();

changeList.add(updates.stream()
.map(Tuple.vmap(p1->(Point2D)new Point2D.Double(p1.getX()+fudgex, p1.getY()+fudgey)))
.collect(Collectors.toList())
);
}catch (NoninvertibleTransformException nit){
throw new IOException("error inverting point", nit);
}
}
}

Expand Down
1 change: 0 additions & 1 deletion src/main/java/gov/nih/ncats/molvec/image/Binarization.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import java.util.function.Consumer;
import java.util.function.Predicate;

import gov.nih.ncats.molvec.Bitmap;
import gov.nih.ncats.molvec.image.binarization.ImageStats;

public interface Binarization {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package gov.nih.ncats.molvec;
package gov.nih.ncats.molvec.image;

import java.awt.Point;
import java.awt.Polygon;
Expand Down Expand Up @@ -26,23 +26,18 @@
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Stack;
import java.util.function.BiFunction;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand All @@ -52,14 +47,11 @@

import javax.imageio.ImageIO;

import gov.nih.ncats.molvec.util.CachedSupplier;
import gov.nih.ncats.molvec.algo.StructureImageExtractor;
import gov.nih.ncats.molvec.algo.Tuple;
import gov.nih.ncats.molvec.image.Binarization;
import gov.nih.ncats.molvec.image.ImageUtil;
import gov.nih.ncats.molvec.image.TiffTags;
import gov.nih.ncats.molvec.image.binarization.AdaptiveThreshold;
import gov.nih.ncats.molvec.image.binarization.ImageStats;
import gov.nih.ncats.molvec.ui.RasterBasedCosineSCOCR.RasterChar;
import gov.nih.ncats.molvec.util.GeomUtil;
import gov.nih.ncats.molvec.util.GeomUtil.LineDistanceCalculator;
import gov.nih.ncats.molvec.util.GeomUtil.LineWrapper;
Expand Down
Loading

0 comments on commit b97d1df

Please sign in to comment.