Skip to content

Commit

Permalink
[RELEASE] iText pdfOCR 3.0.2
Browse files Browse the repository at this point in the history
  • Loading branch information
iText-CI committed Jan 30, 2024
2 parents da196c6 + 151cd11 commit 78c4b90
Show file tree
Hide file tree
Showing 136 changed files with 1,204 additions and 183 deletions.
2 changes: 1 addition & 1 deletion pdfocr-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<groupId>com.itextpdf</groupId>
<artifactId>pdfocr-root</artifactId>
<version>3.0.1</version>
<version>3.0.2</version>
</parent>

<artifactId>pdfocr-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.pdfocr;

/**
* OCR properties passed to the OCR engine as part of {@link OcrProcessContext}.
* <p>
* The interface declares no members of its own; it only provides a common type for the
* properties object that {@link OcrProcessContext} carries.
*/
public interface IOcrProcessProperties {
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
311 changes: 251 additions & 60 deletions pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down Expand Up @@ -99,6 +99,11 @@ public class OcrPdfCreatorProperties {

private IMetaInfo metaInfo;

/**
* Indicates whether the created pdf is tagged or not.
*/
private boolean tagged = false;

/**
* Creates a new {@link OcrPdfCreatorProperties} instance.
*/
Expand Down Expand Up @@ -369,6 +374,26 @@ public OcrPdfCreatorProperties setImageRotationHandler(
return this;
}

/**
 * Specifies whether the pdf document produced with these properties should be tagged.
 *
 * @param tagged {@code true} to request a tagged result pdf, {@code false} to request
 *               an untagged one.
 * @return this {@link OcrPdfCreatorProperties} instance, so that calls can be chained.
 */
public OcrPdfCreatorProperties setTagged(boolean tagged) {
    this.tagged = tagged;
    return this;
}

/**
 * Tells whether the pdf document produced with these properties should be tagged.
 *
 * @return {@code true} if a tagged result pdf is requested, {@code false} otherwise.
 */
public boolean isTagged() {
    return this.tagged;
}

/**
* Set meta info for this {@link OcrPdfCreatorProperties}.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down Expand Up @@ -28,6 +28,8 @@ This file is part of the iText (R) project.
public class OcrProcessContext {
private AbstractPdfOcrEventHelper ocrEventHelper;

private IOcrProcessProperties ocrProcessProperties;

/**
* Creates an instance of ocr process context
*
Expand All @@ -54,4 +56,22 @@ public AbstractPdfOcrEventHelper getOcrEventHelper() {
public void setOcrEventHelper(AbstractPdfOcrEventHelper eventHelper) {
this.ocrEventHelper = eventHelper;
}

/**
 * Stores the extra OCR process properties in this context.
 * <p>
 * The method is package-private, so only code in the same package can assign the properties;
 * other code reads them through {@link #getOcrProcessProperties()}.
 *
 * @param ocrProcessProperties extra OCR process properties to keep in this context.
 */
void setOcrProcessProperties(IOcrProcessProperties ocrProcessProperties) {
    this.ocrProcessProperties = ocrProcessProperties;
}

/**
 * Returns the extra OCR process properties held by this context.
 *
 * @return the extra OCR process properties currently stored in this context.
 */
public IOcrProcessProperties getOcrProcessProperties() {
    return this.ocrProcessProperties;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down Expand Up @@ -96,6 +96,7 @@ static float calculateFontSize(final Document document, final String line,

try {
Paragraph paragraph = new Paragraph(line);
paragraph.setMargin(0);
paragraph.setWidth(bbox.getWidth());
paragraph.setFontFamily(fontFamily);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
36 changes: 30 additions & 6 deletions pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextInfo.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand All @@ -23,11 +23,7 @@ This file is part of the iText (R) project.
package com.itextpdf.pdfocr;

import com.itextpdf.kernel.geom.Rectangle;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import com.itextpdf.pdfocr.structuretree.LogicalStructureTreeItem;

/**
* This class describes how recognized text is positioned on the image
Expand All @@ -45,6 +41,11 @@ public class TextInfo {
*/
private Rectangle bboxRect;

/**
* If LogicalStructureTreeItem is set, then {@link TextInfo}s are expected to be in logical order.
*/
private LogicalStructureTreeItem logicalStructureTreeItem;

/**
* Creates a new {@link TextInfo} instance.
*/
Expand Down Expand Up @@ -107,4 +108,27 @@ public Rectangle getBboxRect() {
public void setBboxRect(final Rectangle bbox) {
this.bboxRect = new Rectangle(bbox);
}

/**
 * Returns the structure tree item attached to this text item.
 *
 * @return the structure tree item currently stored in this {@link TextInfo}.
 */
public LogicalStructureTreeItem getLogicalStructureTreeItem() {
    return this.logicalStructureTreeItem;
}

/**
 * Sets the logical structure tree parent item for this text info. This makes it possible to
 * organize text chunks into a logical hierarchy, e.g. to specify document paragraphs,
 * tables, etc.
 *
 * <p>
 * If a LogicalStructureTreeItem is set, then the list of {@link TextInfo}s in the
 * {@link IOcrEngine#doImageOcr} return value is expected to be in logical order.
 *
 * @param logicalStructureTreeItem structure tree item to attach to this text info.
 */
public void setLogicalStructureTreeItem(LogicalStructureTreeItem logicalStructureTreeItem) {
    this.logicalStructureTreeItem = logicalStructureTreeItem;
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down Expand Up @@ -33,6 +33,7 @@ public class PdfOcrExceptionMessageConstant {
public static final String CANNOT_CREATE_PDF_DOCUMENT = "Cannot create PDF document: {0}";
public static final String STATISTICS_EVENT_TYPE_CANT_BE_NULL = "Statistics event type can't be null";
public static final String STATISTICS_EVENT_TYPE_IS_NOT_DETECTED = "Statistics event type is not detected.";
public static final String TAGGING_IS_NOT_SUPPORTED = "Tagging is not supported by the OCR engine.";

private PdfOcrExceptionMessageConstant() {
//Private constructor will prevent the instantiation of this class directly
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2023 Apryse Group NV
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.pdfocr.structuretree;

/**
 * Structure tree item that denotes an artifact. Attaching this item to a text info means that
 * the text will be marked as an artifact.
 */
public final class ArtifactItem extends LogicalStructureTreeItem {
    /** The one shared instance handed out by {@link #getInstance()}. */
    private static final ArtifactItem INSTANCE = new ArtifactItem();

    /**
     * Private so that the only instance is the one created by this class itself.
     */
    private ArtifactItem() {
        // The no-argument superclass constructor is invoked implicitly.
    }

    /**
     * Provides the shared {@link ArtifactItem} instance.
     *
     * @return the shared {@link ArtifactItem} instance; every call returns the same object.
     */
    public static ArtifactItem getInstance() {
        return INSTANCE;
    }
}
Loading

0 comments on commit 78c4b90

Please sign in to comment.