From 695ddf513f6a7a9be40e8d882e3ce018443bfa78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?lm=E5=88=98=E8=8C=97?= Date: Wed, 20 Jan 2021 19:50:53 +0800 Subject: [PATCH] add pagenumber in output json file --- src/main/java/technology/tabula/Table.java | 7 +++++++ src/main/java/technology/tabula/TableWithRulingLines.java | 3 ++- .../tabula/extractors/BasicExtractionAlgorithm.java | 1 + .../tabula/extractors/SpreadsheetExtractionAlgorithm.java | 2 +- src/main/java/technology/tabula/json/TableSerializer.java | 1 + 5 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/main/java/technology/tabula/Table.java b/src/main/java/technology/tabula/Table.java index c031c9ed..ae98c634 100644 --- a/src/main/java/technology/tabula/Table.java +++ b/src/main/java/technology/tabula/Table.java @@ -23,11 +23,18 @@ public Table(ExtractionAlgorithm extractionAlgorithm) { private int rowCount = 0; private int colCount = 0; + private int pageNumber = 0; /* visible for testing */ final TreeMap cells = new TreeMap<>(); public int getRowCount() { return rowCount; } public int getColCount() { return colCount; } + public int getPageNumber() { + return pageNumber; + } + public void setPageNumber(int number) { + pageNumber = number; + } public String getExtractionMethod() { return extractionMethod; } diff --git a/src/main/java/technology/tabula/TableWithRulingLines.java b/src/main/java/technology/tabula/TableWithRulingLines.java index c119f191..e668bcc8 100644 --- a/src/main/java/technology/tabula/TableWithRulingLines.java +++ b/src/main/java/technology/tabula/TableWithRulingLines.java @@ -14,12 +14,13 @@ public class TableWithRulingLines extends Table { List verticalRulings, horizontalRulings; RectangleSpatialIndex si = new RectangleSpatialIndex<>(); - public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm) { + public TableWithRulingLines(Rectangle area, List cells, List horizontalRulings, List verticalRulings, ExtractionAlgorithm extractionAlgorithm, int number) { super(extractionAlgorithm); this.setRect(area); this.verticalRulings = verticalRulings; this.horizontalRulings = horizontalRulings; this.addCells(cells); + this.setPageNumber(number); } private void addCells(List cells) { diff --git a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java index dcd01695..edcd8916 100644 --- a/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/BasicExtractionAlgorithm.java @@ -65,6 +65,7 @@ public int compare(Ruling arg0, Ruling arg1) { Table table = new Table(this); table.setRect(page.getLeft(), page.getTop(), page.getWidth(), page.getHeight()); + table.setPageNumber(page.getPageNumber()); for (int i = 0; i < lines.size(); i++) { Line line = lines.get(i); diff --git a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java index c377507c..de8da1d7 100644 --- a/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java +++ b/src/main/java/technology/tabula/extractors/SpreadsheetExtractionAlgorithm.java @@ -131,7 +131,7 @@ else if (r.vertical()) { } } - TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this); + TableWithRulingLines t = new TableWithRulingLines(area, overlappingCells, horizontalOverlappingRulings, verticalOverlappingRulings, this, page.getPageNumber()); spreadsheets.add(t); } Utils.sort(spreadsheets, Rectangle.ILL_DEFINED_ORDER); diff --git a/src/main/java/technology/tabula/json/TableSerializer.java b/src/main/java/technology/tabula/json/TableSerializer.java index 2ba20bcd..0d84f60f 100644 --- a/src/main/java/technology/tabula/json/TableSerializer.java +++ b/src/main/java/technology/tabula/json/TableSerializer.java @@ -30,6 +30,7 @@ public JsonElement serialize(Table table, Type type, JsonSerializationContext co json.addProperty("height", table.getHeight()); json.addProperty("right", table.getRight()); json.addProperty("bottom", table.getBottom()); + json.addProperty("page_number", table.getPageNumber()); json.add("data", data); for (List tableRow : table.getRows()) {