From c888fb2f2581ab22e703d8d5afa4dbfb637b2957 Mon Sep 17 00:00:00 2001 From: Romuald Rousseau Date: Sat, 21 Sep 2024 18:17:18 +0800 Subject: [PATCH 1/6] feat: Add sheet name as configurable --- .../any2json/loader/csv/CsvDocument.java | 10 +++++----- .../any2json/loader/dbf/DbfDocument.java | 10 +++++----- .../any2json/loader/excel/xls/XlsDocument.java | 2 +- .../any2json/loader/excel/xlsx/XlsxDocument.java | 2 +- .../any2json/loader/excel/xml/XmlDocument.java | 2 +- .../any2json/loader/parquet/ParquetDocument.java | 6 +++--- .../any2json/loader/pdf/PdfDocument.java | 10 +++++----- .../com/github/romualdrousseau/any2json/Document.java | 2 +- .../romualdrousseau/any2json/DocumentFactory.java | 10 +++++----- 9 files changed, 27 insertions(+), 27 deletions(-) diff --git a/any2json-csv/src/main/java/com/github/romualdrousseau/any2json/loader/csv/CsvDocument.java b/any2json-csv/src/main/java/com/github/romualdrousseau/any2json/loader/csv/CsvDocument.java index f53c5f3d..9aa6d06b 100644 --- a/any2json-csv/src/main/java/com/github/romualdrousseau/any2json/loader/csv/CsvDocument.java +++ b/any2json-csv/src/main/java/com/github/romualdrousseau/any2json/loader/csv/CsvDocument.java @@ -28,16 +28,17 @@ protected EnumSet getIntelliCapabilities() { } @Override - public boolean open(final File txtFile, final String encoding, final String password) { + public boolean open(final File txtFile, final String encoding, final String password, final String sheetName) { if (txtFile == null) { throw new IllegalArgumentException(); } this.sheet = null; - if (encoding != null && this.openWithEncoding(txtFile, encoding)) { + final var sheetName2 = (sheetName == null) ? 
Disk.removeExtension(txtFile.getName()) : sheetName; + if (encoding != null && this.openWithEncoding(txtFile, encoding, sheetName2)) { return true; - } else if (this.openWithEncoding(txtFile, "UTF-8")) { + } else if (this.openWithEncoding(txtFile, "UTF-8", sheetName2)) { return true; } else { this.close(); @@ -82,13 +83,12 @@ public void autoRecipe(final BaseSheet sheet) { } } - private boolean openWithEncoding(final File txtFile, final String encoding) { + private boolean openWithEncoding(final File txtFile, final String encoding, final String sheetName) { try { final var reader = new BufferedReader(new InputStreamReader(new FileInputStream(txtFile), encoding)); if (encoding.startsWith("UTF-")) { this.processUtfBOM(reader); } - final var sheetName = Disk.removeExtension(txtFile.getName()); this.sheet = new CsvSheet(sheetName, reader); this.sheet.checkDataEncoding(); return true; diff --git a/any2json-dbf/src/main/java/com/github/romualdrousseau/any2json/loader/dbf/DbfDocument.java b/any2json-dbf/src/main/java/com/github/romualdrousseau/any2json/loader/dbf/DbfDocument.java index 7899bb19..f386f6c3 100644 --- a/any2json-dbf/src/main/java/com/github/romualdrousseau/any2json/loader/dbf/DbfDocument.java +++ b/any2json-dbf/src/main/java/com/github/romualdrousseau/any2json/loader/dbf/DbfDocument.java @@ -28,7 +28,7 @@ protected EnumSet getIntelliCapabilities() { } @Override - public boolean open(final File dbfFile, final String encoding, final String password) { + public boolean open(final File dbfFile, final String encoding, final String password, final String sheetName) { if (dbfFile == null) { throw new IllegalArgumentException(); } @@ -39,9 +39,10 @@ public boolean open(final File dbfFile, final String encoding, final String pass return false; } - if (encoding != null && this.openWithEncoding(dbfFile, encoding)) { + final var sheetName2 = (sheetName == null) ? 
Disk.removeExtension(dbfFile.getName()) : sheetName; + if (encoding != null && this.openWithEncoding(dbfFile, encoding, sheetName2)) { return true; - } else if (this.openWithEncoding(dbfFile, "ISO-8859-1")) { + } else if (this.openWithEncoding(dbfFile, "ISO-8859-1", sheetName2)) { return true; } else { this.close(); @@ -82,10 +83,9 @@ public Sheet getSheetAt(final int i) { public void autoRecipe(final BaseSheet sheet) { } - private boolean openWithEncoding(final File dbfFile, final String encoding) { + private boolean openWithEncoding(final File dbfFile, final String encoding, final String sheetName) { try { final var reader = new DBFReader(new FileInputStream(dbfFile), Charset.forName(encoding)); - final var sheetName = Disk.removeExtension(dbfFile.getName()); this.sheet = new DbfSheet(sheetName, reader); return true; } catch (final IOException | UnsupportedCharsetException x) { diff --git a/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xls/XlsDocument.java b/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xls/XlsDocument.java index 896fdbb8..26ecc0d2 100644 --- a/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xls/XlsDocument.java +++ b/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xls/XlsDocument.java @@ -33,7 +33,7 @@ protected EnumSet getIntelliCapabilities() { } @Override - public boolean open(final File excelFile, final String encoding, final String password) { + public boolean open(final File excelFile, final String encoding, final String password, final String sheetName) { if (excelFile == null) { throw new IllegalArgumentException(); } diff --git a/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xlsx/XlsxDocument.java b/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xlsx/XlsxDocument.java index db409718..fe83e4df 100644 --- 
a/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xlsx/XlsxDocument.java +++ b/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xlsx/XlsxDocument.java @@ -41,7 +41,7 @@ protected EnumSet getIntelliCapabilities() { } @Override - public boolean open(final File excelFile, final String encoding, final String password) { + public boolean open(final File excelFile, final String encoding, final String password, final String sheetName) { if (excelFile == null) { throw new IllegalArgumentException(); } diff --git a/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xml/XmlDocument.java b/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xml/XmlDocument.java index a4ade648..b1e4a712 100644 --- a/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xml/XmlDocument.java +++ b/any2json-excel/src/main/java/com/github/romualdrousseau/any2json/loader/excel/xml/XmlDocument.java @@ -37,7 +37,7 @@ protected EnumSet getIntelliCapabilities() { } @Override - public boolean open(final File excelFile, final String encoding, final String password) { + public boolean open(final File excelFile, final String encoding, final String password, final String sheetName) { if (excelFile == null) { throw new IllegalArgumentException(); } diff --git a/any2json-parquet/src/main/java/com/github/romualdrousseau/any2json/loader/parquet/ParquetDocument.java b/any2json-parquet/src/main/java/com/github/romualdrousseau/any2json/loader/parquet/ParquetDocument.java index bc9c5459..6252cc1b 100644 --- a/any2json-parquet/src/main/java/com/github/romualdrousseau/any2json/loader/parquet/ParquetDocument.java +++ b/any2json-parquet/src/main/java/com/github/romualdrousseau/any2json/loader/parquet/ParquetDocument.java @@ -32,7 +32,7 @@ protected EnumSet getIntelliCapabilities() { } @Override - public boolean open(final File parquetFile, final String encoding, final String password) 
{ + public boolean open(final File parquetFile, final String encoding, final String password, final String sheetName) { if (parquetFile == null) { throw new IllegalArgumentException(); } @@ -48,8 +48,8 @@ public boolean open(final File parquetFile, final String encoding, final String final var config = new Configuration(); final var file = HadoopInputFile.fromPath(path, config); final var reader = AvroParquetReader.builder(file).disableCompatibility().build(); - final var sheetName = Disk.removeExtension(parquetFile.getName()); - this.sheet = new ParquetSheet(sheetName, reader); + final var sheetName2 = (sheetName == null) ? Disk.removeExtension(parquetFile.getName()) : sheetName; + this.sheet = new ParquetSheet(sheetName2, reader); return true; } catch (IOException x) { this.close(); diff --git a/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfDocument.java b/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfDocument.java index 625a347f..5bfcc697 100644 --- a/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfDocument.java +++ b/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfDocument.java @@ -31,7 +31,7 @@ protected EnumSet getIntelliCapabilities() { } @Override - public boolean open(final File pdfFile, final String encoding, final String password) { + public boolean open(final File pdfFile, final String encoding, final String password, final String sheetName) { if (pdfFile == null) { throw new IllegalArgumentException(); } @@ -41,9 +41,10 @@ public boolean open(final File pdfFile, final String encoding, final String pass return false; } - if (encoding != null && this.openWithEncoding(pdfFile, encoding)) { + final var sheetName2 = (sheetName == null) ? 
Disk.removeExtension(pdfFile.getName()) : sheetName; + if (encoding != null && this.openWithEncoding(pdfFile, encoding, sheetName2)) { return true; - } else if (this.openWithEncoding(pdfFile, "ISO-8859-1")) { + } else if (this.openWithEncoding(pdfFile, "ISO-8859-1", sheetName2)) { return true; } else { this.close(); @@ -80,10 +81,9 @@ public Sheet getSheetAt(final int i) { return new BaseSheet(this, this.sheet.getName(), this.sheet.ensureDataLoaded()); } - private boolean openWithEncoding(final File pdfFile, final String encoding) { + private boolean openWithEncoding(final File pdfFile, final String encoding, final String sheetName) { try { final var reader = PDDocument.load(new FileInputStream(pdfFile)); - final var sheetName = Disk.removeExtension(pdfFile.getName()); this.sheet = new PdfSheet(sheetName, reader); return true; } catch (final IOException | UnsupportedCharsetException x) { diff --git a/any2json/src/main/java/com/github/romualdrousseau/any2json/Document.java b/any2json/src/main/java/com/github/romualdrousseau/any2json/Document.java index d60289d3..4112351a 100644 --- a/any2json/src/main/java/com/github/romualdrousseau/any2json/Document.java +++ b/any2json/src/main/java/com/github/romualdrousseau/any2json/Document.java @@ -17,7 +17,7 @@ enum Hint { TagClassifier getTagClassifier(); - boolean open(final File file, final String encoding, final String password); + boolean open(final File file, final String encoding, final String password, final String sheetName); void close(); diff --git a/any2json/src/main/java/com/github/romualdrousseau/any2json/DocumentFactory.java b/any2json/src/main/java/com/github/romualdrousseau/any2json/DocumentFactory.java index e302de2f..8230b69a 100644 --- a/any2json/src/main/java/com/github/romualdrousseau/any2json/DocumentFactory.java +++ b/any2json/src/main/java/com/github/romualdrousseau/any2json/DocumentFactory.java @@ -8,24 +8,24 @@ public class DocumentFactory { public static Document createInstance(final String 
filePath, final String encoding) { - return DocumentFactory.createInstance(new File(filePath), encoding, null); + return DocumentFactory.createInstance(new File(filePath), encoding, null, null); } public static Document createInstance(final String filePath, final String encoding, final String password) { - return DocumentFactory.createInstance(new File(filePath), encoding, password); + return DocumentFactory.createInstance(new File(filePath), encoding, password, null); } public static Document createInstance(final File file, final String encoding) { - return DocumentFactory.createInstance(file, encoding, null); + return DocumentFactory.createInstance(file, encoding, null, null); } - public static Document createInstance(final File file, final String encoding, final String password) { + public static Document createInstance(final File file, final String encoding, final String password, final String sheetName) { if (file == null) { throw new IllegalArgumentException(); } return DynamicPackages.GetDocumentFactories().stream() .map(DocumentClass::newInstance) - .filter(x -> x.open(file, encoding, password)) + .filter(x -> x.open(file, encoding, password, sheetName)) .findFirst() .orElseThrow(() -> new UnknownFormatConversionException(file.toString())); } From 3378ac2c52af5645f4b01946c0f334ee1e543d29 Mon Sep 17 00:00:00 2001 From: Romuald Rousseau Date: Sat, 21 Sep 2024 18:18:11 +0800 Subject: [PATCH 2/6] fix: Better latice PDF support --- .../any2json/loader/pdf/PdfSheet.java | 121 ++++++++++++------ 1 file changed, 81 insertions(+), 40 deletions(-) diff --git a/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfSheet.java b/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfSheet.java index 3c5f3f63..7b712326 100644 --- a/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfSheet.java +++ b/any2json-pdf/src/main/java/com/github/romualdrousseau/any2json/loader/pdf/PdfSheet.java @@ -24,7 +24,9 
@@ class PdfSheet extends PatcheableSheetStore implements Closeable { private static final int BATCH_SIZE = 50000; private static final int MAX_COLUMNS = 100; - private static final double MARGIN = 6.0; + private static final int LATICE_SPACES = 3; // Number of spaces to be considered a column separator + private static final int LATICE_MARGINS = 1; // Minimum margins to consider the beginning of a row + private static final int LATICE_COLUMN_SEPARATORS = 4; // Number of column separators needed to consider a line a table row private final String name; @@ -119,7 +121,8 @@ private DataFrame processRows(final PDDocument reader, final DataFrameWriter wri return writer.getDataFrame(); } - private void processRowsTabular(final SpreadsheetExtractionAlgorithm sea, final Page page, final DataFrameWriter writer) throws IOException { + private void processRowsTabular(final SpreadsheetExtractionAlgorithm sea, final Page page, + final DataFrameWriter writer) throws IOException { final var tables = sea.extract(page); for (final var table : tables) { final var rows = table.getRows(); @@ -135,7 +138,9 @@ private void processRowsTabular(final SpreadsheetExtractionAlgorithm sea, final } } - private void processRowsLatice(BasicExtractionAlgorithm bea, Page page, DataFrameWriter writer) throws IOException { + private void processRowsLatice(final BasicExtractionAlgorithm bea, final Page page, final DataFrameWriter writer) + throws IOException { + final var tableRows = new ArrayList(); final var tables = bea.extract(page); for (final var table : tables) { final var rows = table.getRows(); @@ -148,13 +153,13 @@ private void processRowsLatice(BasicExtractionAlgorithm bea, Page page, DataFram writer.write(Row.of("")); writer.write(Row.of("")); } - final var cells = new ArrayList(); - for (final var text : this.getCells(elements)) { - cells.add(StringUtils.cleanToken(text)); - } - writer.write(Row.of(this.getCells(elements))); + tableRows.add(this.getTableRow(elements)); isPreviousTableRow = true; } else { 
+ if (tableRows.size() > 0) { + this.processTableLatice(tableRows, writer); + tableRows.clear(); + } if (isPreviousTableRow) { writer.write(Row.of("")); writer.write(Row.of("")); @@ -162,8 +167,51 @@ private void processRowsLatice(BasicExtractionAlgorithm bea, Page page, DataFram writer.write(Row.of(StringUtils.cleanToken(this.getText(elements)))); isPreviousTableRow = false; } + } else { + isPreviousTableRow = false; + } + } + } + if (tableRows.size() > 0) { + this.processTableLatice(tableRows, writer); + tableRows.clear(); + } + } + + private void processTableLatice(final ArrayList rows, final DataFrameWriter writer) throws IOException { + final var tabs = new ArrayList(); + final int maxLength = rows.stream().mapToInt(x -> x.length()).max().getAsInt(); + + var last_tab = -1; + for (int i = 0; i < maxLength; i++) { + final int tab = i; + final var allBlanks = rows.stream().allMatch(x -> tab >= x.length() || this.isLaticeSpace(x.charAt(tab))); + if (allBlanks) { + if (last_tab >= 0 && (tab - last_tab) == 1) { + tabs.remove(tabs.size() - 1); + } + tabs.add(tab); + last_tab = tab; + } + } + tabs.add(maxLength - 1); + + for (final var row : rows) { + final var cells = new ArrayList(); + for (int i = 0; i < tabs.size() - 1; i++) { + final var begin = tabs.get(i); + if (begin < row.length()) { + final var end = tabs.get(i + 1); + if (end < row.length() - 1) { + cells.add(StringUtils.cleanToken(row.substring(begin, end))); + } else { + cells.add(StringUtils.cleanToken(row.substring(begin))); + } + } else { + cells.add(""); } } + writer.write(Row.of(cells.toArray(new String[] {}))); } } @@ -172,7 +220,7 @@ private List getElements(final List row) final var elements = new ArrayList(); for (final var cell : row) { for (final var element : cell.getTextElements()) { - if (element instanceof TextElement){ + if (element instanceof TextElement) { elements.add((TextElement) element); } } @@ -180,43 +228,40 @@ private List getElements(final List row) return elements; } - private 
boolean isTableRow(List elements, boolean isPreviousTableRow) { - var margin = Math.floor(Math.max(elements.get(0).getX() / elements.get(0).getWidthOfSpace() - 4, 0) / 4); - var separators = 0.0; - // var symbols = 0.0; + private boolean isTableRow(final List elements, final boolean isPreviousTableRow) { + final var margins = (int) Math + .floor(Math.max(elements.get(0).getX() / elements.get(0).getWidthOfSpace(), 0) / LATICE_SPACES); + var separators = 0; var x = elements.get(0).getX(); - for (final TextElement element: elements) { - // if (element.getText().isBlank()) { - // symbols += 1.0; - // } - separators += Math.floor(Math.max((element.getX() - x) / element.getWidthOfSpace() - 4, 0) / 4); + for (final TextElement element : elements) { + final var spacing = Math.max((element.getX() - x) / element.getWidthOfSpace() - LATICE_SPACES, 0); + if (spacing > 0) { + separators++; + } x = element.getX(); } - // Very naive Naive Bayes - final var pRow = pRowMargin(margin) * pRowSeparators(separators); - final var pNotRow = pNotRowMargin(margin) * pNotRowSeparators(separators); - return (!isPreviousTableRow) ? pRow > pNotRow : pRow >= pNotRow; + final var pRow = 0.5 * pRowMargin(margins) + 0.5 * pRowSeparators(separators); + return (!isPreviousTableRow) ? pRow == 1.0 : pRow >= 0.5; // Give a bit of lax if we are in a table, i.e. 
the + // previous row was a table row } - private String[] getCells(List elements) { - var x = 0.0; + private String getTableRow(final List elements) { var text = ""; - for (final TextElement element: elements) { - final var spacing = Math.max((element.getX() - x) / element.getWidthOfSpace() - 4, 0); + for (final TextElement element : elements) { + final var spacing = Math.max(element.getX() / element.getWidthOfSpace() - 1, 0) - text.length(); for (int i = 0; i < spacing; i++) { text += " "; } text += element.getText(); - x = element.getX(); } - return text.split(" +"); + return text; } - private String getText(List elements) { + private String getText(final List elements) { var text = ""; - for (final TextElement element: elements) { + for (final TextElement element : elements) { text += element.getText(); } return text; @@ -236,19 +281,15 @@ private String getCellAt(final int colIndex, final int rowIndex) { return row.get(colIndex); } - private float pRowMargin(final double margin) { - return margin > MARGIN ? 1.0f : 0.0f; - } - - private float pNotRowMargin(final double margin) { - return 1.0f - pRowMargin(margin); + private float pRowMargin(final int margins) { + return margins >= LATICE_MARGINS ? 1.0f : 0.0f; } - private float pRowSeparators(final double separators) { - return separators > 0.0 ? 1.0f : 0.0f; + private float pRowSeparators(final int separators) { + return separators >= LATICE_COLUMN_SEPARATORS ? 
1.0f : 0.0f; } - private float pNotRowSeparators(final double separators) { - return 1.0f - this.pRowSeparators(separators); + private boolean isLaticeSpace(final char c) { + return List.of(' ', '-', '_', '|').contains(c); } } From 4c59703a5bab27febed976fd38aacbd2c6dc4ef2 Mon Sep 17 00:00:00 2001 From: Romuald Rousseau Date: Sat, 21 Sep 2024 18:18:41 +0800 Subject: [PATCH 3/6] fix: Fix dependency vulnerability --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f78b8bb7..86eccf86 100644 --- a/pom.xml +++ b/pom.xml @@ -133,7 +133,7 @@ com.google.protobuf protobuf-java - 3.25.4 + 3.25.5 org.apache.avro From 6abb1e80a88e6f86f5150abb8aa727c374148109 Mon Sep 17 00:00:00 2001 From: Romuald Rousseau Date: Sat, 21 Sep 2024 18:19:15 +0800 Subject: [PATCH 4/6] feat: Add column value format --- .../any2json/base/BaseSheet.java | 36 +++++++++++++++++++ .../any2json/config/Settings.java | 1 + .../any2json/intelli/IntelliHeader.java | 6 ++-- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseSheet.java b/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseSheet.java index 6fa069a0..865281f1 100644 --- a/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseSheet.java +++ b/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseSheet.java @@ -45,6 +45,7 @@ public BaseSheet(final BaseDocument document, final String name, final Patcheabl this.pivotValueFormat = "%s " + Settings.PIVOT_VALUE_SUFFIX; this.pivotTypeFormat = "%s " + Settings.PIVOT_TYPE_SUFFIX; this.groupValueFormat = "%s " + Settings.GROUP_VALUE_SUFFIX; + this.columnValueFormat = "%s " + Settings.COLUMN_VALUE_SUFFIX; } @Override @@ -229,6 +230,32 @@ public void patchCell(final int colIndex1, final int rowIndex1, final int colInd this.unmergedAll); } + public void patchCells(final int colIndex1, final int rowIndex1, final int colIndex2, final int 
rowIndex2, + final List values) { + int colIndex = colIndex2; + for (final var value : values) { + if (value != null) { + this.patchCell(colIndex1, rowIndex1, colIndex, rowIndex2, value); + } + colIndex++; + } + } + + public List searchCell(final String regex, final int offset, final int length, final int nth) { + int n = 0; + for(int i = 0; i < length; i++) { + for (int j = 0; j < this.getLastColumnNum(offset + i); j++) { + final var cell = this.getCellDataAt(j, offset + i); + if (cell.matches(regex)) { + if (++n == nth) { + return List.of(j, offset + i); + } + } + } + } + return null; + } + public boolean notifyStepCompleted(final SheetEvent e) { for (final var listener : listeners) { listener.stepCompleted(e); @@ -382,6 +409,14 @@ public void setGroupValueFormat(final String format) { this.groupValueFormat = format; } + public String getColumnValueFormat() { + return this.columnValueFormat; + } + + public void setColumnValueFormat(final String format) { + this.columnValueFormat = format; + } + public void swapRows(int rowIndex1, int rowIndex2) { final var tmp = this.rowMask.get(rowIndex1); this.rowMask.set(rowIndex1, this.rowMask.get(rowIndex2)); @@ -430,5 +465,6 @@ private int computeLastColumnNum() { private String pivotValueFormat; private String pivotTypeFormat; private String groupValueFormat; + private String columnValueFormat; private List pivotEntityList; } diff --git a/any2json/src/main/java/com/github/romualdrousseau/any2json/config/Settings.java b/any2json/src/main/java/com/github/romualdrousseau/any2json/config/Settings.java index 01b5d4fa..ea69735c 100644 --- a/any2json/src/main/java/com/github/romualdrousseau/any2json/config/Settings.java +++ b/any2json/src/main/java/com/github/romualdrousseau/any2json/config/Settings.java @@ -12,6 +12,7 @@ public class Settings { public final static String PIVOT_VALUE_SUFFIX = "#VALUE?"; public static final String PIVOT_TYPE_SUFFIX = "#TYPE?"; public final static String GROUP_VALUE_SUFFIX = "#GROUP?"; + public 
final static String COLUMN_VALUE_SUFFIX = "#COLUMN?"; public static final String MERGE_SEPARATOR = " "; } diff --git a/any2json/src/main/java/com/github/romualdrousseau/any2json/intelli/IntelliHeader.java b/any2json/src/main/java/com/github/romualdrousseau/any2json/intelli/IntelliHeader.java index c7261e7f..ce720868 100644 --- a/any2json/src/main/java/com/github/romualdrousseau/any2json/intelli/IntelliHeader.java +++ b/any2json/src/main/java/com/github/romualdrousseau/any2json/intelli/IntelliHeader.java @@ -22,13 +22,13 @@ public IntelliHeader(final BaseHeader header, final boolean disableAutoName) { if (header.isColumnEmpty()) { this.name = ""; } else { - this.name = this.entities().stream().findAny().map(x -> this.getEntitiesAsString()) - .orElse(Settings.PIVOT_VALUE_SUFFIX); + this.name = String.format(this.getTable().getSheet().getColumnValueFormat(), + this.entities().stream().findAny().map(x -> this.getEntitiesAsString()).orElse("VALUE")); } } else if (this.isPivotHeader() || !disableAutoName) { this.name = this.getTable().getSheet().getDocument().getModel().toEntityName(cellValue); } else { - this.name= cellValue; + this.name = cellValue; } this.disableAutoName = disableAutoName; From 5494389f5ea0a03434fad691adc5754d905cbbb7 Mon Sep 17 00:00:00 2001 From: Romuald Rousseau Date: Sat, 21 Sep 2024 18:19:31 +0800 Subject: [PATCH 5/6] fix: Fix cache index --- .../com/github/romualdrousseau/any2json/base/BaseTable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseTable.java b/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseTable.java index 73be8163..a6bd4fd7 100644 --- a/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseTable.java +++ b/any2json/src/main/java/com/github/romualdrousseau/any2json/base/BaseTable.java @@ -70,7 +70,7 @@ public BaseRow getRowAt(final int rowIndex) { if (rowIndex < 0 || rowIndex >= this.getNumberOfRows()) { throw 
new ArrayIndexOutOfBoundsException(rowIndex); } - return this.cachedRows.computeIfAbsent(this.firstRowOffset + rowIndex, (x) -> { + return this.cachedRows.computeIfAbsent(this.firstRow + this.firstRowOffset + rowIndex, (x) -> { final var result = new BaseRow(this, x); // Retrieve ignore status possibly lost in cache removal for (final var i : this.ignoreRows()) { From 71fbb6157917156eeba87c69b1b8c7c92a33dd23 Mon Sep 17 00:00:00 2001 From: Romuald Rousseau Date: Sat, 21 Sep 2024 18:19:48 +0800 Subject: [PATCH 6/6] feat: Add new recipes --- .../any2json/TransformableSheet.java | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/any2json/src/main/java/com/github/romualdrousseau/any2json/TransformableSheet.java b/any2json/src/main/java/com/github/romualdrousseau/any2json/TransformableSheet.java index 9be8bf43..979c03da 100644 --- a/any2json/src/main/java/com/github/romualdrousseau/any2json/TransformableSheet.java +++ b/any2json/src/main/java/com/github/romualdrousseau/any2json/TransformableSheet.java @@ -350,6 +350,16 @@ public void setGroupValueFormat(final String format) { this.sheet.setGroupValueFormat(format); } + /** + * This method sets the name of a column header without a name for the sheet using the given + * format. + * + * @param format the format used as in {@link String#format(String, Object...)} + */ + public void setColumnValueFormat(final String format) { + this.sheet.setColumnValueFormat(format); + } + /** * This method crops the sheet by dropping all rows and columns on * the edges of the sheet. @@ -413,7 +423,7 @@ public void repeatRowCell(final int rowIndex) { } /** - * This method patches the cells of the given column and row indices with the + * This method patches the cell of the given column and row indices with the * given value. The style is copied from an existing cell. 
* * @param colIndex1 the column index to copy the style from @@ -427,6 +437,21 @@ public void patchCell(final int colIndex1, final int rowIndex1, final int colInd this.sheet.patchCell(colIndex1, rowIndex1, colIndex2, rowIndex2, value); } + /** + * This method patches the sequence of cells from the given column and row indices with the + * given values. The style is copied from an existing cell. + * + * @param colIndex1 the column index to copy the style from + * @param rowIndex1 the row index to copy the style from + * @param colIndex2 the column index to copy the style to + * @param rowIndex2 the row index to copy the style to + * @param values the values of the destination cells + */ + public void patchCells(final int colIndex1, final int rowIndex1, final int colIndex2, final int rowIndex2, + final List values) { + this.sheet.patchCells(colIndex1, rowIndex1, colIndex2, rowIndex2, values); + } + /** * This method drops the column specified by the given column index from the * sheet. @@ -578,5 +603,40 @@ public void dropRowsWhenEntropyLessThan(final float minEntropy, final int start, DropRowsWhenEntropyLessThan.Apply(this.sheet, minEntropy, start, stop); } + /** + * This method searches for the first occurrence of a value that matches a regex within a given row. + * + * @param regex the regex to search + * @param rowIndex the row index to search + */ + public List searchFirstValue(final String regex, final int rowIndex) { + return this.sheet.searchCell(regex, rowIndex, 1, 1); + } + + /** + * This method searches for the first occurrence of a value that matches a regex within a given region of rows. + * The region of rows begins at a given offset and has a given number of rows. 
+ * + * @param regex the regex to search + * @param offset the starting offset of the region to search + * @param length the number of rows to search + */ + public List searchFirstValue(final String regex, final int offset, final int length) { + return this.sheet.searchCell(regex, offset, length, 1); + } + + /** + * This method searches for the nth occurrence of a value that matches a regex within a given region of rows. + * The region of rows begins at a given offset and has a given number of rows. + * + * @param regex the regex to search + * @param offset the starting offset of the region to search + * @param length the number of rows to search + * @param nth the nth occurrence to match + */ + public List searchNthValue(final String regex, final int offset, final int length, final int nth) { + return this.sheet.searchCell(regex, offset, length, nth); + } + private final BaseSheet sheet; }