Skip to content

Commit

Permalink
make int and double parsing more tolerant of stray whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
pjfanning committed Sep 12, 2023
1 parent 1270672 commit 0802856
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.apache.poi.xssf.usermodel.XSSFName;
import org.apache.poi.xssf.usermodel.XSSFTable;

import static com.github.pjfanning.xlsx.impl.NumberUtil.parseInt;

/**
* Copied from POI BaseXSSFEvaluationWorkbook but a lot of stuff is removed because it is not easy
* or impossible to support in excel-streaming-reader
Expand Down Expand Up @@ -78,7 +80,7 @@ private int resolveBookIndex(String bookName) {

// Is it already in numeric form?
try {
return Integer.parseInt(bookName);
return parseInt(bookName);
} catch (NumberFormatException e) {}

// Not properly referenced
Expand Down
12 changes: 12 additions & 0 deletions src/main/java/com/github/pjfanning/xlsx/impl/NumberUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.github.pjfanning.xlsx.impl;

/**
 * Number-parsing helpers that tolerate whitespace around the value.
 *
 * <p>Both methods strip leading and trailing whitespace with {@link String#trim()}
 * (every character up to and including U+0020) before delegating to the JDK parser.
 */
final class NumberUtil {

    // Static helpers only; never instantiated.
    private NumberUtil() {
    }

    /**
     * Parses a signed decimal {@code int}, ignoring surrounding whitespace.
     *
     * @param s the text to parse
     * @return the parsed value
     * @throws NumberFormatException if {@code s} is {@code null} or, once trimmed,
     *         is not a parsable {@code int}
     */
    static int parseInt(final String s) {
        // Hand null straight to Integer.parseInt so it is reported as a
        // NumberFormatException, exactly as a direct Integer.parseInt call would,
        // instead of a NullPointerException from s.trim(). Callers that catch
        // NumberFormatException to tolerate bad input rely on this.
        return Integer.parseInt(s == null ? null : s.trim());
    }

    /**
     * Parses a {@code double}, ignoring surrounding whitespace.
     *
     * <p>{@link Double#parseDouble(String)} is documented to ignore leading and trailing
     * whitespace itself, so the explicit trim is redundant with the JDK behaviour but harmless.
     *
     * @param s the text to parse
     * @return the parsed value
     * @throws NullPointerException if {@code s} is {@code null}
     * @throws NumberFormatException if the trimmed text is not a parsable {@code double}
     */
    static double parseDouble(final String s) {
        return Double.parseDouble(s.trim());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import java.util.Calendar;
import java.util.Date;

import static com.github.pjfanning.xlsx.impl.NumberUtil.parseDouble;

public class StreamingCell implements Cell {

private static final Supplier NULL_SUPPLIER = () -> null;
Expand Down Expand Up @@ -197,7 +199,7 @@ public double getNumericCellValue() {
}
}
}
return rawContents == null ? 0.0 : Double.parseDouble(rawContents);
return rawContents == null ? 0.0 : parseDouble(rawContents);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
import java.time.LocalDateTime;
import java.util.*;

import static com.github.pjfanning.xlsx.impl.NumberUtil.parseDouble;
import static com.github.pjfanning.xlsx.impl.NumberUtil.parseInt;

class StreamingRowIterator implements CloseableIterator<Row> {
private static final Logger LOG = LoggerFactory.getLogger(StreamingRowIterator.class);
private static final QName QNAME_HIDDEN = QName.valueOf("hidden");
Expand Down Expand Up @@ -144,7 +147,7 @@ && isSpreadsheetTag(event.asStartElement().getName())) {
Attribute rowNumAttr = startElement.getAttributeByName(QNAME_R);
int rowIndex = currentRowNum;
if (rowNumAttr != null) {
rowIndex = Integer.parseInt(rowNumAttr.getValue()) - 1;
rowIndex = parseInt(rowNumAttr.getValue()) - 1;
currentRowNum = rowIndex;
}
Attribute isHiddenAttr = startElement.getAttributeByName(QNAME_HIDDEN);
Expand All @@ -166,7 +169,7 @@ && isSpreadsheetTag(event.asStartElement().getName())) {
if (styleAttr != null) {
String indexStr = styleAttr.getValue();
try {
int index = Integer.parseInt(indexStr);
final int index = parseInt(indexStr);
currentRow.setRowStyle(stylesTable.getStyleAt(index));
} catch (NumberFormatException nfe) {
LOG.warn("Ignoring invalid row style index {}", indexStr);
Expand All @@ -189,8 +192,8 @@ && isSpreadsheetTag(event.asStartElement().getName())) {
if (isHidden || width >= 0) {
Attribute minAttr = startElement.getAttributeByName(QNAME_MIN);
Attribute maxAttr = startElement.getAttributeByName(QNAME_MAX);
int min = Integer.parseInt(minAttr.getValue()) - 1;
int max = Integer.parseInt(maxAttr.getValue()) - 1;
int min = parseInt(minAttr.getValue()) - 1;
int max = parseInt(maxAttr.getValue()) - 1;
for (int columnIndex = min; columnIndex <= max; columnIndex++) {
if (isHidden) hiddenColumns.add(columnIndex);
if (width >= 0) columnWidths.put(columnIndex, width);
Expand Down Expand Up @@ -226,7 +229,7 @@ && isSpreadsheetTag(event.asStartElement().getName())) {
if (style != null) {
String indexStr = style.getValue();
try {
int index = Integer.parseInt(indexStr);
final int index = Integer.parseInt(indexStr);
currentCell.setCellStyle(stylesTable.getStyleAt(index));
} catch (NumberFormatException nfe) {
LOG.warn("Ignoring invalid style index {}", indexStr);
Expand All @@ -249,7 +252,7 @@ && isSpreadsheetTag(event.asStartElement().getName())) {
for (int i = ref.length() - 1; i >= 0; i--) {
if (!Character.isDigit(ref.charAt(i))) {
try {
streamingSheetReader.setLastRowNum(Integer.parseInt(ref.substring(i + 1)) - 1);
streamingSheetReader.setLastRowNum(parseInt(ref.substring(i + 1)) - 1);
} catch (NumberFormatException ignore) {
}
break;
Expand Down Expand Up @@ -467,7 +470,7 @@ private void parseSheetFormatPr(final StartElement startElement) {
final Attribute baseColWidthAtt = startElement.getAttributeByName(QName.valueOf("baseColWidth"));
if (baseColWidthAtt != null) {
try {
streamingSheetReader.setBaseColWidth(Integer.parseInt(baseColWidthAtt.getValue()));
streamingSheetReader.setBaseColWidth(parseInt(baseColWidthAtt.getValue()));
} catch (Exception e) {
LOG.warn("unable to parse baseColWidth {}", baseColWidthAtt.getValue());
}
Expand Down Expand Up @@ -504,7 +507,7 @@ private Supplier getFormatterForType(String type) {
switch(type) {
case "s": //string stored in shared table
if (!lastContents.isEmpty()) {
int idx = Integer.parseInt(lastContents);
final int idx = parseInt(lastContents);
if (!getBuilder().fullFormatRichText() && sst instanceof SharedStringsTableBase) {
return new LazySupplier<>(() -> ((SharedStringsTableBase)sst).getString(idx));
}
Expand All @@ -525,7 +528,7 @@ private Supplier getFormatterForType(String type) {
final String currentNumericFormat = currentCell.getNumericFormat();

return new LazySupplier<>(() -> dataFormatter.formatRawCellContents(
Double.parseDouble(lastContents),
parseDouble(lastContents),
currentNumericFormatIndex,
currentNumericFormat));

Expand Down Expand Up @@ -580,7 +583,7 @@ private String unformattedContents(Supplier formattedContentSupplier) {
return formattedContent.toString();
}
if (!lastContents.isEmpty()) {
int idx = Integer.parseInt(lastContents);
final int idx = parseInt(lastContents);
if (sst == null) throw new NullPointerException("sst is null");
if (sst instanceof SharedStringsTableBase) {
return ((SharedStringsTableBase)sst).getString(idx);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
import java.io.IOException;
import java.util.*;

import static com.github.pjfanning.xlsx.impl.NumberUtil.parseInt;

public class StreamingSheetReader implements Iterable<Row> {
private static final Logger LOG = LoggerFactory.getLogger(StreamingSheetReader.class);

Expand Down Expand Up @@ -200,7 +202,7 @@ void setFormatString(StartElement startElement, StreamingCell cell) {

if (stylesTable != null) {
if(cellStyleString != null) {
style = stylesTable.getStyleAt(Integer.parseInt(cellStyleString));
style = stylesTable.getStyleAt(parseInt(cellStyleString));
} else if(stylesTable.getNumCellStyles() > 0) {
style = stylesTable.getStyleAt(0);
}
Expand Down
12 changes: 12 additions & 0 deletions src/test/java/com/github/pjfanning/xlsx/StreamingWorkbookTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,18 @@ public void testSheetReadWrongOrder() throws Exception {
}
}

/**
 * Smoke test: walks every row and every cell of the first sheet of
 * {@code extra_whitespace.xlsx}. Nothing is asserted about the cell contents;
 * the test passes when the traversal finishes without an exception.
 */
@Test
public void testExtraWhitespace() throws Exception {
    try (Workbook workbook = openWorkbook("extra_whitespace.xlsx")) {
        final Iterator<Row> rows = workbook.getSheetAt(0).iterator();
        while (rows.hasNext()) {
            final Iterator<Cell> cells = rows.next().iterator();
            while (cells.hasNext()) {
                // Advancing the iterator is the whole point; the cell itself is not inspected.
                cells.next();
            }
        }
    }
}

private void validateFormatsSheet(Sheet sheet) throws IOException {
Iterator<Row> rowIterator = sheet.rowIterator();

Expand Down
Binary file added src/test/resources/extra_whitespace.xlsx
Binary file not shown.

0 comments on commit 0802856

Please sign in to comment.