Skip to content

Commit

Permalink
Add CSVException that extends IOException thrown on invalid input
Browse files Browse the repository at this point in the history
instead of IOException
  • Loading branch information
garydgregory committed Sep 14, 2024
1 parent 761a337 commit 28441e6
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 33 deletions.
1 change: 1 addition & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
<body>
<release version="1.11.1" date="YYYY-MM-DD" description="Feature and bug fix release (Java 8 or above)">
<!-- ADD -->
<action type="add" dev="ggregory" due-to="Gary Gregory">Add CSVException that extends IOException thrown on invalid input instead of IOException.</action>
<!-- FIX -->
<action type="fix" dev="ggregory" due-to="Gary Gregory">Fix PMD issues for port to PMD 7.1.0.</action>
<action type="fix" dev="ggregory" due-to="Dávid Szigecsán, Gary Gregory">Fix some Javadoc links #442.</action>
Expand Down
44 changes: 44 additions & 0 deletions src/main/java/org/apache/commons/csv/CSVException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.csv;

import java.io.IOException;
import java.util.Formatter;
import java.util.IllegalFormatException;

/**
 * Signals a problem found while processing CSV data, for example invalid input encountered while parsing.
 * <p>
 * This type extends {@link IOException}, so code that already catches {@code IOException} also catches this exception.
 * </p>
 *
 * @since 1.12.0
 */
public class CSVException extends IOException {

    private static final long serialVersionUID = 1L;

    /**
     * Renders the exception message from a format string and its arguments.
     *
     * @param format A {@link Formatter} format string.
     * @param args   The arguments referenced by the format specifiers, see {@link String#format(String, Object...)}.
     * @return the formatted message.
     * @throws IllegalFormatException See {@link String#format(String, Object...)}.
     */
    private static String formatMessage(final String format, final Object... args) {
        return String.format(format, args);
    }

    /**
     * Constructs a new instance whose detail message is the result of formatting {@code format} with {@code args}.
     *
     * @param format A {@link Formatter} format string.
     * @param args   See {@link String#format(String, Object...)}.
     * @throws IllegalFormatException See {@link String#format(String, Object...)}.
     */
    public CSVException(final String format, final Object... args) {
        super(formatMessage(format, args));
    }

}
1 change: 1 addition & 0 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -2032,6 +2032,7 @@ public boolean isQuoteCharacterSet() {
* @param reader the input stream
* @return a parser over a stream of {@link CSVRecord}s.
* @throws IOException If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
public CSVParser parse(final Reader reader) throws IOException {
return new CSVParser(reader, this);
Expand Down
18 changes: 16 additions & 2 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
final class CSVRecordIterator implements Iterator<CSVRecord> {
private CSVRecord current;

/**
* Gets the next record by delegating to {@link CSVParser#nextRecord()} through {@code Uncheck.get}, so that this method declares no checked
* exception and can be used from the {@link Iterator} methods.
* <p>
* NOTE(review): {@code Uncheck.get} presumably rethrows an {@link IOException} from {@code nextRecord()} as an unchecked exception; confirm
* against the {@code Uncheck} documentation.
* </p>
*
* @return the next record, or {@code null} if {@code nextRecord()} reports that the end of the stream has been reached.
*/
private CSVRecord getNextRecord() {
return Uncheck.get(CSVParser.this::nextRecord);
}
Expand Down Expand Up @@ -221,6 +226,7 @@ private static final class Headers {
* If the parameters of the format are inconsistent or if either file or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
Objects.requireNonNull(file, "file");
Expand All @@ -246,6 +252,7 @@ public static CSVParser parse(final File file, final Charset charset, final CSVF
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
* @since 1.5
*/
@SuppressWarnings("resource")
Expand All @@ -270,6 +277,7 @@ public static CSVParser parse(final InputStream inputStream, final Charset chars
* If the parameters of the format are inconsistent or if either file or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
* @since 1.5
*/
@SuppressWarnings("resource")
Expand All @@ -296,6 +304,7 @@ public static CSVParser parse(final Path path, final Charset charset, final CSVF
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
* @since 1.5
*/
public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
Expand All @@ -314,6 +323,7 @@ public static CSVParser parse(final Reader reader, final CSVFormat format) throw
* If the parameters of the format are inconsistent or if either string or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
Objects.requireNonNull(string, "string");
Expand Down Expand Up @@ -341,6 +351,7 @@ public static CSVParser parse(final String string, final CSVFormat format) throw
* If the parameters of the format are inconsistent or if either url, charset or format are null.
* @throws IOException
* If an I/O error occurs
* @throws CSVException Thrown on invalid input.
*/
@SuppressWarnings("resource")
public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
Expand Down Expand Up @@ -395,6 +406,7 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
* If the parameters of the format are inconsistent or if either reader or format are null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
*/
public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
this(reader, format, 0, 1);
Expand All @@ -420,6 +432,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
* If the parameters of the format are inconsistent or if either the reader or format is null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
* @since 1.1
*/
@SuppressWarnings("resource")
Expand Down Expand Up @@ -465,6 +478,7 @@ private Map<String, Integer> createEmptyHeaderMap() {
*
* @return null if the format has no header.
* @throws IOException if there is a problem reading the header or skipping the first record
* @throws CSVException Thrown on invalid input.
*/
private Headers createHeaders() throws IOException {
Map<String, Integer> hdrMap = null;
Expand Down Expand Up @@ -746,8 +760,8 @@ public Iterator<CSVRecord> iterator() {
* Parses the next record from the current point in the stream.
*
* @return the record as an array of values, or {@code null} if the end of the stream has been reached
* @throws IOException
* on parse error or input read-failure
* @throws IOException on parse error or input read-failure
* @throws CSVException Thrown on invalid input.
*/
CSVRecord nextRecord() throws IOException {
CSVRecord result = null;
Expand Down
50 changes: 22 additions & 28 deletions src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,10 @@ private char mapNullToDisabled(final Character c) {
* A token corresponds to a term, a record change or an end-of-file indicator.
* </p>
*
* @param token
* an existing Token object to reuse. The caller is responsible for initializing the Token.
* @param token an existing Token object to reuse. The caller is responsible for initializing the Token.
* @return the next token found.
* @throws IOException on stream access error.
* @throws IOException on stream access error.
* @throws CSVException Thrown on invalid input.
*/
Token nextToken(final Token token) throws IOException {
// Get the last read char (required for empty line detection)
Expand Down Expand Up @@ -307,6 +307,7 @@ Token nextToken(final Token token) throws IOException {
* @throws IOException on stream access error.
* @throws CSVException Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
* delimiter or EOL.
*/
private Token parseEncapsulatedToken(final Token token) throws IOException {
token.isQuoted = true;
Expand Down Expand Up @@ -342,8 +343,8 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
token.content.append((char) c);
} else if (!Character.isWhitespace((char) c)) {
// error invalid char between token and next delimiter
throw new IOException(String.format("Invalid char between encapsulated token and delimiter at line: %,d, position: %,d",
getCurrentLineNumber(), getCharacterPosition()));
throw new CSVException("Invalid character between encapsulated token and delimiter at line: %,d, position: %,d",
getCurrentLineNumber(), getCharacterPosition());
}
}
}
Expand All @@ -356,8 +357,7 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
return token;
}
// error condition (end of file before end of token)
throw new IOException("(startline " + startLineNumber +
") EOF reached before encapsulated token finished");
throw new CSVException("(startline %,d) EOF reached before encapsulated token finished", startLineNumber);
} else {
// consume character
token.content.append((char) c);
Expand All @@ -368,22 +368,20 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
/**
* Parses a simple token.
* <p>
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
* delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
* Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped delimiters (as \, or \;). The token is finished
* when one of the following conditions becomes true:
* </p>
* <ul>
* <li>The end of line has been reached (EORECORD)</li>
* <li>The end of stream has been reached (EOF)</li>
* <li>An unescaped delimiter has been reached (TOKEN)</li>
* </ul>
*
* @param token
* the current token
* @param ch
* the current character
* @param token the current token
* @param ch the current character
* @return the filled token
* @throws IOException
* on stream access error
* @throws IOException on stream access error
* @throws CSVException Thrown on invalid input.
*/
private Token parseSimpleToken(final Token token, int ch) throws IOException {
// Faster to use while(true)+break than while(token.type == INVALID)
Expand Down Expand Up @@ -420,10 +418,9 @@ private Token parseSimpleToken(final Token token, int ch) throws IOException {
/**
* Appends the next escaped character to the token's content.
*
* @param token
* the current token
* @throws IOException
* on stream access error
* @param token the current token
* @throws IOException on stream access error
* @throws CSVException Thrown on invalid input.
*/
private void appendNextEscapedCharacterToToken(final Token token) throws IOException {
if (isEscapeDelimiter()) {
Expand Down Expand Up @@ -467,15 +464,12 @@ boolean readEndOfLine(int ch) throws IOException {

// TODO escape handling needs more work
/**
* Handle an escape sequence.
* The current character must be the escape character.
* On return, the next character is available by calling {@link ExtendedBufferedReader#getLastChar()}
* on the input stream.
* Handle an escape sequence. The current character must be the escape character. On return, the next character is available by calling
* {@link ExtendedBufferedReader#getLastChar()} on the input stream.
*
* @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is
* invalid.
* @throws IOException if there is a problem reading the stream or the end of stream is detected:
* the escape character is not allowed at end of stream
* @return the unescaped character (as an int) or {@link IOUtils#EOF} if char following the escape is invalid.
* @throws IOException if there is a problem reading the stream
* @throws CSVException Thrown if the end of stream is detected: the escape character is not allowed at end of stream
*/
int readEscape() throws IOException {
// the escape char has just been read (normally a backslash)
Expand All @@ -498,7 +492,7 @@ int readEscape() throws IOException {
case Constants.BACKSPACE: // TODO is this correct?
return ch;
case EOF:
throw new IOException("EOF whilst processing escape sequence");
throw new CSVException("EOF while processing escape sequence");
default:
// Now check for meta-characters
if (isMetaChar(ch)) {
Expand Down
8 changes: 5 additions & 3 deletions src/test/java/org/apache/commons/csv/CSVParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -1555,10 +1556,11 @@ public void testThrowExceptionWithLineAndPosition() throws IOException {
.setSkipHeaderRecord(true)
.build();
// @formatter:on

try (CSVParser csvParser = csvFormat.parse(stringReader)) {
final Exception exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
assertTrue(exception.getMessage().contains("Invalid char between encapsulated token and delimiter at line: 2, position: 94"));
final UncheckedIOException exception = assertThrows(UncheckedIOException.class, csvParser::getRecords);
assertInstanceOf(CSVException.class, exception.getCause());
assertTrue(exception.getMessage().contains("Invalid character between encapsulated token and delimiter at line: 2, position: 94"),
exception::getMessage);
}
}

Expand Down

0 comments on commit 28441e6

Please sign in to comment.