diff --git a/src/main/java/io/deephaven/csv/reading/CsvReader.java b/src/main/java/io/deephaven/csv/reading/CsvReader.java index 618d4fe..9a9944a 100644 --- a/src/main/java/io/deephaven/csv/reading/CsvReader.java +++ b/src/main/java/io/deephaven/csv/reading/CsvReader.java @@ -56,8 +56,8 @@ private CsvReader() {} * the CsvReader determines what the column type is, it will use the {@link SinkFactory} to create an * appropriate Sink<T> for the type. Note that the CsvReader might guess wrong, so it might create a * Sink, partially populate it, and then abandon it. The final set of fully-populated Sinks will be returned - * in the CsvReader.Result. Thread safety: The {@link SinkFactory} may be invoked concurrently, therefore - * it must be thread safe. + * in the CsvReader.Result. Thread safety: The {@link SinkFactory} may be invoked concurrently, therefore it + * must be thread safe. * @return A CsvReader.Result containing the column names, the number of columns, and the final set of * fully-populated Sinks. */ @@ -99,8 +99,8 @@ private static Result delimitedReadLogic( private static Result commonReadLogic(final CsvSpecs specs, CellGrabber grabber, byte[][] optionalFirstDataRow, - int numInputCols, int numOutputCols, - String[] headersToUse, final SinkFactory sinkFactory) + int numInputCols, int numOutputCols, + String[] headersToUse, final SinkFactory sinkFactory) throws CsvReaderException { final String[][] nullValueLiteralsToUse = new String[numOutputCols][]; for (int ii = 0; ii < numOutputCols; ++ii) { diff --git a/src/main/java/io/deephaven/csv/reading/ReaderUtil.java b/src/main/java/io/deephaven/csv/reading/ReaderUtil.java index d3492b7..a11e165 100644 --- a/src/main/java/io/deephaven/csv/reading/ReaderUtil.java +++ b/src/main/java/io/deephaven/csv/reading/ReaderUtil.java @@ -31,6 +31,17 @@ public static void trimWhitespace(final ByteSlice cs) { cs.reset(data, begin, end); } + /** + * Get the expected length of a UTF-8 sequence, given its first byte, and its + * corresponding length in the specified units (UTF-16 or UTF-32). + * @param firstByte The first byte of the UTF-8 sequence. + * @param numBytes The number of remaining bytes in the input field (including firstByte). If the UTF-8 + * sequence specifies a number of bytes larger than the number of remaining bytes, an + * exception is thrown. + * @param useUtf32CountingConvention Whether 'charCountResult' should be in units of UTF-32 or UTF-16. + * @param charCountResult The number of UTF-32 or UTF-16 units specified by the UTF-8 character. + * @return The length of the UTF-8 sequence. + */ public static int getUtf8LengthAndCharLength( byte firstByte, int numBytes, boolean useUtf32CountingConvention, MutableInt charCountResult) { @@ -47,6 +58,7 @@ public static int getUtf8LengthAndCharLength( /** * Calculate the expected length of a UTF-8 sequence, given its first byte. + * * @param firstByte The first byte of the sequence. * @return The length of the sequence, in the range 1..4 inclusive. */ diff --git a/src/main/java/io/deephaven/csv/reading/cells/CellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/CellGrabber.java index 4e059ec..37f7838 100644 --- a/src/main/java/io/deephaven/csv/reading/cells/CellGrabber.java +++ b/src/main/java/io/deephaven/csv/reading/cells/CellGrabber.java @@ -23,9 +23,9 @@ void grabNext(final ByteSlice dest, final MutableBoolean lastInRow, final MutableBoolean endOfInput) throws CsvReaderException; /** - * Returns the "physical" row number, that is the row number of the input file. This differs from the "logical" - * row number, which is the row number of the CSV data being processed. The difference arises when, due to - * quotation marks, a single CSV row can span multiple lines of input. + * Returns the "physical" row number, that is the row number of the input file. This differs from the "logical" row + * number, which is the row number of the CSV data being processed. The difference arises when, due to quotation + * marks, a single CSV row can span multiple lines of input. */ int physicalRowNum(); } diff --git a/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java b/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java index 66e2bc7..b1b5a8f 100644 --- a/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java +++ b/src/main/java/io/deephaven/csv/reading/cells/DelimitedCellGrabber.java @@ -73,7 +73,7 @@ public DelimitedCellGrabber( @Override public void grabNext(final ByteSlice dest, final MutableBoolean lastInRow, - final MutableBoolean endOfInput) throws CsvReaderException { + final MutableBoolean endOfInput) throws CsvReaderException { spillBuffer.clear(); startOffset = offset; @@ -103,7 +103,7 @@ public void grabNext(final ByteSlice dest, final MutableBoolean lastInRow, * the row, otherwise the contents will be set to false. */ private void processQuotedMode(final ByteSlice dest, final MutableBoolean lastInRow, - final MutableBoolean endOfInput) throws CsvReaderException { + final MutableBoolean endOfInput) throws CsvReaderException { startOffset = offset; boolean prevCharWasCarriageReturn = false; while (true) { @@ -172,7 +172,7 @@ private void processQuotedMode(final ByteSlice dest, final MutableBoolean lastIn * Process characters in "unquoted mode". This is easy: eat characters until the next field or line delimiter. */ private void processUnquotedMode(final ByteSlice dest, final MutableBoolean lastInRow, - final MutableBoolean endOfInput) throws CsvReaderException { + final MutableBoolean endOfInput) throws CsvReaderException { startOffset = offset; finishField(dest, lastInRow, endOfInput); } @@ -200,7 +200,7 @@ private void skipWhitespace() throws CsvReaderException { * Otherwise, its contents are set to false. */ private void finishField(final ByteSlice dest, final MutableBoolean lastInRow, - final MutableBoolean endOfInput) + final MutableBoolean endOfInput) throws CsvReaderException { while (true) { if (!tryEnsureMore()) { diff --git a/src/main/java/io/deephaven/csv/reading/headers/DelimitedHeaderFinder.java b/src/main/java/io/deephaven/csv/reading/headers/DelimitedHeaderFinder.java index 50afce4..850f947 100644 --- a/src/main/java/io/deephaven/csv/reading/headers/DelimitedHeaderFinder.java +++ b/src/main/java/io/deephaven/csv/reading/headers/DelimitedHeaderFinder.java @@ -18,7 +18,7 @@ public class DelimitedHeaderFinder { * overrides. */ public static String[] determineHeadersToUse(final CsvSpecs specs, - final CellGrabber grabber, final MutableObject firstDataRowHolder) + final CellGrabber grabber, final MutableObject firstDataRowHolder) throws CsvReaderException { String[] headersToUse = null; if (specs.hasHeaderRow()) {