diff --git a/pom.xml b/pom.xml index 268a7b8..e874628 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 edu.pitt.dbmi data-reader - 0.2.2-SNAPSHOT + 0.2.3-SNAPSHOT jar diff --git a/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java b/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java index 2fb3ed3..a96bd7c 100644 --- a/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java +++ b/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java @@ -22,10 +22,13 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.MappedByteBuffer; +import java.nio.channels.ClosedByInterruptException; import java.nio.channels.FileChannel; import java.util.Collections; import java.util.LinkedList; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * @@ -35,6 +38,8 @@ */ public class BasicDataPreviewer extends AbstractDataPreviewer implements DataPreviewer { + private static final Logger LOGGER = LoggerFactory.getLogger(BasicDataPreviewer.class); + public BasicDataPreviewer(File dataFile) { super(dataFile); } @@ -49,6 +54,16 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t } List linePreviews = new LinkedList<>(); + try { + getPreviews(fromLine, toLine, numOfCharacters, linePreviews); + } catch (ClosedByInterruptException exception) { + LOGGER.error("", exception); + } + + return linePreviews; + } + + protected void getPreviews(int fromLine, int toLine, int numOfCharacters, List list) throws IOException { try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) { long fileSize = fc.size(); long position = 0; @@ -63,7 +78,7 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !isDone) { + while (buffer.hasRemaining() && !isDone && !Thread.currentThread().isInterrupted()) { byte currentChar = buffer.get(); if (skipLine) { if (currentChar == CARRIAGE_RETURN || currentChar == LINE_FEED) { @@ -71,7 +86,7 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t if (charCount > 0) { charCount = 0; - linePreviews.add(lineBuilder.toString()); + list.add(lineBuilder.toString()); lineBuilder.delete(0, lineBuilder.length()); } @@ -88,7 +103,7 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t if (currentChar == CARRIAGE_RETURN || currentChar == LINE_FEED) { if (charCount > 0) { charCount = 0; - linePreviews.add(lineBuilder.toString()); + list.add(lineBuilder.toString()); lineBuilder.delete(0, lineBuilder.length()); } @@ -114,11 +129,9 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); } - - return linePreviews; } } diff --git a/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java index 8ebb97d..6dbc9db 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.MappedByteBuffer; +import java.nio.channels.ClosedByInterruptException; import java.nio.channels.FileChannel; /** @@ -36,7 +37,8 @@ public abstract class AbstractDataFileReader { protected static final byte LINE_FEED = '\n'; protected static final byte CARRIAGE_RETURN = '\r'; - protected static final byte SPACE_CHAR = ' '; + protected static final byte SPACE_CHAR = Delimiter.SPACE.getDelimiterChar(); + protected static final String EMPTY_STRING = ""; protected byte quoteCharacter; protected String missingValueMarker; @@ -52,9 +54,9 @@ public AbstractDataFileReader(File dataFile, Delimiter delimiter) { this.dataFile = dataFile; this.delimiter = delimiter; + this.missingValueMarker = EMPTY_STRING; + this.commentMarker = EMPTY_STRING; this.quoteCharacter = -1; - this.commentMarker = ""; - this.numberOfLines = -1; this.numberOfColumns = -1; } @@ -71,41 +73,52 @@ private int countNumberOfColumns() throws IOException { byte[] prefix = commentMarker.getBytes(); int index = 0; - boolean reqCheck = prefix.length > 0; + boolean reqCmntCheck = prefix.length > 0; boolean skipLine = false; - boolean finished = false; boolean hasQuoteChar = false; - byte prevNonBlankChar = SPACE_CHAR; + boolean finished = false; byte prevChar = -1; + byte prevNonBlankChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !finished) { + while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); - if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { + if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) { + prevChar = currChar; + continue; + } + + finished = !skipLine; + if (finished) { + count++; + } + skipLine = false; - finished = prevNonBlankChar > SPACE_CHAR; + reqCmntCheck = prefix.length > 0; + index = 0; + prevNonBlankChar = -1; } else if (!skipLine) { + // save any non-blank char encountered if (currChar > SPACE_CHAR) { prevNonBlankChar = currChar; } - if (reqCheck && prevNonBlankChar > SPACE_CHAR) { + // skip any blank chars at the begining of the line + if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) { + continue; + } + + if (reqCmntCheck) { if (currChar == prefix[index]) { index++; - - // all the comment chars are matched if (index == prefix.length) { - index = 0; skipLine = true; - count = 0; - prevNonBlankChar = SPACE_CHAR; - prevChar = currChar; continue; } } else { - reqCheck = false; + reqCmntCheck = false; } } @@ -114,17 +127,13 @@ private int countNumberOfColumns() throws IOException { } else if (!hasQuoteChar) { switch (delimiter) { case WHITESPACE: - if (currChar > SPACE_CHAR && prevChar <= SPACE_CHAR) { - if (!hasQuoteChar) { - count++; - } + if (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR) { + count++; } break; default: if (currChar == delimChar) { - if (!hasQuoteChar) { - count++; - } + count++; } } } @@ -137,12 +146,10 @@ private int countNumberOfColumns() throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !finished); + } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted()); - if (delimiter != Delimiter.WHITESPACE) { - if (prevNonBlankChar > SPACE_CHAR) { - count++; - } + if (!finished) { + count++; } } @@ -159,52 +166,63 @@ private int countNumberOfLines() throws IOException { byte[] prefix = commentMarker.getBytes(); int index = 0; - boolean reqCheck = prefix.length > 0; + boolean reqCmntCheck = prefix.length > 0; boolean skipLine = false; + boolean moveToEOL = false; + byte prevChar = -1; + byte prevNonBlankChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); - if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { - skipLine = false; - if (index > 0) { - index = 0; + if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) { + continue; + } + + if (!skipLine) { count++; } - } else if (!skipLine) { - if (currChar <= SPACE_CHAR && index == 0) { + index = 0; + moveToEOL = false; + skipLine = false; + prevNonBlankChar = -1; + } else if (!moveToEOL) { + // save any non-blank char encountered + if (currChar > SPACE_CHAR) { + prevNonBlankChar = currChar; + } + + // skip any blank chars at the begining of the line + if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) { continue; } - if (reqCheck) { + if (reqCmntCheck) { if (currChar == prefix[index]) { index++; if (index == prefix.length) { - index = 0; + moveToEOL = true; skipLine = true; } } else { - index = 0; - skipLine = true; - count++; + moveToEOL = true; } } else { - skipLine = true; - count++; + moveToEOL = true; } } + + prevChar = currChar; } position += size; if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while ((position < fileSize) && !Thread.currentThread().isInterrupted()); - // case where no newline at end of file - if (index > 0) { - index = 0; + if (!(prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && !skipLine) { count++; } } @@ -225,8 +243,8 @@ public String getMissingValueMarker() { } public void setMissingValueMarker(String missingValueMarker) { - this.missingValueMarker = (missingValueMarker) == null - ? missingValueMarker + this.missingValueMarker = (missingValueMarker == null) + ? EMPTY_STRING : missingValueMarker.trim(); } @@ -235,14 +253,18 @@ public String getCommentMarker() { } public void setCommentMarker(String commentMarker) { - if (commentMarker != null) { - this.commentMarker = commentMarker.trim(); - } + this.commentMarker = (commentMarker == null) + ? EMPTY_STRING + : commentMarker.trim(); } public int getNumberOfLines() throws IOException { if (numberOfLines == -1) { - numberOfLines = countNumberOfLines(); + try { + numberOfLines = countNumberOfLines(); + } catch (ClosedByInterruptException exception) { + numberOfLines = -1; + } } return numberOfLines; @@ -250,7 +272,11 @@ public int getNumberOfLines() throws IOException { public int getNumberOfColumns() throws IOException { if (numberOfColumns == -1) { - numberOfColumns = countNumberOfColumns(); + try { + numberOfColumns = countNumberOfColumns(); + } catch (ClosedByInterruptException exception) { + numberOfColumns = -1; + } } return numberOfColumns; diff --git a/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java b/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java index 7e3f3e9..14dd8b4 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.MappedByteBuffer; +import java.nio.channels.ClosedByInterruptException; import java.nio.channels.FileChannel; import java.util.LinkedList; import java.util.List; @@ -49,11 +50,19 @@ public LowerCovarianceDataReader(File dataFile, Delimiter delimiter) { @Override public Dataset readInData() throws IOException { - int numberOfCases = getNumberOfCases(); - List variables = extractVariables(); - double[][] data = extractCovarianceData(variables.size()); + Dataset dataset; + try { + int numberOfCases = getNumberOfCases(); + List variables = extractVariables(); + double[][] data = extractCovarianceData(variables.size()); + + dataset = new CovarianceDataset(numberOfCases, variables, data); + } catch (ClosedByInterruptException exception) { + dataset = null; + LOGGER.error("", exception); + } - return new CovarianceDataset(numberOfCases, variables, data); + return dataset; } private double[][] extractCovarianceData(int matrixSize) throws IOException { @@ -84,7 +93,7 @@ private double[][] extractCovarianceData(int matrixSize) throws IOException { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); if (skipToData) { - while (buffer.hasRemaining() && skipToData) { + while (buffer.hasRemaining() && skipToData && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -123,7 +132,7 @@ private double[][] extractCovarianceData(int matrixSize) throws IOException { } } - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -251,7 +260,7 @@ private double[][] extractCovarianceData(int matrixSize) throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case where no newline at end of file if (colNum > 0 || dataBuilder.length() > 0) { @@ -316,7 +325,7 @@ private List extractVariables() throws IOException { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); if (skipCaseNum) { - while (buffer.hasRemaining() && skipCaseNum) { + while (buffer.hasRemaining() && skipCaseNum && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -354,7 +363,7 @@ private List extractVariables() throws IOException { } } - while (buffer.hasRemaining() && !doneExtractVars) { + while (buffer.hasRemaining() && !doneExtractVars && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -433,7 +442,7 @@ private List extractVariables() throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !doneExtractVars); + } while (position < fileSize && !doneExtractVars && !Thread.currentThread().isInterrupted()); // data at the end of line if (colNum > 0 || dataBuilder.length() > 0) { @@ -473,7 +482,7 @@ public int getNumberOfCases() throws IOException { byte prevChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !finished) { + while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -522,7 +531,7 @@ public int getNumberOfCases() throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while ((position < fileSize) && !finished); + } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted()); if (dataBuilder.length() > 0) { String value = dataBuilder.toString().trim(); diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java index b6e2fd4..f3952c9 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java @@ -59,68 +59,92 @@ protected int[] getColumnNumbers(Set variables) throws IOException { byte[] prefix = commentMarker.getBytes(); int index = 0; int colNum = 0; - boolean reqCheck = prefix.length > 0; + boolean reqCmntCheck = prefix.length > 0; boolean skipLine = false; - boolean taskFinished = false; boolean hasQuoteChar = false; - byte prevNonBlankChar = SPACE_CHAR; + boolean finished = false; byte prevChar = -1; + byte prevNonBlankChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !taskFinished) { + while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { + if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) { + prevChar = currChar; + continue; + } + + if (skipLine) { + dataBuilder.delete(0, dataBuilder.length()); + } else { + finished = true; + + colNum++; + String value = dataBuilder.toString().trim(); + dataBuilder.delete(0, dataBuilder.length()); + + if (variables.contains(value)) { + indexList.add(colNum); + } + } + skipLine = false; - taskFinished = prevNonBlankChar > SPACE_CHAR; + reqCmntCheck = prefix.length > 0; + index = 0; + prevNonBlankChar = -1; + hasQuoteChar = false; } else if (!skipLine) { + // save any non-blank char encountered if (currChar > SPACE_CHAR) { prevNonBlankChar = currChar; } - if (reqCheck && prevNonBlankChar > SPACE_CHAR) { + // skip any blank chars at the begining of the line + if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) { + continue; + } + + if (reqCmntCheck) { if (currChar == prefix[index]) { index++; if (index == prefix.length) { - index = 0; skipLine = true; - colNum = 0; - prevNonBlankChar = SPACE_CHAR; - dataBuilder.delete(0, dataBuilder.length()); - prevChar = currChar; continue; } } else { - index = 0; - reqCheck = false; + reqCmntCheck = false; } } if (currChar == quoteCharacter) { hasQuoteChar = !hasQuoteChar; - } else if (hasQuoteChar) { - dataBuilder.append((char) currChar); } else { - boolean isDelimiter; - switch (delimiter) { - case WHITESPACE: - isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR); - break; - default: - isDelimiter = (currChar == delimChar); - } + if (hasQuoteChar) { + dataBuilder.append((char) currChar); + } else { + boolean isDelimiter; + switch (delimiter) { + case WHITESPACE: + isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR); + break; + default: + isDelimiter = (currChar == delimChar); + } - if (isDelimiter) { - colNum++; - String value = dataBuilder.toString().trim(); - dataBuilder.delete(0, dataBuilder.length()); + if (isDelimiter) { + colNum++; + String value = dataBuilder.toString().trim(); + dataBuilder.delete(0, dataBuilder.length()); - if (variables.contains(value)) { - indexList.add(colNum); + if (variables.contains(value)) { + indexList.add(colNum); + } + } else { + dataBuilder.append((char) currChar); } - } else { - dataBuilder.append((char) currChar); } } } @@ -132,10 +156,9 @@ protected int[] getColumnNumbers(Set variables) throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !taskFinished); + } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted()); - // data at the end of line - if (colNum > 0 || dataBuilder.length() > 0) { + if (!finished) { colNum++; String value = dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); @@ -147,7 +170,7 @@ protected int[] getColumnNumbers(Set variables) throws IOException { } int[] indices = new int[indexList.size()]; - if (indices.length > 0) { + if (!indexList.isEmpty()) { int i = 0; for (Integer index : indexList) { indices[i++] = index; @@ -161,6 +184,10 @@ protected int[] filterValidColumnNumbers(int[] columnNumbers) throws IOException Set indices = new TreeSet<>(); int numOfVars = getNumberOfColumns(); for (int colNum : columnNumbers) { + if (Thread.currentThread().isInterrupted()) { + break; + } + if (colNum > 0 && colNum <= numOfVars) { indices.add(colNum); } @@ -187,7 +214,7 @@ protected List generateVariables(int[] excludedColumns) throws IOExcepti int numOfCols = getNumberOfColumns(); int length = excludedColumns.length; int excludedIndex = 0; - for (int colNum = 1; colNum <= numOfCols; colNum++) { + for (int colNum = 1; colNum <= numOfCols && !Thread.currentThread().isInterrupted(); colNum++) { if (length > 0 && (excludedIndex < length && colNum == excludedColumns[excludedIndex])) { excludedIndex++; } else { diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java index 528b9d2..8eb463b 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java @@ -83,7 +83,7 @@ protected double[][] extractData(List variables, int[] excludedColumns) // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -121,7 +121,7 @@ protected double[][] extractData(List variables, int[] excludedColumns) } // read in data - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -250,7 +250,7 @@ protected double[][] extractData(List variables, int[] excludedColumns) if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when there is no newline at end of file if (colNum > 0 || dataBuilder.length() > 0) { @@ -302,90 +302,113 @@ protected List extractVariables(int[] excludedColumns) throws IOExceptio StringBuilder dataBuilder = new StringBuilder(); byte[] prefix = commentMarker.getBytes(); - int index = 0; + int prefixIndex = 0; + int excludedIndex = 0; + int numOfExCols = excludedColumns.length; int colNum = 0; int lineNum = 1; - int numOfExCols = excludedColumns.length; - int excludedIndex = 0; - boolean reqCheck = prefix.length > 0; + boolean reqCmntCheck = prefix.length > 0; boolean skipLine = false; - boolean finished = false; boolean hasQuoteChar = false; - byte prevNonBlankChar = SPACE_CHAR; + boolean finished = false; byte prevChar = -1; + byte prevNonBlankChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !finished) { + while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { - skipLine = false; - if (prevNonBlankChar > SPACE_CHAR) { - finished = true; + if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) { + prevChar = currChar; + continue; + } + + if (skipLine) { + dataBuilder.delete(0, dataBuilder.length()); } else { - lineNum++; - if (currChar == LINE_FEED && prevChar == CARRIAGE_RETURN) { - lineNum--; + finished = true; + + colNum++; + String value = dataBuilder.toString().trim(); + dataBuilder.delete(0, dataBuilder.length()); + + if (numOfExCols == 0 || excludedIndex >= numOfExCols || colNum != excludedColumns[excludedIndex]) { + if (value.length() > 0) { + variables.add(value); + } else { + String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); + LOGGER.error(errMsg); + throw new DataReaderException(errMsg); + } } } + + lineNum++; + skipLine = false; + reqCmntCheck = prefix.length > 0; + prefixIndex = 0; + prevNonBlankChar = -1; + hasQuoteChar = false; } else if (!skipLine) { + // save any non-blank char encountered if (currChar > SPACE_CHAR) { prevNonBlankChar = currChar; } - if (reqCheck && prevNonBlankChar > SPACE_CHAR) { - if (currChar == prefix[index]) { - index++; + // skip any blank chars at the begining of the line + if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) { + prevChar = currChar; + continue; + } - // all the comment chars are matched - if (index == prefix.length) { - index = 0; + if (reqCmntCheck) { + if (currChar == prefix[prefixIndex]) { + prefixIndex++; + if (prefixIndex == prefix.length) { skipLine = true; - colNum = 0; - prevNonBlankChar = SPACE_CHAR; - dataBuilder.delete(0, dataBuilder.length()); - prevChar = currChar; continue; } } else { - index = 0; - reqCheck = false; + reqCmntCheck = false; } } if (currChar == quoteCharacter) { hasQuoteChar = !hasQuoteChar; - } else if (hasQuoteChar) { - dataBuilder.append((char) currChar); } else { - boolean isDelimiter; - switch (delimiter) { - case WHITESPACE: - isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR); - break; - default: - isDelimiter = (currChar == delimChar); - } + if (hasQuoteChar) { + dataBuilder.append((char) currChar); + } else { + boolean isDelimiter; + switch (delimiter) { + case WHITESPACE: + isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR); + break; + default: + isDelimiter = (currChar == delimChar); + } - if (isDelimiter) { - colNum++; - String value = dataBuilder.toString().trim(); - dataBuilder.delete(0, dataBuilder.length()); + if (isDelimiter) { + colNum++; + String value = dataBuilder.toString().trim(); + dataBuilder.delete(0, dataBuilder.length()); - if (numOfExCols > 0 && (excludedIndex < numOfExCols && colNum == excludedColumns[excludedIndex])) { - excludedIndex++; - } else { - if (value.length() > 0) { - variables.add(value); + if (numOfExCols > 0 && (excludedIndex < numOfExCols && colNum == excludedColumns[excludedIndex])) { + excludedIndex++; } else { - String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); - LOGGER.error(errMsg); - throw new DataReaderException(errMsg); + if (value.length() > 0) { + variables.add(value); + } else { + String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); + LOGGER.error(errMsg); + throw new DataReaderException(errMsg); + } } + } else { + dataBuilder.append((char) currChar); } - } else { - dataBuilder.append((char) currChar); } } } @@ -397,29 +420,20 @@ protected List extractVariables(int[] excludedColumns) throws IOExceptio if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !finished); + } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted()); - // data at the end of line - if (colNum > 0 || dataBuilder.length() > 0) { + if (!finished) { colNum++; String value = dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); if (numOfExCols == 0 || excludedIndex >= numOfExCols || colNum != excludedColumns[excludedIndex]) { - switch (delimiter) { - case WHITESPACE: - if (value.length() > 0) { - variables.add(value); - } - break; - default: - if (value.length() > 0) { - variables.add(value); - } else { - String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); - LOGGER.error(errMsg); - throw new DataReaderException(errMsg); - } + if (value.length() > 0) { + variables.add(value); + } else { + String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); + LOGGER.error(errMsg); + throw new DataReaderException(errMsg); } } } diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java index 751424c..0722999 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java @@ -78,7 +78,7 @@ protected int[][] extractAndEncodeData(DiscreteVarInfo[] varInfos, int[] exclude // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -115,7 +115,7 @@ protected int[][] extractAndEncodeData(DiscreteVarInfo[] varInfos, int[] exclude } } - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -237,7 +237,7 @@ protected int[][] extractAndEncodeData(DiscreteVarInfo[] varInfos, int[] exclude if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline char at the end of the file if (colNum > 0 || dataBuilder.length() > 0) { @@ -302,7 +302,7 @@ protected DiscreteVarInfo[] extractVariableData(DiscreteVarInfo[] varInfos, int[ // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -340,7 +340,7 @@ protected DiscreteVarInfo[] extractVariableData(DiscreteVarInfo[] varInfos, int[ } // read in data - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -453,7 +453,7 @@ protected DiscreteVarInfo[] extractVariableData(DiscreteVarInfo[] varInfos, int[ if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline at end of file if (colNum > 0 || dataBuilder.length() > 0) { @@ -513,7 +513,7 @@ protected DiscreteVarInfo[] extractVariables(int[] excludedColumns) throws IOExc byte prevChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !taskFinished) { + while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -595,7 +595,7 @@ protected DiscreteVarInfo[] extractVariables(int[] excludedColumns) throws IOExc if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !taskFinished); + } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted()); // data at the end of line if (colNum > 0 || dataBuilder.length() > 0) { @@ -639,7 +639,7 @@ protected DiscreteVarInfo[] generateDiscreteVariables(int[] excludedColumns) thr int exColIndex = 0; int varInfoIndex = 0; DiscreteVarInfo[] varInfos = new DiscreteVarInfo[size]; - for (int colNum = 1; colNum <= numOfCols; colNum++) { + for (int colNum = 1; colNum <= numOfCols && !Thread.currentThread().isInterrupted(); colNum++) { if (numOfExCols > 0 && (exColIndex < numOfExCols && colNum == excludedColumns[exColIndex])) { exColIndex++; } else { diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java index 1cbba60..7447da1 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java @@ -22,8 +22,11 @@ import edu.pitt.dbmi.data.Delimiter; import java.io.File; import java.io.IOException; +import java.nio.channels.ClosedByInterruptException; import java.util.Collections; import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * @@ -33,6 +36,8 @@ */ public abstract class AbstractTabularDataFileReader extends AbstractBasicTabularDataFileReader implements TabularDataReader { + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractTabularDataFileReader.class); + public AbstractTabularDataFileReader(File dataFile, Delimiter delimiter) { super(dataFile, delimiter); } @@ -41,19 +46,42 @@ public AbstractTabularDataFileReader(File dataFile, Delimiter delimiter) { @Override public Dataset readInData(Set excludedVariables) throws IOException { - int[] excludedColumns = hasHeader ? getColumnNumbers(excludedVariables) : new int[0]; + Dataset dataset; + try { + int[] excludedColumns = hasHeader ? getColumnNumbers(excludedVariables) : new int[0]; + dataset = readInDataFromFile(excludedColumns); + } catch (ClosedByInterruptException exception) { + dataset = null; + LOGGER.error("", exception); + } - return readInDataFromFile(excludedColumns); + return dataset; } @Override public Dataset readInData(int[] excludedColumns) throws IOException { - return readInDataFromFile(filterValidColumnNumbers(excludedColumns)); + Dataset dataset; + try { + dataset = readInDataFromFile(filterValidColumnNumbers(excludedColumns)); + } catch (ClosedByInterruptException exception) { + dataset = null; + LOGGER.error("", exception); + } + + return dataset; } @Override public Dataset readInData() throws IOException { - return readInData(Collections.EMPTY_SET); + Dataset dataset; + try { + dataset = readInData(Collections.EMPTY_SET); + } catch (ClosedByInterruptException exception) { + dataset = null; + LOGGER.error("", exception); + } + + return dataset; } } diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java index 5328e71..71a9854 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java @@ -23,6 +23,7 @@ import edu.pitt.dbmi.data.Delimiter; import java.io.File; import java.io.IOException; +import java.nio.channels.ClosedByInterruptException; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,10 +44,18 @@ public ContinuousTabularDataFileReader(File dataFile, Delimiter delimiter) { @Override protected Dataset readInDataFromFile(int[] excludedColumns) throws IOException { - List variables = hasHeader ? extractVariables(excludedColumns) : generateVariables(excludedColumns); - double[][] data = extractData(variables, excludedColumns); + Dataset dataset; + try { + List variables = hasHeader ? extractVariables(excludedColumns) : generateVariables(excludedColumns); + double[][] data = extractData(variables, excludedColumns); - return new ContinuousTabularDataset(variables, data); + dataset = new ContinuousTabularDataset(variables, data); + } catch (ClosedByInterruptException exception) { + dataset = null; + LOGGER.error("", exception); + } + + return dataset; } } diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java index 387b56a..a14ae7a 100644 --- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java +++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java @@ -67,6 +67,10 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[] int mixedVarInfoIndex = 0; for (MixedVarInfo mixedVarInfo : mixedVarInfos) { + if (Thread.currentThread().isInterrupted()) { + break; + } + if (mixedVarInfo.isContinuous()) { mixedVarInfo.clearValues(); continuousData[mixedVarInfoIndex++] = new double[numOfRows]; @@ -105,7 +109,7 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[] // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -143,7 +147,7 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[] } // read in data - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -296,7 +300,7 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[] if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline char at the end of the file if (colNum > 0 || dataBuilder.length() > 0) { @@ -376,7 +380,7 @@ private MixedVarInfo[] analysMixedVariables(MixedVarInfo[] mixedVarInfos, int[] // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -414,7 +418,7 @@ private MixedVarInfo[] analysMixedVariables(MixedVarInfo[] mixedVarInfos, int[] } // read in data - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -540,7 +544,7 @@ private MixedVarInfo[] analysMixedVariables(MixedVarInfo[] mixedVarInfos, int[] if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline at end of file if (colNum > 0 || dataBuilder.length() > 0) { @@ -607,7 +611,7 @@ private MixedVarInfo[] extractMixedVariables(int[] excludedColumns) throws IOExc byte prevChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !taskFinished) { + while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -689,7 +693,7 @@ private MixedVarInfo[] extractMixedVariables(int[] excludedColumns) throws IOExc if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !taskFinished); + } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted()); // data at the end of line if (colNum > 0 || dataBuilder.length() > 0) { @@ -733,7 +737,7 @@ protected MixedVarInfo[] generateMixedVariables(int[] excludedColumns) throws IO int exColIndex = 0; int varInfoIndex = 0; MixedVarInfo[] varInfos = new MixedVarInfo[size]; - for (int colNum = 1; colNum <= numOfCols; colNum++) { + for (int colNum = 1; colNum <= numOfCols && !Thread.currentThread().isInterrupted(); colNum++) { if (numOfExCols > 0 && (exColIndex < numOfExCols && colNum == excludedColumns[exColIndex])) { exColIndex++; } else { diff --git a/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java b/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java index 660c35d..a0bc133 100644 --- a/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java +++ b/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java @@ -89,7 +89,7 @@ public static char inferDelimiter(File file, int n, int skip, String comment, ch do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && lineCount < n) { + while (buffer.hasRemaining() && lineCount < n && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (skipLine) { @@ -137,7 +137,7 @@ public static char inferDelimiter(File file, int n, int skip, String comment, ch } else { if (byteBuffer.position() > 0) { byteBuffer.flip(); - while (byteBuffer.hasRemaining()) { + while (byteBuffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte c = byteBuffer.get(); if (c == quoteChar) { hasQuoteChar = !hasQuoteChar; @@ -165,10 +165,10 @@ public static char inferDelimiter(File file, int n, int skip, String comment, ch if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && lineCount < n); + } while (position < fileSize && lineCount < n && !Thread.currentThread().isInterrupted()); int maxIndex = 0; - for (int i = 1; i < delims.length; i++) { + for (int i = 1; i < delims.length && !Thread.currentThread().isInterrupted(); i++) { if (characters[delims[maxIndex]] < characters[delims[i]]) { maxIndex = i; } diff --git a/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java index da2ff34..66876a6 100644 --- a/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java +++ b/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java @@ -29,9 +29,12 @@ import java.io.IOException; import java.io.RandomAccessFile; import java.nio.MappedByteBuffer; +import java.nio.channels.ClosedByInterruptException; import java.nio.channels.FileChannel; import java.util.LinkedList; import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * @@ -41,6 +44,8 @@ */ public class CovarianceDataFileValidation extends AbstractDataFileReader implements DataFileValidation { + private static final Logger LOGGER = LoggerFactory.getLogger(CovarianceDataFileValidation.class); + private final List validationResults; public CovarianceDataFileValidation(File dataFile, Delimiter delimiter) { @@ -60,15 +65,18 @@ public void validate() { result.setAttribute(ValidationAttribute.ROW_NUMBER, numberOfCases); result.setAttribute(ValidationAttribute.COLUMN_NUMBER, numberOfVariables); validationResults.add(result); + } catch (ClosedByInterruptException exception) { + LOGGER.error("", exception); } catch (IOException exception) { String errMsg = String.format("Unable to read file %s.", dataFile.getName()); ValidationResult result = new ValidationResult(ValidationCode.ERROR, MessageType.FILE_IO_ERROR, errMsg); result.setAttribute(ValidationAttribute.FILE_NAME, dataFile.getName()); validationResults.add(result); + LOGGER.error("Validation failed.", exception); } } - public void validateCovarianceData(int numberOfVariables) throws IOException { + protected void validateCovarianceData(int numberOfVariables) throws IOException { try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) { long fileSize = fc.size(); long position = 0; @@ -93,7 +101,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); if (skipToData) { - while (buffer.hasRemaining() && skipToData) { + while (buffer.hasRemaining() && skipToData && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -132,7 +140,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException { } } - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -290,7 +298,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case where no newline at end of file if (colNum > 0 || dataBuilder.length() > 0) { @@ -352,7 +360,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException { } } - public int validateVariables() throws IOException { + protected int validateVariables() throws IOException { int count = 0; try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) { @@ -378,7 +386,7 @@ public int validateVariables() throws IOException { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); if (skipCaseNum) { - while (buffer.hasRemaining() && skipCaseNum) { + while (buffer.hasRemaining() && skipCaseNum && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -416,7 +424,7 @@ public int validateVariables() throws IOException { } } - while (buffer.hasRemaining() && !doneExtractVars) { + while (buffer.hasRemaining() && !doneExtractVars && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -496,7 +504,7 @@ public int validateVariables() throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !doneExtractVars); + } while (position < fileSize && !doneExtractVars && !Thread.currentThread().isInterrupted()); // data at the end of line // data at the end of line @@ -520,7 +528,7 @@ public int validateVariables() throws IOException { return count; } - public int validateNumberOfCases() throws IOException { + protected int validateNumberOfCases() throws IOException { int count = 0; try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) { @@ -539,7 +547,7 @@ public int validateNumberOfCases() throws IOException { byte prevChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !finished) { + while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -588,7 +596,7 @@ public int validateNumberOfCases() throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while ((position < fileSize) && !finished); + } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted()); if (dataBuilder.length() > 0) { String value = dataBuilder.toString().trim(); diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java index fbd646b..07490f5 100644 --- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java +++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java @@ -72,7 +72,7 @@ protected int validateVariables(int[] excludedColumns) throws IOException { byte prevChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !taskFinished) { + while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -155,7 +155,7 @@ protected int validateVariables(int[] excludedColumns) throws IOException { if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !taskFinished); + } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted()); // data at the end of line if (colNum > 0 || dataBuilder.length() > 0) { diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java index f7b8b13..1dd6721 100644 --- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java +++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java @@ -25,8 +25,11 @@ import edu.pitt.dbmi.data.validation.ValidationResult; import java.io.File; import java.io.IOException; +import java.nio.channels.ClosedByInterruptException; import java.util.Collections; import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * @@ -36,6 +39,8 @@ */ public abstract class AbstractTabularDataValidation extends AbstractTabularDataFileValidation implements TabularDataValidation { + private static final Logger LOGGER = LoggerFactory.getLogger(AbstractTabularDataValidation.class); + protected int markedMissing; protected int assumedMissing; protected int numOfRowsWithMissingValues; @@ -55,11 +60,14 @@ public void validate(Set excludedVariables) { numOfColsWithMissingValues = 0; try { validateDataFromFile(getColumnNumbers(excludedVariables)); + } catch (ClosedByInterruptException exception) { + LOGGER.error("", exception); } catch (IOException exception) { String errMsg = String.format("Unable to read file %s.", dataFile.getName()); ValidationResult result = new ValidationResult(ValidationCode.ERROR, MessageType.FILE_IO_ERROR, errMsg); result.setAttribute(ValidationAttribute.FILE_NAME, dataFile.getName()); validationResults.add(result); + LOGGER.error("Validation failed.", exception); } } @@ -67,11 +75,14 @@ public void validate(Set excludedVariables) { public void validate(int[] excludedColumns) { try { validateDataFromFile(filterValidColumnNumbers(excludedColumns)); + } catch (ClosedByInterruptException exception) { + LOGGER.error("", exception); } catch (IOException exception) { String errMsg = String.format("Unable to read file %s.", dataFile.getName()); ValidationResult result = new ValidationResult(ValidationCode.ERROR, MessageType.FILE_IO_ERROR, errMsg); result.setAttribute(ValidationAttribute.FILE_NAME, dataFile.getName()); validationResults.add(result); + LOGGER.error("Validation failed.", exception); } } diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java index 712e840..d134c41 100644 --- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java +++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java @@ -99,7 +99,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -137,7 +137,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio } // read in data - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -341,7 +341,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline char at end of file if (colNum > 0 || dataBuilder.length() > 0) { diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java index 43ba6cd..243f18b 100644 --- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java +++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java @@ -53,6 +53,10 @@ protected void validateDataFromFile(int[] excludedColumns) throws IOException { int numOfDiscrete = 0; int numOfContinuous = 0; for (MixedVarInfo var : varInfos) { + if (Thread.currentThread().isInterrupted()) { + break; + } + if (var.isContinuous()) { numOfContinuous++; } else { @@ -111,6 +115,10 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th int mixedVarInfoIndex = 0; for (MixedVarInfo mixedVarInfo : mixedVarInfos) { + if (Thread.currentThread().isInterrupted()) { + break; + } + if (mixedVarInfo.isContinuous()) { mixedVarInfo.clearValues(); continuousData[mixedVarInfoIndex++] = new double[numOfRows]; @@ -150,7 +158,7 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -188,7 +196,7 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th } // read in data - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -398,7 +406,7 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline char at the end of the file if (colNum > 0 || dataBuilder.length() > 0) { @@ -508,7 +516,7 @@ private MixedVarInfo[] analysMixedVariableValidation(MixedVarInfo[] mixedVarInfo // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -541,7 +549,7 @@ private MixedVarInfo[] analysMixedVariableValidation(MixedVarInfo[] mixedVarInfo } // read in data - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -650,7 +658,7 @@ private MixedVarInfo[] analysMixedVariableValidation(MixedVarInfo[] mixedVarInfo if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline at end of file if (colNum > 0 || dataBuilder.length() > 0) { @@ -709,7 +717,7 @@ private MixedVarInfo[] validateMixedVariables(int[] excludedColumns) throws IOEx byte prevChar = -1; do { MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size); - while (buffer.hasRemaining() && !taskFinished) { + while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -792,7 +800,7 @@ private MixedVarInfo[] validateMixedVariables(int[] excludedColumns) throws IOEx if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize && !taskFinished); + } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted()); // data at the end of line if (colNum > 0 || dataBuilder.length() > 0) { diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java index 72d2e10..f5dded0 100644 --- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java +++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java @@ -99,7 +99,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio // skip header, if any if (skipHeader) { - while (buffer.hasRemaining() && skipHeader) { + while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { skipLine = false; @@ -136,7 +136,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio } } - while (buffer.hasRemaining()) { + while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) { byte currChar = buffer.get(); if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) { @@ -318,7 +318,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio if ((position + size) > fileSize) { size = fileSize - position; } - } while (position < fileSize); + } while (position < fileSize && !Thread.currentThread().isInterrupted()); // case when no newline char at the end of the file if (colNum > 0 || dataBuilder.length() > 0) {