diff --git a/pom.xml b/pom.xml
index 268a7b8..e874628 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,7 +3,7 @@
4.0.0
edu.pitt.dbmi
data-reader
- 0.2.2-SNAPSHOT
+ 0.2.3-SNAPSHOT
jar
diff --git a/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java b/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java
index 2fb3ed3..a96bd7c 100644
--- a/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java
+++ b/src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java
@@ -22,10 +22,13 @@
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
+import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
*
@@ -35,6 +38,8 @@
*/
public class BasicDataPreviewer extends AbstractDataPreviewer implements DataPreviewer {
+ private static final Logger LOGGER = LoggerFactory.getLogger(BasicDataPreviewer.class);
+
public BasicDataPreviewer(File dataFile) {
super(dataFile);
}
@@ -49,6 +54,16 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t
}
List linePreviews = new LinkedList<>();
+ try {
+ getPreviews(fromLine, toLine, numOfCharacters, linePreviews);
+ } catch (ClosedByInterruptException exception) {
+ LOGGER.error("", exception);
+ }
+
+ return linePreviews;
+ }
+
+ protected void getPreviews(int fromLine, int toLine, int numOfCharacters, List list) throws IOException {
try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) {
long fileSize = fc.size();
long position = 0;
@@ -63,7 +78,7 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !isDone) {
+ while (buffer.hasRemaining() && !isDone && !Thread.currentThread().isInterrupted()) {
byte currentChar = buffer.get();
if (skipLine) {
if (currentChar == CARRIAGE_RETURN || currentChar == LINE_FEED) {
@@ -71,7 +86,7 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t
if (charCount > 0) {
charCount = 0;
- linePreviews.add(lineBuilder.toString());
+ list.add(lineBuilder.toString());
lineBuilder.delete(0, lineBuilder.length());
}
@@ -88,7 +103,7 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t
if (currentChar == CARRIAGE_RETURN || currentChar == LINE_FEED) {
if (charCount > 0) {
charCount = 0;
- linePreviews.add(lineBuilder.toString());
+ list.add(lineBuilder.toString());
lineBuilder.delete(0, lineBuilder.length());
}
@@ -114,11 +129,9 @@ public List getPreviews(int fromLine, int toLine, int numOfCharacters) t
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
}
-
- return linePreviews;
}
}
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java
index 8ebb97d..6dbc9db 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java
@@ -23,6 +23,7 @@
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
+import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
/**
@@ -36,7 +37,8 @@ public abstract class AbstractDataFileReader {
protected static final byte LINE_FEED = '\n';
protected static final byte CARRIAGE_RETURN = '\r';
- protected static final byte SPACE_CHAR = ' ';
+ protected static final byte SPACE_CHAR = Delimiter.SPACE.getDelimiterChar();
+ protected static final String EMPTY_STRING = "";
protected byte quoteCharacter;
protected String missingValueMarker;
@@ -52,9 +54,9 @@ public AbstractDataFileReader(File dataFile, Delimiter delimiter) {
this.dataFile = dataFile;
this.delimiter = delimiter;
+ this.missingValueMarker = EMPTY_STRING;
+ this.commentMarker = EMPTY_STRING;
this.quoteCharacter = -1;
- this.commentMarker = "";
-
this.numberOfLines = -1;
this.numberOfColumns = -1;
}
@@ -71,41 +73,52 @@ private int countNumberOfColumns() throws IOException {
byte[] prefix = commentMarker.getBytes();
int index = 0;
- boolean reqCheck = prefix.length > 0;
+ boolean reqCmntCheck = prefix.length > 0;
boolean skipLine = false;
- boolean finished = false;
boolean hasQuoteChar = false;
- byte prevNonBlankChar = SPACE_CHAR;
+ boolean finished = false;
byte prevChar = -1;
+ byte prevNonBlankChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !finished) {
+ while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
-
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
+ if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) {
+ prevChar = currChar;
+ continue;
+ }
+
+ finished = !skipLine;
+ if (finished) {
+ count++;
+ }
+
skipLine = false;
- finished = prevNonBlankChar > SPACE_CHAR;
+ reqCmntCheck = prefix.length > 0;
+ index = 0;
+ prevNonBlankChar = -1;
} else if (!skipLine) {
+ // save any non-blank char encountered
if (currChar > SPACE_CHAR) {
prevNonBlankChar = currChar;
}
- if (reqCheck && prevNonBlankChar > SPACE_CHAR) {
+ // skip any blank chars at the begining of the line
+ if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) {
+ continue;
+ }
+
+ if (reqCmntCheck) {
if (currChar == prefix[index]) {
index++;
-
- // all the comment chars are matched
if (index == prefix.length) {
- index = 0;
skipLine = true;
- count = 0;
- prevNonBlankChar = SPACE_CHAR;
-
prevChar = currChar;
continue;
}
} else {
- reqCheck = false;
+ reqCmntCheck = false;
}
}
@@ -114,17 +127,13 @@ private int countNumberOfColumns() throws IOException {
} else if (!hasQuoteChar) {
switch (delimiter) {
case WHITESPACE:
- if (currChar > SPACE_CHAR && prevChar <= SPACE_CHAR) {
- if (!hasQuoteChar) {
- count++;
- }
+ if (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR) {
+ count++;
}
break;
default:
if (currChar == delimChar) {
- if (!hasQuoteChar) {
- count++;
- }
+ count++;
}
}
}
@@ -137,12 +146,10 @@ private int countNumberOfColumns() throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !finished);
+ } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted());
- if (delimiter != Delimiter.WHITESPACE) {
- if (prevNonBlankChar > SPACE_CHAR) {
- count++;
- }
+ if (!finished) {
+ count++;
}
}
@@ -159,52 +166,63 @@ private int countNumberOfLines() throws IOException {
byte[] prefix = commentMarker.getBytes();
int index = 0;
- boolean reqCheck = prefix.length > 0;
+ boolean reqCmntCheck = prefix.length > 0;
boolean skipLine = false;
+ boolean moveToEOL = false;
+ byte prevChar = -1;
+ byte prevNonBlankChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
-
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
- skipLine = false;
- if (index > 0) {
- index = 0;
+ if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) {
+ continue;
+ }
+
+ if (!skipLine) {
count++;
}
- } else if (!skipLine) {
- if (currChar <= SPACE_CHAR && index == 0) {
+ index = 0;
+ moveToEOL = false;
+ skipLine = false;
+ prevNonBlankChar = -1;
+ } else if (!moveToEOL) {
+ // save any non-blank char encountered
+ if (currChar > SPACE_CHAR) {
+ prevNonBlankChar = currChar;
+ }
+
+ // skip any blank chars at the begining of the line
+ if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) {
continue;
}
- if (reqCheck) {
+ if (reqCmntCheck) {
if (currChar == prefix[index]) {
index++;
if (index == prefix.length) {
- index = 0;
+ moveToEOL = true;
skipLine = true;
}
} else {
- index = 0;
- skipLine = true;
- count++;
+ moveToEOL = true;
}
} else {
- skipLine = true;
- count++;
+ moveToEOL = true;
}
}
+
+ prevChar = currChar;
}
position += size;
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while ((position < fileSize) && !Thread.currentThread().isInterrupted());
- // case where no newline at end of file
- if (index > 0) {
- index = 0;
+ if (!(prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && !skipLine) {
count++;
}
}
@@ -225,8 +243,8 @@ public String getMissingValueMarker() {
}
public void setMissingValueMarker(String missingValueMarker) {
- this.missingValueMarker = (missingValueMarker) == null
- ? missingValueMarker
+ this.missingValueMarker = (missingValueMarker == null)
+ ? EMPTY_STRING
: missingValueMarker.trim();
}
@@ -235,14 +253,18 @@ public String getCommentMarker() {
}
public void setCommentMarker(String commentMarker) {
- if (commentMarker != null) {
- this.commentMarker = commentMarker.trim();
- }
+ this.commentMarker = (commentMarker == null)
+ ? EMPTY_STRING
+ : commentMarker.trim();
}
public int getNumberOfLines() throws IOException {
if (numberOfLines == -1) {
- numberOfLines = countNumberOfLines();
+ try {
+ numberOfLines = countNumberOfLines();
+ } catch (ClosedByInterruptException exception) {
+ numberOfLines = -1;
+ }
}
return numberOfLines;
@@ -250,7 +272,11 @@ public int getNumberOfLines() throws IOException {
public int getNumberOfColumns() throws IOException {
if (numberOfColumns == -1) {
- numberOfColumns = countNumberOfColumns();
+ try {
+ numberOfColumns = countNumberOfColumns();
+ } catch (ClosedByInterruptException exception) {
+ numberOfColumns = -1;
+ }
}
return numberOfColumns;
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java b/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java
index 7e3f3e9..14dd8b4 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/covariance/LowerCovarianceDataReader.java
@@ -27,6 +27,7 @@
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
+import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.util.LinkedList;
import java.util.List;
@@ -49,11 +50,19 @@ public LowerCovarianceDataReader(File dataFile, Delimiter delimiter) {
@Override
public Dataset readInData() throws IOException {
- int numberOfCases = getNumberOfCases();
- List variables = extractVariables();
- double[][] data = extractCovarianceData(variables.size());
+ Dataset dataset;
+ try {
+ int numberOfCases = getNumberOfCases();
+ List variables = extractVariables();
+ double[][] data = extractCovarianceData(variables.size());
+
+ dataset = new CovarianceDataset(numberOfCases, variables, data);
+ } catch (ClosedByInterruptException exception) {
+ dataset = null;
+ LOGGER.error("", exception);
+ }
- return new CovarianceDataset(numberOfCases, variables, data);
+ return dataset;
}
private double[][] extractCovarianceData(int matrixSize) throws IOException {
@@ -84,7 +93,7 @@ private double[][] extractCovarianceData(int matrixSize) throws IOException {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
if (skipToData) {
- while (buffer.hasRemaining() && skipToData) {
+ while (buffer.hasRemaining() && skipToData && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -123,7 +132,7 @@ private double[][] extractCovarianceData(int matrixSize) throws IOException {
}
}
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -251,7 +260,7 @@ private double[][] extractCovarianceData(int matrixSize) throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case where no newline at end of file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -316,7 +325,7 @@ private List extractVariables() throws IOException {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
if (skipCaseNum) {
- while (buffer.hasRemaining() && skipCaseNum) {
+ while (buffer.hasRemaining() && skipCaseNum && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -354,7 +363,7 @@ private List extractVariables() throws IOException {
}
}
- while (buffer.hasRemaining() && !doneExtractVars) {
+ while (buffer.hasRemaining() && !doneExtractVars && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -433,7 +442,7 @@ private List extractVariables() throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !doneExtractVars);
+ } while (position < fileSize && !doneExtractVars && !Thread.currentThread().isInterrupted());
// data at the end of line
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -473,7 +482,7 @@ public int getNumberOfCases() throws IOException {
byte prevChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !finished) {
+ while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -522,7 +531,7 @@ public int getNumberOfCases() throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while ((position < fileSize) && !finished);
+ } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted());
if (dataBuilder.length() > 0) {
String value = dataBuilder.toString().trim();
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java
index b6e2fd4..f3952c9 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractBasicTabularDataFileReader.java
@@ -59,68 +59,92 @@ protected int[] getColumnNumbers(Set variables) throws IOException {
byte[] prefix = commentMarker.getBytes();
int index = 0;
int colNum = 0;
- boolean reqCheck = prefix.length > 0;
+ boolean reqCmntCheck = prefix.length > 0;
boolean skipLine = false;
- boolean taskFinished = false;
boolean hasQuoteChar = false;
- byte prevNonBlankChar = SPACE_CHAR;
+ boolean finished = false;
byte prevChar = -1;
+ byte prevNonBlankChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !taskFinished) {
+ while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
+ if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) {
+ prevChar = currChar;
+ continue;
+ }
+
+ if (skipLine) {
+ dataBuilder.delete(0, dataBuilder.length());
+ } else {
+ finished = true;
+
+ colNum++;
+ String value = dataBuilder.toString().trim();
+ dataBuilder.delete(0, dataBuilder.length());
+
+ if (variables.contains(value)) {
+ indexList.add(colNum);
+ }
+ }
+
skipLine = false;
- taskFinished = prevNonBlankChar > SPACE_CHAR;
+ reqCmntCheck = prefix.length > 0;
+ index = 0;
+ prevNonBlankChar = -1;
+ hasQuoteChar = false;
} else if (!skipLine) {
+ // save any non-blank char encountered
if (currChar > SPACE_CHAR) {
prevNonBlankChar = currChar;
}
- if (reqCheck && prevNonBlankChar > SPACE_CHAR) {
+ // skip any blank chars at the begining of the line
+ if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) {
+ continue;
+ }
+
+ if (reqCmntCheck) {
if (currChar == prefix[index]) {
index++;
if (index == prefix.length) {
- index = 0;
skipLine = true;
- colNum = 0;
- prevNonBlankChar = SPACE_CHAR;
- dataBuilder.delete(0, dataBuilder.length());
-
prevChar = currChar;
continue;
}
} else {
- index = 0;
- reqCheck = false;
+ reqCmntCheck = false;
}
}
if (currChar == quoteCharacter) {
hasQuoteChar = !hasQuoteChar;
- } else if (hasQuoteChar) {
- dataBuilder.append((char) currChar);
} else {
- boolean isDelimiter;
- switch (delimiter) {
- case WHITESPACE:
- isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR);
- break;
- default:
- isDelimiter = (currChar == delimChar);
- }
+ if (hasQuoteChar) {
+ dataBuilder.append((char) currChar);
+ } else {
+ boolean isDelimiter;
+ switch (delimiter) {
+ case WHITESPACE:
+ isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR);
+ break;
+ default:
+ isDelimiter = (currChar == delimChar);
+ }
- if (isDelimiter) {
- colNum++;
- String value = dataBuilder.toString().trim();
- dataBuilder.delete(0, dataBuilder.length());
+ if (isDelimiter) {
+ colNum++;
+ String value = dataBuilder.toString().trim();
+ dataBuilder.delete(0, dataBuilder.length());
- if (variables.contains(value)) {
- indexList.add(colNum);
+ if (variables.contains(value)) {
+ indexList.add(colNum);
+ }
+ } else {
+ dataBuilder.append((char) currChar);
}
- } else {
- dataBuilder.append((char) currChar);
}
}
}
@@ -132,10 +156,9 @@ protected int[] getColumnNumbers(Set variables) throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !taskFinished);
+ } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted());
- // data at the end of line
- if (colNum > 0 || dataBuilder.length() > 0) {
+ if (!finished) {
colNum++;
String value = dataBuilder.toString().trim();
dataBuilder.delete(0, dataBuilder.length());
@@ -147,7 +170,7 @@ protected int[] getColumnNumbers(Set variables) throws IOException {
}
int[] indices = new int[indexList.size()];
- if (indices.length > 0) {
+ if (!indexList.isEmpty()) {
int i = 0;
for (Integer index : indexList) {
indices[i++] = index;
@@ -161,6 +184,10 @@ protected int[] filterValidColumnNumbers(int[] columnNumbers) throws IOException
Set indices = new TreeSet<>();
int numOfVars = getNumberOfColumns();
for (int colNum : columnNumbers) {
+ if (Thread.currentThread().isInterrupted()) {
+ break;
+ }
+
if (colNum > 0 && colNum <= numOfVars) {
indices.add(colNum);
}
@@ -187,7 +214,7 @@ protected List generateVariables(int[] excludedColumns) throws IOExcepti
int numOfCols = getNumberOfColumns();
int length = excludedColumns.length;
int excludedIndex = 0;
- for (int colNum = 1; colNum <= numOfCols; colNum++) {
+ for (int colNum = 1; colNum <= numOfCols && !Thread.currentThread().isInterrupted(); colNum++) {
if (length > 0 && (excludedIndex < length && colNum == excludedColumns[excludedIndex])) {
excludedIndex++;
} else {
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java
index 528b9d2..8eb463b 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractContinuousTabularDataFileReader.java
@@ -83,7 +83,7 @@ protected double[][] extractData(List variables, int[] excludedColumns)
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -121,7 +121,7 @@ protected double[][] extractData(List variables, int[] excludedColumns)
}
// read in data
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -250,7 +250,7 @@ protected double[][] extractData(List variables, int[] excludedColumns)
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when there is no newline at end of file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -302,90 +302,113 @@ protected List extractVariables(int[] excludedColumns) throws IOExceptio
StringBuilder dataBuilder = new StringBuilder();
byte[] prefix = commentMarker.getBytes();
- int index = 0;
+ int prefixIndex = 0;
+ int excludedIndex = 0;
+ int numOfExCols = excludedColumns.length;
int colNum = 0;
int lineNum = 1;
- int numOfExCols = excludedColumns.length;
- int excludedIndex = 0;
- boolean reqCheck = prefix.length > 0;
+ boolean reqCmntCheck = prefix.length > 0;
boolean skipLine = false;
- boolean finished = false;
boolean hasQuoteChar = false;
- byte prevNonBlankChar = SPACE_CHAR;
+ boolean finished = false;
byte prevChar = -1;
+ byte prevNonBlankChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !finished) {
+ while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
- skipLine = false;
- if (prevNonBlankChar > SPACE_CHAR) {
- finished = true;
+ if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) {
+ prevChar = currChar;
+ continue;
+ }
+
+ if (skipLine) {
+ dataBuilder.delete(0, dataBuilder.length());
} else {
- lineNum++;
- if (currChar == LINE_FEED && prevChar == CARRIAGE_RETURN) {
- lineNum--;
+ finished = true;
+
+ colNum++;
+ String value = dataBuilder.toString().trim();
+ dataBuilder.delete(0, dataBuilder.length());
+
+ if (numOfExCols == 0 || excludedIndex >= numOfExCols || colNum != excludedColumns[excludedIndex]) {
+ if (value.length() > 0) {
+ variables.add(value);
+ } else {
+ String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum);
+ LOGGER.error(errMsg);
+ throw new DataReaderException(errMsg);
+ }
}
}
+
+ lineNum++;
+ skipLine = false;
+ reqCmntCheck = prefix.length > 0;
+ prefixIndex = 0;
+ prevNonBlankChar = -1;
+ hasQuoteChar = false;
} else if (!skipLine) {
+ // save any non-blank char encountered
if (currChar > SPACE_CHAR) {
prevNonBlankChar = currChar;
}
- if (reqCheck && prevNonBlankChar > SPACE_CHAR) {
- if (currChar == prefix[index]) {
- index++;
+ // skip any blank chars at the begining of the line
+ if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) {
+ prevChar = currChar;
+ continue;
+ }
- // all the comment chars are matched
- if (index == prefix.length) {
- index = 0;
+ if (reqCmntCheck) {
+ if (currChar == prefix[prefixIndex]) {
+ prefixIndex++;
+ if (prefixIndex == prefix.length) {
skipLine = true;
- colNum = 0;
- prevNonBlankChar = SPACE_CHAR;
- dataBuilder.delete(0, dataBuilder.length());
-
prevChar = currChar;
continue;
}
} else {
- index = 0;
- reqCheck = false;
+ reqCmntCheck = false;
}
}
if (currChar == quoteCharacter) {
hasQuoteChar = !hasQuoteChar;
- } else if (hasQuoteChar) {
- dataBuilder.append((char) currChar);
} else {
- boolean isDelimiter;
- switch (delimiter) {
- case WHITESPACE:
- isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR);
- break;
- default:
- isDelimiter = (currChar == delimChar);
- }
+ if (hasQuoteChar) {
+ dataBuilder.append((char) currChar);
+ } else {
+ boolean isDelimiter;
+ switch (delimiter) {
+ case WHITESPACE:
+ isDelimiter = (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR);
+ break;
+ default:
+ isDelimiter = (currChar == delimChar);
+ }
- if (isDelimiter) {
- colNum++;
- String value = dataBuilder.toString().trim();
- dataBuilder.delete(0, dataBuilder.length());
+ if (isDelimiter) {
+ colNum++;
+ String value = dataBuilder.toString().trim();
+ dataBuilder.delete(0, dataBuilder.length());
- if (numOfExCols > 0 && (excludedIndex < numOfExCols && colNum == excludedColumns[excludedIndex])) {
- excludedIndex++;
- } else {
- if (value.length() > 0) {
- variables.add(value);
+ if (numOfExCols > 0 && (excludedIndex < numOfExCols && colNum == excludedColumns[excludedIndex])) {
+ excludedIndex++;
} else {
- String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum);
- LOGGER.error(errMsg);
- throw new DataReaderException(errMsg);
+ if (value.length() > 0) {
+ variables.add(value);
+ } else {
+ String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum);
+ LOGGER.error(errMsg);
+ throw new DataReaderException(errMsg);
+ }
}
+ } else {
+ dataBuilder.append((char) currChar);
}
- } else {
- dataBuilder.append((char) currChar);
}
}
}
@@ -397,29 +420,20 @@ protected List extractVariables(int[] excludedColumns) throws IOExceptio
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !finished);
+ } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted());
- // data at the end of line
- if (colNum > 0 || dataBuilder.length() > 0) {
+ if (!finished) {
colNum++;
String value = dataBuilder.toString().trim();
dataBuilder.delete(0, dataBuilder.length());
if (numOfExCols == 0 || excludedIndex >= numOfExCols || colNum != excludedColumns[excludedIndex]) {
- switch (delimiter) {
- case WHITESPACE:
- if (value.length() > 0) {
- variables.add(value);
- }
- break;
- default:
- if (value.length() > 0) {
- variables.add(value);
- } else {
- String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum);
- LOGGER.error(errMsg);
- throw new DataReaderException(errMsg);
- }
+ if (value.length() > 0) {
+ variables.add(value);
+ } else {
+ String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum);
+ LOGGER.error(errMsg);
+ throw new DataReaderException(errMsg);
}
}
}
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java
index 751424c..0722999 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractDiscreteTabularDataFileReader.java
@@ -78,7 +78,7 @@ protected int[][] extractAndEncodeData(DiscreteVarInfo[] varInfos, int[] exclude
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -115,7 +115,7 @@ protected int[][] extractAndEncodeData(DiscreteVarInfo[] varInfos, int[] exclude
}
}
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -237,7 +237,7 @@ protected int[][] extractAndEncodeData(DiscreteVarInfo[] varInfos, int[] exclude
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline char at the end of the file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -302,7 +302,7 @@ protected DiscreteVarInfo[] extractVariableData(DiscreteVarInfo[] varInfos, int[
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -340,7 +340,7 @@ protected DiscreteVarInfo[] extractVariableData(DiscreteVarInfo[] varInfos, int[
}
// read in data
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -453,7 +453,7 @@ protected DiscreteVarInfo[] extractVariableData(DiscreteVarInfo[] varInfos, int[
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline at end of file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -513,7 +513,7 @@ protected DiscreteVarInfo[] extractVariables(int[] excludedColumns) throws IOExc
byte prevChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !taskFinished) {
+ while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -595,7 +595,7 @@ protected DiscreteVarInfo[] extractVariables(int[] excludedColumns) throws IOExc
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !taskFinished);
+ } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted());
// data at the end of line
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -639,7 +639,7 @@ protected DiscreteVarInfo[] generateDiscreteVariables(int[] excludedColumns) thr
int exColIndex = 0;
int varInfoIndex = 0;
DiscreteVarInfo[] varInfos = new DiscreteVarInfo[size];
- for (int colNum = 1; colNum <= numOfCols; colNum++) {
+ for (int colNum = 1; colNum <= numOfCols && !Thread.currentThread().isInterrupted(); colNum++) {
if (numOfExCols > 0 && (exColIndex < numOfExCols && colNum == excludedColumns[exColIndex])) {
exColIndex++;
} else {
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java
index 1cbba60..7447da1 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/AbstractTabularDataFileReader.java
@@ -22,8 +22,11 @@
import edu.pitt.dbmi.data.Delimiter;
import java.io.File;
import java.io.IOException;
+import java.nio.channels.ClosedByInterruptException;
import java.util.Collections;
import java.util.Set;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
*
@@ -33,6 +36,8 @@
*/
public abstract class AbstractTabularDataFileReader extends AbstractBasicTabularDataFileReader implements TabularDataReader {
+ private static final Logger LOGGER = LoggerFactory.getLogger(AbstractTabularDataFileReader.class);
+
public AbstractTabularDataFileReader(File dataFile, Delimiter delimiter) {
super(dataFile, delimiter);
}
@@ -41,19 +46,42 @@ public AbstractTabularDataFileReader(File dataFile, Delimiter delimiter) {
@Override
public Dataset readInData(Set excludedVariables) throws IOException {
- int[] excludedColumns = hasHeader ? getColumnNumbers(excludedVariables) : new int[0];
+ Dataset dataset;
+ try {
+ int[] excludedColumns = hasHeader ? getColumnNumbers(excludedVariables) : new int[0];
+ dataset = readInDataFromFile(excludedColumns);
+ } catch (ClosedByInterruptException exception) {
+ dataset = null;
+ LOGGER.error("", exception);
+ }
- return readInDataFromFile(excludedColumns);
+ return dataset;
}
@Override
public Dataset readInData(int[] excludedColumns) throws IOException {
- return readInDataFromFile(filterValidColumnNumbers(excludedColumns));
+ Dataset dataset;
+ try {
+ dataset = readInDataFromFile(filterValidColumnNumbers(excludedColumns));
+ } catch (ClosedByInterruptException exception) {
+ dataset = null;
+ LOGGER.error("", exception);
+ }
+
+ return dataset;
}
@Override
public Dataset readInData() throws IOException {
- return readInData(Collections.EMPTY_SET);
+ Dataset dataset;
+ try {
+ dataset = readInData(Collections.EMPTY_SET);
+ } catch (ClosedByInterruptException exception) {
+ dataset = null;
+ LOGGER.error("", exception);
+ }
+
+ return dataset;
}
}
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java
index 5328e71..71a9854 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/ContinuousTabularDataFileReader.java
@@ -23,6 +23,7 @@
import edu.pitt.dbmi.data.Delimiter;
import java.io.File;
import java.io.IOException;
+import java.nio.channels.ClosedByInterruptException;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,10 +44,18 @@ public ContinuousTabularDataFileReader(File dataFile, Delimiter delimiter) {
@Override
protected Dataset readInDataFromFile(int[] excludedColumns) throws IOException {
- List variables = hasHeader ? extractVariables(excludedColumns) : generateVariables(excludedColumns);
- double[][] data = extractData(variables, excludedColumns);
+ Dataset dataset;
+ try {
+ List variables = hasHeader ? extractVariables(excludedColumns) : generateVariables(excludedColumns);
+ double[][] data = extractData(variables, excludedColumns);
- return new ContinuousTabularDataset(variables, data);
+ dataset = new ContinuousTabularDataset(variables, data);
+ } catch (ClosedByInterruptException exception) {
+ dataset = null;
+ LOGGER.error("", exception);
+ }
+
+ return dataset;
}
}
diff --git a/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java b/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java
index 387b56a..a14ae7a 100644
--- a/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java
+++ b/src/main/java/edu/pitt/dbmi/data/reader/tabular/MixedTabularDataFileReader.java
@@ -67,6 +67,10 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[]
int mixedVarInfoIndex = 0;
for (MixedVarInfo mixedVarInfo : mixedVarInfos) {
+ if (Thread.currentThread().isInterrupted()) {
+ break;
+ }
+
if (mixedVarInfo.isContinuous()) {
mixedVarInfo.clearValues();
continuousData[mixedVarInfoIndex++] = new double[numOfRows];
@@ -105,7 +109,7 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[]
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -143,7 +147,7 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[]
}
// read in data
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -296,7 +300,7 @@ private MixedTabularDataset extractMixedData(MixedVarInfo[] mixedVarInfos, int[]
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline char at the end of the file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -376,7 +380,7 @@ private MixedVarInfo[] analysMixedVariables(MixedVarInfo[] mixedVarInfos, int[]
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -414,7 +418,7 @@ private MixedVarInfo[] analysMixedVariables(MixedVarInfo[] mixedVarInfos, int[]
}
// read in data
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -540,7 +544,7 @@ private MixedVarInfo[] analysMixedVariables(MixedVarInfo[] mixedVarInfos, int[]
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline at end of file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -607,7 +611,7 @@ private MixedVarInfo[] extractMixedVariables(int[] excludedColumns) throws IOExc
byte prevChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !taskFinished) {
+ while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -689,7 +693,7 @@ private MixedVarInfo[] extractMixedVariables(int[] excludedColumns) throws IOExc
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !taskFinished);
+ } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted());
// data at the end of line
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -733,7 +737,7 @@ protected MixedVarInfo[] generateMixedVariables(int[] excludedColumns) throws IO
int exColIndex = 0;
int varInfoIndex = 0;
MixedVarInfo[] varInfos = new MixedVarInfo[size];
- for (int colNum = 1; colNum <= numOfCols; colNum++) {
+ for (int colNum = 1; colNum <= numOfCols && !Thread.currentThread().isInterrupted(); colNum++) {
if (numOfExCols > 0 && (exColIndex < numOfExCols && colNum == excludedColumns[exColIndex])) {
exColIndex++;
} else {
diff --git a/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java b/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java
index 660c35d..a0bc133 100644
--- a/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java
+++ b/src/main/java/edu/pitt/dbmi/data/util/TextFileUtils.java
@@ -89,7 +89,7 @@ public static char inferDelimiter(File file, int n, int skip, String comment, ch
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && lineCount < n) {
+ while (buffer.hasRemaining() && lineCount < n && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (skipLine) {
@@ -137,7 +137,7 @@ public static char inferDelimiter(File file, int n, int skip, String comment, ch
} else {
if (byteBuffer.position() > 0) {
byteBuffer.flip();
- while (byteBuffer.hasRemaining()) {
+ while (byteBuffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte c = byteBuffer.get();
if (c == quoteChar) {
hasQuoteChar = !hasQuoteChar;
@@ -165,10 +165,10 @@ public static char inferDelimiter(File file, int n, int skip, String comment, ch
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && lineCount < n);
+ } while (position < fileSize && lineCount < n && !Thread.currentThread().isInterrupted());
int maxIndex = 0;
- for (int i = 1; i < delims.length; i++) {
+ for (int i = 1; i < delims.length && !Thread.currentThread().isInterrupted(); i++) {
if (characters[delims[maxIndex]] < characters[delims[i]]) {
maxIndex = i;
}
diff --git a/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java
index da2ff34..66876a6 100644
--- a/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java
+++ b/src/main/java/edu/pitt/dbmi/data/validation/covariance/CovarianceDataFileValidation.java
@@ -29,9 +29,12 @@
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
+import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.util.LinkedList;
import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
*
@@ -41,6 +44,8 @@
*/
public class CovarianceDataFileValidation extends AbstractDataFileReader implements DataFileValidation {
+ private static final Logger LOGGER = LoggerFactory.getLogger(CovarianceDataFileValidation.class);
+
private final List validationResults;
public CovarianceDataFileValidation(File dataFile, Delimiter delimiter) {
@@ -60,15 +65,18 @@ public void validate() {
result.setAttribute(ValidationAttribute.ROW_NUMBER, numberOfCases);
result.setAttribute(ValidationAttribute.COLUMN_NUMBER, numberOfVariables);
validationResults.add(result);
+ } catch (ClosedByInterruptException exception) {
+ LOGGER.error("", exception);
} catch (IOException exception) {
String errMsg = String.format("Unable to read file %s.", dataFile.getName());
ValidationResult result = new ValidationResult(ValidationCode.ERROR, MessageType.FILE_IO_ERROR, errMsg);
result.setAttribute(ValidationAttribute.FILE_NAME, dataFile.getName());
validationResults.add(result);
+ LOGGER.error("Validation failed.", exception);
}
}
- public void validateCovarianceData(int numberOfVariables) throws IOException {
+ protected void validateCovarianceData(int numberOfVariables) throws IOException {
try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) {
long fileSize = fc.size();
long position = 0;
@@ -93,7 +101,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
if (skipToData) {
- while (buffer.hasRemaining() && skipToData) {
+ while (buffer.hasRemaining() && skipToData && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -132,7 +140,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException {
}
}
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -290,7 +298,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case where no newline at end of file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -352,7 +360,7 @@ public void validateCovarianceData(int numberOfVariables) throws IOException {
}
}
- public int validateVariables() throws IOException {
+ protected int validateVariables() throws IOException {
int count = 0;
try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) {
@@ -378,7 +386,7 @@ public int validateVariables() throws IOException {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
if (skipCaseNum) {
- while (buffer.hasRemaining() && skipCaseNum) {
+ while (buffer.hasRemaining() && skipCaseNum && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -416,7 +424,7 @@ public int validateVariables() throws IOException {
}
}
- while (buffer.hasRemaining() && !doneExtractVars) {
+ while (buffer.hasRemaining() && !doneExtractVars && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -496,7 +504,7 @@ public int validateVariables() throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !doneExtractVars);
+ } while (position < fileSize && !doneExtractVars && !Thread.currentThread().isInterrupted());
// data at the end of line
// data at the end of line
@@ -520,7 +528,7 @@ public int validateVariables() throws IOException {
return count;
}
- public int validateNumberOfCases() throws IOException {
+ protected int validateNumberOfCases() throws IOException {
int count = 0;
try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) {
@@ -539,7 +547,7 @@ public int validateNumberOfCases() throws IOException {
byte prevChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !finished) {
+ while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -588,7 +596,7 @@ public int validateNumberOfCases() throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while ((position < fileSize) && !finished);
+ } while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted());
if (dataBuilder.length() > 0) {
String value = dataBuilder.toString().trim();
diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java
index fbd646b..07490f5 100644
--- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java
+++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataFileValidation.java
@@ -72,7 +72,7 @@ protected int validateVariables(int[] excludedColumns) throws IOException {
byte prevChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !taskFinished) {
+ while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -155,7 +155,7 @@ protected int validateVariables(int[] excludedColumns) throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !taskFinished);
+ } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted());
// data at the end of line
if (colNum > 0 || dataBuilder.length() > 0) {
diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java
index f7b8b13..1dd6721 100644
--- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java
+++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/AbstractTabularDataValidation.java
@@ -25,8 +25,11 @@
import edu.pitt.dbmi.data.validation.ValidationResult;
import java.io.File;
import java.io.IOException;
+import java.nio.channels.ClosedByInterruptException;
import java.util.Collections;
import java.util.Set;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
*
@@ -36,6 +39,8 @@
*/
public abstract class AbstractTabularDataValidation extends AbstractTabularDataFileValidation implements TabularDataValidation {
+ private static final Logger LOGGER = LoggerFactory.getLogger(AbstractTabularDataValidation.class);
+
protected int markedMissing;
protected int assumedMissing;
protected int numOfRowsWithMissingValues;
@@ -55,11 +60,14 @@ public void validate(Set excludedVariables) {
numOfColsWithMissingValues = 0;
try {
validateDataFromFile(getColumnNumbers(excludedVariables));
+ } catch (ClosedByInterruptException exception) {
+ LOGGER.error("", exception);
} catch (IOException exception) {
String errMsg = String.format("Unable to read file %s.", dataFile.getName());
ValidationResult result = new ValidationResult(ValidationCode.ERROR, MessageType.FILE_IO_ERROR, errMsg);
result.setAttribute(ValidationAttribute.FILE_NAME, dataFile.getName());
validationResults.add(result);
+ LOGGER.error("Validation failed.", exception);
}
}
@@ -67,11 +75,14 @@ public void validate(Set excludedVariables) {
public void validate(int[] excludedColumns) {
try {
validateDataFromFile(filterValidColumnNumbers(excludedColumns));
+ } catch (ClosedByInterruptException exception) {
+ LOGGER.error("", exception);
} catch (IOException exception) {
String errMsg = String.format("Unable to read file %s.", dataFile.getName());
ValidationResult result = new ValidationResult(ValidationCode.ERROR, MessageType.FILE_IO_ERROR, errMsg);
result.setAttribute(ValidationAttribute.FILE_NAME, dataFile.getName());
validationResults.add(result);
+ LOGGER.error("Validation failed.", exception);
}
}
diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java
index 712e840..d134c41 100644
--- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java
+++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/ContinuousTabularDataFileValidation.java
@@ -99,7 +99,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -137,7 +137,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio
}
// read in data
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -341,7 +341,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline char at end of file
if (colNum > 0 || dataBuilder.length() > 0) {
diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java
index 43ba6cd..243f18b 100644
--- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java
+++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/MixedTabularDataFileValidation.java
@@ -53,6 +53,10 @@ protected void validateDataFromFile(int[] excludedColumns) throws IOException {
int numOfDiscrete = 0;
int numOfContinuous = 0;
for (MixedVarInfo var : varInfos) {
+ if (Thread.currentThread().isInterrupted()) {
+ break;
+ }
+
if (var.isContinuous()) {
numOfContinuous++;
} else {
@@ -111,6 +115,10 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th
int mixedVarInfoIndex = 0;
for (MixedVarInfo mixedVarInfo : mixedVarInfos) {
+ if (Thread.currentThread().isInterrupted()) {
+ break;
+ }
+
if (mixedVarInfo.isContinuous()) {
mixedVarInfo.clearValues();
continuousData[mixedVarInfoIndex++] = new double[numOfRows];
@@ -150,7 +158,7 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -188,7 +196,7 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th
}
// read in data
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -398,7 +406,7 @@ private int validateData(MixedVarInfo[] mixedVarInfos, int[] excludedColumns) th
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline char at the end of the file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -508,7 +516,7 @@ private MixedVarInfo[] analysMixedVariableValidation(MixedVarInfo[] mixedVarInfo
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -541,7 +549,7 @@ private MixedVarInfo[] analysMixedVariableValidation(MixedVarInfo[] mixedVarInfo
}
// read in data
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -650,7 +658,7 @@ private MixedVarInfo[] analysMixedVariableValidation(MixedVarInfo[] mixedVarInfo
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline at end of file
if (colNum > 0 || dataBuilder.length() > 0) {
@@ -709,7 +717,7 @@ private MixedVarInfo[] validateMixedVariables(int[] excludedColumns) throws IOEx
byte prevChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
- while (buffer.hasRemaining() && !taskFinished) {
+ while (buffer.hasRemaining() && !taskFinished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -792,7 +800,7 @@ private MixedVarInfo[] validateMixedVariables(int[] excludedColumns) throws IOEx
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize && !taskFinished);
+ } while (position < fileSize && !taskFinished && !Thread.currentThread().isInterrupted());
// data at the end of line
if (colNum > 0 || dataBuilder.length() > 0) {
diff --git a/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java b/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java
index 72d2e10..f5dded0 100644
--- a/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java
+++ b/src/main/java/edu/pitt/dbmi/data/validation/tabular/VerticalDiscreteTabularDataFileValidation.java
@@ -99,7 +99,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio
// skip header, if any
if (skipHeader) {
- while (buffer.hasRemaining() && skipHeader) {
+ while (buffer.hasRemaining() && skipHeader && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
@@ -136,7 +136,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio
}
}
- while (buffer.hasRemaining()) {
+ while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();
if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
@@ -318,7 +318,7 @@ private int validateData(int numOfVars, int[] excludedColumns) throws IOExceptio
if ((position + size) > fileSize) {
size = fileSize - position;
}
- } while (position < fileSize);
+ } while (position < fileSize && !Thread.currentThread().isInterrupted());
// case when no newline char at the end of the file
if (colNum > 0 || dataBuilder.length() > 0) {