Skip to content
This repository has been archived by the owner on Jun 18, 2019. It is now read-only.

Commit

Permalink
Merge pull request #24 from bd2kccd/v0.2.x
Browse files Browse the repository at this point in the history
V0.2.x
  • Loading branch information
kvb2univpitt authored Nov 9, 2017
2 parents 5666123 + d20181e commit 7cb3b55
Show file tree
Hide file tree
Showing 17 changed files with 392 additions and 235 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.pitt.dbmi</groupId>
<artifactId>data-reader</artifactId>
<version>0.2.2-SNAPSHOT</version>
<version>0.2.3-SNAPSHOT</version>
<packaging>jar</packaging>

<properties>
Expand Down
25 changes: 19 additions & 6 deletions src/main/java/edu/pitt/dbmi/data/preview/BasicDataPreviewer.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,13 @@
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
*
Expand All @@ -35,6 +38,8 @@
*/
public class BasicDataPreviewer extends AbstractDataPreviewer implements DataPreviewer {

private static final Logger LOGGER = LoggerFactory.getLogger(BasicDataPreviewer.class);

/**
 * Creates a previewer for the given data file.
 *
 * The file is handed unchanged to the {@code AbstractDataPreviewer}
 * superclass constructor; this constructor does nothing else.
 *
 * @param dataFile the file whose lines will be previewed
 */
public BasicDataPreviewer(File dataFile) {
super(dataFile);
}
Expand All @@ -49,6 +54,16 @@ public List<String> getPreviews(int fromLine, int toLine, int numOfCharacters) t
}

List<String> linePreviews = new LinkedList<>();
try {
getPreviews(fromLine, toLine, numOfCharacters, linePreviews);
} catch (ClosedByInterruptException exception) {
LOGGER.error("", exception);
}

return linePreviews;
}

protected void getPreviews(int fromLine, int toLine, int numOfCharacters, List<String> list) throws IOException {
try (FileChannel fc = new RandomAccessFile(dataFile, "r").getChannel()) {
long fileSize = fc.size();
long position = 0;
Expand All @@ -63,15 +78,15 @@ public List<String> getPreviews(int fromLine, int toLine, int numOfCharacters) t
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);

while (buffer.hasRemaining() && !isDone) {
while (buffer.hasRemaining() && !isDone && !Thread.currentThread().isInterrupted()) {
byte currentChar = buffer.get();
if (skipLine) {
if (currentChar == CARRIAGE_RETURN || currentChar == LINE_FEED) {
skipLine = false;

if (charCount > 0) {
charCount = 0;
linePreviews.add(lineBuilder.toString());
list.add(lineBuilder.toString());
lineBuilder.delete(0, lineBuilder.length());
}

Expand All @@ -88,7 +103,7 @@ public List<String> getPreviews(int fromLine, int toLine, int numOfCharacters) t
if (currentChar == CARRIAGE_RETURN || currentChar == LINE_FEED) {
if (charCount > 0) {
charCount = 0;
linePreviews.add(lineBuilder.toString());
list.add(lineBuilder.toString());
lineBuilder.delete(0, lineBuilder.length());
}

Expand All @@ -114,11 +129,9 @@ public List<String> getPreviews(int fromLine, int toLine, int numOfCharacters) t
if ((position + size) > fileSize) {
size = fileSize - position;
}
} while (position < fileSize);
} while (position < fileSize && !Thread.currentThread().isInterrupted());

}

return linePreviews;
}

}
136 changes: 81 additions & 55 deletions src/main/java/edu/pitt/dbmi/data/reader/AbstractDataFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.ClosedByInterruptException;
import java.nio.channels.FileChannel;

/**
Expand All @@ -36,7 +37,8 @@ public abstract class AbstractDataFileReader {
protected static final byte LINE_FEED = '\n';
protected static final byte CARRIAGE_RETURN = '\r';

protected static final byte SPACE_CHAR = ' ';
protected static final byte SPACE_CHAR = Delimiter.SPACE.getDelimiterChar();
protected static final String EMPTY_STRING = "";

protected byte quoteCharacter;
protected String missingValueMarker;
Expand All @@ -52,9 +54,9 @@ public AbstractDataFileReader(File dataFile, Delimiter delimiter) {
this.dataFile = dataFile;
this.delimiter = delimiter;

this.missingValueMarker = EMPTY_STRING;
this.commentMarker = EMPTY_STRING;
this.quoteCharacter = -1;
this.commentMarker = "";

this.numberOfLines = -1;
this.numberOfColumns = -1;
}
Expand All @@ -71,41 +73,52 @@ private int countNumberOfColumns() throws IOException {

byte[] prefix = commentMarker.getBytes();
int index = 0;
boolean reqCheck = prefix.length > 0;
boolean reqCmntCheck = prefix.length > 0;
boolean skipLine = false;
boolean finished = false;
boolean hasQuoteChar = false;
byte prevNonBlankChar = SPACE_CHAR;
boolean finished = false;
byte prevChar = -1;
byte prevNonBlankChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
while (buffer.hasRemaining() && !finished) {
while (buffer.hasRemaining() && !finished && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();

if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) {
prevChar = currChar;
continue;
}

finished = !skipLine;
if (finished) {
count++;
}

skipLine = false;
finished = prevNonBlankChar > SPACE_CHAR;
reqCmntCheck = prefix.length > 0;
index = 0;
prevNonBlankChar = -1;
} else if (!skipLine) {
// save any non-blank char encountered
if (currChar > SPACE_CHAR) {
prevNonBlankChar = currChar;
}

if (reqCheck && prevNonBlankChar > SPACE_CHAR) {
// skip any blank chars at the beginning of the line
if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) {
continue;
}

if (reqCmntCheck) {
if (currChar == prefix[index]) {
index++;

// all the comment chars are matched
if (index == prefix.length) {
index = 0;
skipLine = true;
count = 0;
prevNonBlankChar = SPACE_CHAR;

prevChar = currChar;
continue;
}
} else {
reqCheck = false;
reqCmntCheck = false;
}
}

Expand All @@ -114,17 +127,13 @@ private int countNumberOfColumns() throws IOException {
} else if (!hasQuoteChar) {
switch (delimiter) {
case WHITESPACE:
if (currChar > SPACE_CHAR && prevChar <= SPACE_CHAR) {
if (!hasQuoteChar) {
count++;
}
if (currChar <= SPACE_CHAR && prevChar > SPACE_CHAR) {
count++;
}
break;
default:
if (currChar == delimChar) {
if (!hasQuoteChar) {
count++;
}
count++;
}
}
}
Expand All @@ -137,12 +146,10 @@ private int countNumberOfColumns() throws IOException {
if ((position + size) > fileSize) {
size = fileSize - position;
}
} while (position < fileSize && !finished);
} while ((position < fileSize) && !finished && !Thread.currentThread().isInterrupted());

if (delimiter != Delimiter.WHITESPACE) {
if (prevNonBlankChar > SPACE_CHAR) {
count++;
}
if (!finished) {
count++;
}
}

Expand All @@ -159,52 +166,63 @@ private int countNumberOfLines() throws IOException {

byte[] prefix = commentMarker.getBytes();
int index = 0;
boolean reqCheck = prefix.length > 0;
boolean reqCmntCheck = prefix.length > 0;
boolean skipLine = false;
boolean moveToEOL = false;
byte prevChar = -1;
byte prevNonBlankChar = -1;
do {
MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, size);
while (buffer.hasRemaining()) {
while (buffer.hasRemaining() && !Thread.currentThread().isInterrupted()) {
byte currChar = buffer.get();

if (currChar == CARRIAGE_RETURN || currChar == LINE_FEED) {
skipLine = false;
if (index > 0) {
index = 0;
if ((prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && prevChar != currChar) {
continue;
}

if (!skipLine) {
count++;
}
} else if (!skipLine) {
if (currChar <= SPACE_CHAR && index == 0) {
index = 0;
moveToEOL = false;
skipLine = false;
prevNonBlankChar = -1;
} else if (!moveToEOL) {
// save any non-blank char encountered
if (currChar > SPACE_CHAR) {
prevNonBlankChar = currChar;
}

// skip any blank chars at the beginning of the line
if (currChar <= SPACE_CHAR && prevNonBlankChar <= SPACE_CHAR) {
continue;
}

if (reqCheck) {
if (reqCmntCheck) {
if (currChar == prefix[index]) {
index++;
if (index == prefix.length) {
index = 0;
moveToEOL = true;
skipLine = true;
}
} else {
index = 0;
skipLine = true;
count++;
moveToEOL = true;
}
} else {
skipLine = true;
count++;
moveToEOL = true;
}
}

prevChar = currChar;
}

position += size;
if ((position + size) > fileSize) {
size = fileSize - position;
}
} while (position < fileSize);
} while ((position < fileSize) && !Thread.currentThread().isInterrupted());

// case where no newline at end of file
if (index > 0) {
index = 0;
if (!(prevChar == CARRIAGE_RETURN || prevChar == LINE_FEED) && !skipLine) {
count++;
}
}
Expand All @@ -225,8 +243,8 @@ public String getMissingValueMarker() {
}

public void setMissingValueMarker(String missingValueMarker) {
this.missingValueMarker = (missingValueMarker) == null
? missingValueMarker
this.missingValueMarker = (missingValueMarker == null)
? EMPTY_STRING
: missingValueMarker.trim();
}

Expand All @@ -235,22 +253,30 @@ public String getCommentMarker() {
}

public void setCommentMarker(String commentMarker) {
if (commentMarker != null) {
this.commentMarker = commentMarker.trim();
}
this.commentMarker = (commentMarker == null)
? EMPTY_STRING
: commentMarker.trim();
}

public int getNumberOfLines() throws IOException {
if (numberOfLines == -1) {
numberOfLines = countNumberOfLines();
try {
numberOfLines = countNumberOfLines();
} catch (ClosedByInterruptException exception) {
numberOfLines = -1;
}
}

return numberOfLines;
}

public int getNumberOfColumns() throws IOException {
if (numberOfColumns == -1) {
numberOfColumns = countNumberOfColumns();
try {
numberOfColumns = countNumberOfColumns();
} catch (ClosedByInterruptException exception) {
numberOfColumns = -1;
}
}

return numberOfColumns;
Expand Down
Loading

0 comments on commit 7cb3b55

Please sign in to comment.