-
Notifications
You must be signed in to change notification settings - Fork 1
Home
agustafson edited this page Aug 21, 2011
·
3 revisions
I was looking for a java library (or scala) which could compare 2 datasets, whether the data is in excel or a delimited text file. Finding nothing, I decided to write a library myself. SQL ResultSet support as an InputDataset has also been added. The dataset difference is represented as a DiffResult, which contains the elementNumber, side (LEFT/RIGHT) and value.
- Create 2 InputDatasets from an excel file, delimited text file or SQL ResultSet
- Create ColumnComparators to compare columns from each InputDataset. Using a ConvertingColumnComparator (eg ExcelColumnConverter.Number) allows the ability to convert from the input column (eg: an excel cell) to a value for comparison (eg a BigDecimal)
- Create a new DatasetDiff, passing the ColumnComparators as a constructor param
- Execute the compareDatasets method on the DatasetDiff which will return a List of DiffResults
val textFileName = "simple1.txt"
val excelFileName = "simple1.xls"
val textFile = new TabDelimitedInputDataset(classLoader.getResourceAsStream(textFileName))
val excelFile = ExcelInputDataset(classLoader.getResourceAsStream(excelFileName), 0)
val stringTextConverter: (String) => String = TextColumnConverters.StringConverter()
val numberTextConverter: (String) => BigDecimal = TextColumnConverters.NumberConverter()
val stringExcelConverter: (HSSFCell) => String = ExcelColumnConverter.String
val numberExcelConverter: (HSSFCell) => BigDecimal = ExcelColumnConverter.Number
val stringColumnComparator = new ConvertingColumnComparator[String, String, HSSFCell](stringTextConverter, stringExcelConverter)
val numberColumnComparator = new ConvertingColumnComparator[BigDecimal, String, HSSFCell](numberTextConverter, numberExcelConverter)
val dateValueComparator = (left: Date, right: Date) => new DateMatcher("yyyyMMdd", left).matches(right)
val dateColumnComparator = new ConvertingColumnComparator[Date, String, HSSFCell](
TextColumnConverters.DateConverter("dd/MM/yyyy hh:mm"),
ExcelColumnConverter.Date,
dateValueComparator)
val columnComparators = Map(
0 -> stringColumnComparator,
1 -> numberColumnComparator,
2 -> dateColumnComparator
)
val datasetDiff = new DatasetDiff[String, HSSFCell](columnComparators)
val comparisons = datasetDiff.compareDatasets(textFile, excelFile)
There is a DatasetDiffBuilder for use with java. ClassLoader classLoader = this.getClass().getClassLoader();
String fileName1 = "simple1.txt";
String fileName2 = "simple2.txt";
TabDelimitedInputDataset textFile1 = new TabDelimitedInputDataset(classLoader.getResourceAsStream(fileName1));
TabDelimitedInputDataset textFile2 = new TabDelimitedInputDataset(classLoader.getResourceAsStream(fileName2));
Function1 stringConverter = TextColumnConverters.StringConverter();
Function1 numberConverter = TextColumnConverters.NumberConverter();
Function1 dateConverterDDMMYYYY = TextColumnConverters.DateConverter("dd/MM/yyyy hh:mm");
Function1 dateConverterYYYYMMDD = TextColumnConverters.DateConverter("yyyyMMdd");
ComparatorFunction<String> stringComparator = new ComparatorFunction<String>() {
@Override
public boolean compare(String left, String right) {
return left.equalsIgnoreCase(right);
}
};
ComparatorFunction<BigDecimal> bigDecimalComparator = new ComparatorFunction<BigDecimal>() {
@Override
public boolean compare(BigDecimal left, BigDecimal right) {
return left.underlying().abs().subtract(right.underlying()).abs()
.subtract(new java.math.BigDecimal("0.0000001"))
.compareTo(java.math.BigDecimal.ZERO) <= 0;
}
};
ComparatorFunction<Date> dateComparator = new ComparatorFunction<Date>() {
@Override
public boolean compare(Date left, Date right) {
SimpleDateFormat dateFormatYYYYMMDD = new SimpleDateFormat("yyyyMMdd");
return dateFormatYYYYMMDD.format(left).equals(dateFormatYYYYMMDD.format(right));
}
};
ConvertingColumnComparator<String, String, String> stringColumnComparator =
new ConvertingColumnComparator<String, String, String>(stringConverter, stringConverter, stringComparator);
ConvertingColumnComparator<BigDecimal, String, String> numberColumnComparator =
new ConvertingColumnComparator<BigDecimal, String, String>(numberConverter, numberConverter, bigDecimalComparator);
ConvertingColumnComparator<Date, String, String> dateColumnComparator =
new ConvertingColumnComparator<Date, String, String>(dateConverterDDMMYYYY, dateConverterYYYYMMDD, dateComparator);
List<DiffResult> comparisons = new DatasetDiffBuilder<String, String>()
.withColumnComparator(0, stringColumnComparator)
.withColumnComparator(1, numberColumnComparator)
.withColumnComparator(2, dateColumnComparator)
.compare(textFile1, textFile2);