clearableComponents = new ArrayList<>();
+
+ public void addReplacable(JComponent component) {
+
+ this.add(component);
+ clearableComponents.add(component);
+ }
+
+ public void clear() {
+ // remove the components in the reverse order of how they were added, keeps the layout of the JPanel intact
+ Collections.reverse(clearableComponents);
+ clearableComponents.forEach(this::remove);
+ clearableComponents.clear();
+ }
+
+}
diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/scan/SourceDataScan.java
similarity index 59%
rename from whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java
rename to whiterabbit/src/main/java/org/ohdsi/whiterabbit/scan/SourceDataScan.java
index 38e64ba3..11e88715 100644
--- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java
+++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/scan/SourceDataScan.java
@@ -1,30 +1,26 @@
/*******************************************************************************
* Copyright 2019 Observational Health Data Sciences and Informatics
- *
+ *
* This file is part of WhiteRabbit
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
-package org.ohdsi.whiteRabbit.scan;
+package org.ohdsi.whiterabbit.scan;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
-import java.rmi.RemoteException;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.*;
import java.util.stream.Collectors;
@@ -38,34 +34,30 @@
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
-import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.commons.io.FileUtils;
-import org.ohdsi.databases.DbType;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.ohdsi.databases.configuration.DbSettings;
+import org.ohdsi.databases.configuration.DbType;
import org.ohdsi.databases.RichConnection;
-import org.ohdsi.databases.RichConnection.QueryResult;
+import org.ohdsi.databases.QueryResult;
+import org.ohdsi.databases.*;
import org.ohdsi.rabbitInAHat.dataModel.Table;
import org.ohdsi.utilities.*;
-import org.ohdsi.utilities.collections.CountingSet;
-import org.ohdsi.utilities.collections.CountingSet.Count;
import org.ohdsi.utilities.collections.Pair;
import org.ohdsi.utilities.files.ReadTextFile;
-import org.ohdsi.whiteRabbit.DbSettings;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import static java.lang.Long.max;
-public class SourceDataScan {
-
- public static int MAX_VALUES_IN_MEMORY = 100000;
- public static int MIN_CELL_COUNT_FOR_CSV = 1000000;
- public static int N_FOR_FREE_TEXT_CHECK = 1000;
- public static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100;
-
+public class SourceDataScan implements ScanParameters {
+ static Logger logger = LoggerFactory.getLogger(SourceDataScan.class);
public final static String SCAN_REPORT_FILE_NAME = "ScanReport.xlsx";
public static final String POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME = "ORG_OHDSI_WHITERABBIT_POI_TMPDIR";
public static final String POI_TMP_DIR_PROPERTY_NAME = "org.ohdsi.whiterabbit.poi.tmpdir";
- private SXSSFWorkbook workbook;
+ private XSSFWorkbook workbook;
private char delimiter = ',';
private int sampleSize;
private boolean scanValues = false;
@@ -75,7 +67,6 @@ public class SourceDataScan {
private int maxValues;
private DbSettings.SourceType sourceType;
private DbType dbType;
- private String database;
private Map
> tableToFieldInfos;
private Map indexedTableNameLookup;
@@ -96,6 +87,22 @@ public void setSampleSize(int sampleSize) {
this.sampleSize = sampleSize;
}
+ public boolean doCalculateNumericStats() {
+ return calculateNumericStats;
+ }
+
+ public int getMaxValues() {
+ return maxValues;
+ }
+
+ public boolean doScanValues() {
+ return scanValues;
+ }
+
+ public int getNumStatsSamplerSize() {
+ return numStatsSamplerSize;
+ }
+
public void setScanValues(boolean scanValues) {
this.scanValues = scanValues;
}
@@ -104,6 +111,14 @@ public void setMinCellCount(int minCellCount) {
this.minCellCount = minCellCount;
}
+ public int getMinCellCount() {
+ return minCellCount;
+ }
+
+ public int getSampleSize() {
+ return sampleSize;
+ }
+
public void setMaxValues(int maxValues) {
this.maxValues = maxValues;
}
@@ -116,11 +131,10 @@ public void setNumStatsSamplerSize(int numStatsSamplerSize) {
this.numStatsSamplerSize = numStatsSamplerSize;
}
- public void process(DbSettings dbSettings, String outputFileName) {
+ public void process(DbSettings dbSettings, String outputFileName) throws IOException {
startTimeStamp = LocalDateTime.now();
sourceType = dbSettings.sourceType;
dbType = dbSettings.dbType;
- database = dbSettings.database;
tableToFieldInfos = new HashMap<>();
StringUtilities.outputWithTime("Started new scan of " + dbSettings.tables.size() + " tables...");
@@ -181,7 +195,7 @@ private static Path setupTmpDir(Path tmpDir) {
private static void checkWritableTmpDir(String dir) {
if (isNotWritable(Paths.get(dir))) {
String message = String.format("Directory %s is not writable! (used for tmp files for Apache POI)", dir);
- System.out.println(message);
+ logger.warn(message);
throw new RuntimeException(message);
}
}
@@ -214,15 +228,14 @@ private void processDatabase(DbSettings dbSettings) {
if (dbSettings.dbType == DbType.BIGQUERY) {
dbSettings.domain = dbSettings.database;
}
-
- try (RichConnection connection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
+ try (RichConnection connection = new RichConnection(dbSettings)) {
connection.setVerbose(false);
connection.use(dbSettings.database);
tableToFieldInfos = dbSettings.tables.stream()
.collect(Collectors.toMap(
Table::new,
- table -> processDatabaseTable(table, connection)
+ table -> processDatabaseTable(table, connection, dbSettings.database)
));
}
}
@@ -257,11 +270,11 @@ private void processSasFiles(DbSettings dbSettings) {
}
}
- private void generateReport(String filename) {
+ private void generateReport(String filename) throws IOException {
StringUtilities.outputWithTime("Generating scan report");
removeEmptyTables();
- workbook = new SXSSFWorkbook(100); // keep 100 rows in memory, exceeding rows will be flushed to disk
+ workbook = new XSSFWorkbook();
int i = 0;
indexedTableNameLookup = new HashMap<>();
@@ -465,8 +478,7 @@ private void createMetaSheet() {
addRow(metaSheet, "N_FOR_FREE_TEXT_CHECK", SourceDataScan.N_FOR_FREE_TEXT_CHECK);
addRow(metaSheet, "MIN_AVERAGE_LENGTH_FOR_FREE_TEXT", SourceDataScan.MIN_AVERAGE_LENGTH_FOR_FREE_TEXT);
addRow(metaSheet, "sourceType", this.sourceType.toString());
- addRow(metaSheet, "dbType", this.dbType != null ? this.dbType.getTypeName() : "");
-// addRow(metaSheet, "database", this.database);
+ addRow(metaSheet, "dbType", this.dbType != null ? this.dbType.name() : "");
addRow(metaSheet, "delimiter", this.delimiter);
addRow(metaSheet, "sampleSize", this.sampleSize);
addRow(metaSheet, "scanValues", this.scanValues);
@@ -479,33 +491,38 @@ private void createMetaSheet() {
private void removeEmptyTables() {
tableToFieldInfos.entrySet()
- .removeIf(stringListEntry -> stringListEntry.getValue().size() == 0);
+ .removeIf(stringListEntry -> stringListEntry.getValue().isEmpty());
}
- private List processDatabaseTable(String table, RichConnection connection) {
+ private List processDatabaseTable(String table, RichConnection connection, String database) {
StringUtilities.outputWithTime("Scanning table " + table);
- long rowCount = connection.getTableSize(table);
- List fieldInfos = fetchTableStructure(connection, table);
+ long rowCount;
+ if (connection.getConnection().hasStorageHandler()) {
+ rowCount = connection.getConnection().getStorageHandler().getTableSize(table);
+ } else {
+ rowCount = connection.getTableSize(table);
+ }
+ List fieldInfos = connection.fetchTableStructure(connection, database, table, this);
if (scanValues) {
int actualCount = 0;
QueryResult queryResult = null;
try {
- queryResult = fetchRowsFromTable(connection, table, rowCount);
+ queryResult = connection.fetchRowsFromTable(table, rowCount, this);
for (org.ohdsi.utilities.files.Row row : queryResult) {
for (FieldInfo fieldInfo : fieldInfos) {
fieldInfo.processValue(row.get(fieldInfo.name));
}
actualCount++;
if (sampleSize != -1 && actualCount >= sampleSize) {
- System.out.println("Stopped after " + actualCount + " rows");
+ logger.info("Stopped after {} rows", actualCount);
break;
}
}
for (FieldInfo fieldInfo : fieldInfos)
fieldInfo.trim();
} catch (Exception e) {
- System.out.println("Error: " + e.getMessage());
+ logger.error(e.getMessage(), e);
} finally {
if (queryResult != null) {
queryResult.close();
@@ -516,105 +533,6 @@ private List processDatabaseTable(String table, RichConnection connec
return fieldInfos;
}
- private QueryResult fetchRowsFromTable(RichConnection connection, String table, long rowCount) {
- String query = null;
-
- if (sampleSize == -1) {
- if (dbType == DbType.MSACCESS)
- query = "SELECT * FROM [" + table + "]";
- else if (dbType == DbType.MSSQL || dbType == DbType.PDW || dbType == DbType.AZURE)
- query = "SELECT * FROM [" + table.replaceAll("\\.", "].[") + "]";
- else
- query = "SELECT * FROM " + table;
- } else {
- if (dbType == DbType.MSSQL || dbType == DbType.AZURE)
- query = "SELECT * FROM [" + table.replaceAll("\\.", "].[") + "] TABLESAMPLE (" + sampleSize + " ROWS)";
- else if (dbType == DbType.MYSQL)
- query = "SELECT * FROM " + table + " ORDER BY RAND() LIMIT " + sampleSize;
- else if (dbType == DbType.PDW)
- query = "SELECT TOP " + sampleSize + " * FROM [" + table.replaceAll("\\.", "].[") + "] ORDER BY RAND()";
- else if (dbType == DbType.ORACLE) {
- if (sampleSize < rowCount) {
- double percentage = 100 * sampleSize / (double) rowCount;
- if (percentage < 100)
- query = "SELECT * FROM " + table + " SAMPLE(" + percentage + ")";
- } else {
- query = "SELECT * FROM " + table;
- }
- } else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT)
- query = "SELECT * FROM " + table + " ORDER BY RANDOM() LIMIT " + sampleSize;
- else if (dbType == DbType.MSACCESS)
- query = "SELECT " + "TOP " + sampleSize + " * FROM [" + table + "]";
- else if (dbType == DbType.BIGQUERY)
- query = "SELECT * FROM " + table + " ORDER BY RAND() LIMIT " + sampleSize;
- }
- // System.out.println("SQL: " + query);
- return connection.query(query);
-
- }
-
- private List fetchTableStructure(RichConnection connection, String table) {
- List fieldInfos = new ArrayList<>();
-
- if (dbType == DbType.MSACCESS) {
- ResultSet rs = connection.getMsAccessFieldNames(table);
- try {
- while (rs.next()) {
- FieldInfo fieldInfo = new FieldInfo(rs.getString("COLUMN_NAME"));
- fieldInfo.type = rs.getString("TYPE_NAME");
- fieldInfo.rowCount = connection.getTableSize(table);
- fieldInfos.add(fieldInfo);
- }
- } catch (SQLException e) {
- throw new RuntimeException(e.getMessage());
- }
- } else {
- String query = null;
- if (dbType == DbType.ORACLE)
- query = "SELECT COLUMN_NAME,DATA_TYPE FROM ALL_TAB_COLUMNS WHERE table_name = '" + table + "' AND owner = '" + database.toUpperCase() + "'";
- else if (dbType == DbType.MSSQL || dbType == DbType.PDW) {
- String trimmedDatabase = database;
- if (database.startsWith("[") && database.endsWith("]"))
- trimmedDatabase = database.substring(1, database.length() - 1);
- String[] parts = table.split("\\.");
- query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_CATALOG='" + trimmedDatabase + "' AND TABLE_SCHEMA='" + parts[0] +
- "' AND TABLE_NAME='" + parts[1] + "';";
- } else if (dbType == DbType.AZURE) {
- String[] parts = table.split("\\.");
- query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='" + parts[0] +
- "' AND TABLE_NAME='" + parts[1] + "';";
- } else if (dbType == DbType.MYSQL)
- query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database + "' AND TABLE_NAME = '" + table
- + "';";
- else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT)
- query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database.toLowerCase() + "' AND TABLE_NAME = '"
- + table.toLowerCase() + "' ORDER BY ordinal_position;";
- else if (dbType == DbType.TERADATA) {
- query = "SELECT ColumnName, ColumnType FROM dbc.columns WHERE DatabaseName= '" + database.toLowerCase() + "' AND TableName = '"
- + table.toLowerCase() + "';";
- } else if (dbType == DbType.BIGQUERY) {
- query = "SELECT column_name AS COLUMN_NAME, data_type as DATA_TYPE FROM " + database + ".INFORMATION_SCHEMA.COLUMNS WHERE table_name = \"" + table + "\";";
- }
-
- for (org.ohdsi.utilities.files.Row row : connection.query(query)) {
- row.upperCaseFieldNames();
- FieldInfo fieldInfo;
- if (dbType == DbType.TERADATA) {
- fieldInfo = new FieldInfo(row.get("COLUMNNAME"));
- } else {
- fieldInfo = new FieldInfo(row.get("COLUMN_NAME"));
- }
- if (dbType == DbType.TERADATA) {
- fieldInfo.type = row.get("COLUMNTYPE");
- } else {
- fieldInfo.type = row.get("DATA_TYPE");
- }
- fieldInfo.rowCount = connection.getTableSize(table);
- fieldInfos.add(fieldInfo);
- }
- }
- return fieldInfos;
- }
private List processCsvFile(String filename) {
StringUtilities.outputWithTime("Scanning table " + filename);
@@ -633,7 +551,7 @@ private List processCsvFile(String filename) {
if (lineNr == 1) {
for (String cell : row) {
- fieldInfos.add(new FieldInfo(cell));
+ fieldInfos.add(new FieldInfo(this, cell));
}
if (!scanValues) {
@@ -660,7 +578,7 @@ private List processSasFile(SasFileReader sasFileReader) throws IOExc
SasFileProperties sasFileProperties = sasFileReader.getSasFileProperties();
for (Column column : sasFileReader.getColumns()) {
- FieldInfo fieldInfo = new FieldInfo(column.getName());
+ FieldInfo fieldInfo = new FieldInfo(this, column.getName());
fieldInfo.label = column.getLabel();
fieldInfo.rowCount = sasFileProperties.getRowCount();
if (!scanValues) {
@@ -698,229 +616,6 @@ private List processSasFile(SasFileReader sasFileReader) throws IOExc
return fieldInfos;
}
- private class FieldInfo {
- public String type;
- public String name;
- public String label;
- public CountingSet valueCounts = new CountingSet<>();
- public long sumLength = 0;
- public int maxLength = 0;
- public long nProcessed = 0;
- public long emptyCount = 0;
- public long uniqueCount = 0;
- public long rowCount = -1;
- public boolean isInteger = true;
- public boolean isReal = true;
- public boolean isDate = true;
- public boolean isFreeText = false;
- public boolean tooManyValues = false;
- public UniformSamplingReservoir samplingReservoir;
- public Object average;
- public Object stdev;
- public Object minimum;
- public Object maximum;
- public Object q1;
- public Object q2;
- public Object q3;
-
- public FieldInfo(String name) {
- this.name = name;
- if (calculateNumericStats) {
- this.samplingReservoir = new UniformSamplingReservoir(numStatsSamplerSize);
- }
- }
-
- public void trim() {
- // Only keep values that are used in scan report
- if (valueCounts.size() > maxValues) {
- valueCounts.keepTopN(maxValues);
- }
-
- // Calculate numeric stats and dereference sampling reservoir to save memory.
- if (calculateNumericStats) {
- average = getAverage();
- stdev = getStandardDeviation();
- minimum = getMinimum();
- maximum = getMaximum();
- q1 = getQ1();
- q2 = getQ2();
- q3 = getQ3();
- }
- samplingReservoir = null;
- }
-
- public boolean hasValuesTrimmed() {
- return tooManyValues;
- }
-
- public Double getFractionEmpty() {
- if (nProcessed == 0)
- return 1d;
- else
- return emptyCount / (double) nProcessed;
- }
-
- public String getTypeDescription() {
- if (type != null)
- return type;
- else if (!scanValues) // If not type assigned and not values scanned, do not derive
- return "";
- else if (nProcessed == emptyCount)
- return DataType.EMPTY.name();
- else if (isFreeText)
- return DataType.TEXT.name();
- else if (isDate)
- return DataType.DATE.name();
- else if (isInteger)
- return DataType.INT.name();
- else if (isReal)
- return DataType.REAL.name();
- else
- return DataType.VARCHAR.name();
- }
-
- public Double getFractionUnique() {
- if (nProcessed == 0 || uniqueCount == 1) {
- return 0d;
- } else {
- return uniqueCount / (double) nProcessed;
- }
-
- }
-
- public void processValue(String value) {
- nProcessed++;
- sumLength += value.length();
- if (value.length() > maxLength)
- maxLength = value.length();
-
- String trimValue = value.trim();
- if (trimValue.length() == 0)
- emptyCount++;
-
- if (!isFreeText) {
- boolean newlyAdded = valueCounts.add(value);
- if (newlyAdded) uniqueCount++;
-
- if (trimValue.length() != 0) {
- evaluateDataType(trimValue);
- }
-
- if (nProcessed == N_FOR_FREE_TEXT_CHECK && !isInteger && !isReal && !isDate) {
- doFreeTextCheck();
- }
- } else {
- valueCounts.addAll(StringUtilities.mapToWords(trimValue.toLowerCase()));
- }
-
- // if over this large constant number, then trimmed back to size used in report (maxValues).
- if (!tooManyValues && valueCounts.size() > MAX_VALUES_IN_MEMORY) {
- tooManyValues = true;
- this.trim();
- }
-
- if (calculateNumericStats && !trimValue.isEmpty()) {
- if (isInteger || isReal) {
- samplingReservoir.add(Double.parseDouble(trimValue));
- } else if (isDate) {
- samplingReservoir.add(DateUtilities.parseDate(trimValue));
- }
- }
- }
-
- public List> getSortedValuesWithoutSmallValues() {
- List> result = valueCounts.key2count.entrySet().stream()
- .filter(e -> e.getValue().count >= minCellCount)
- .sorted(Comparator.>comparingInt(e -> e.getValue().count).reversed())
- .limit(maxValues)
- .map(e -> new Pair<>(e.getKey(), e.getValue().count))
- .collect(Collectors.toCollection(ArrayList::new));
-
- if (result.size() < valueCounts.key2count.size()) {
- result.add(new Pair<>("List truncated...", -1));
- }
- return result;
- }
-
- private void evaluateDataType(String value) {
- if (isReal && !StringUtilities.isNumber(value))
- isReal = false;
- if (isInteger && !StringUtilities.isLong(value))
- isInteger = false;
- if (isDate && !StringUtilities.isDate(value))
- isDate = false;
- }
-
- private void doFreeTextCheck() {
- double averageLength = sumLength / (double) (nProcessed - emptyCount);
- if (averageLength >= MIN_AVERAGE_LENGTH_FOR_FREE_TEXT) {
- isFreeText = true;
- // Reset value count to word count
- CountingSet wordCounts = new CountingSet<>();
- for (Map.Entry entry : valueCounts.key2count.entrySet())
- for (String word : StringUtilities.mapToWords(entry.getKey().toLowerCase()))
- wordCounts.add(word, entry.getValue().count);
- valueCounts = wordCounts;
- }
- }
-
- private Object formatNumericValue(double value) {
- return formatNumericValue(value, false);
- }
-
- private Object formatNumericValue(double value, boolean dateAsDays) {
- if (nProcessed == 0) {
- return Double.NaN;
- } else if (getTypeDescription().equals(DataType.EMPTY.name())) {
- return Double.NaN;
- } else if (isInteger || isReal) {
- return value;
- } else if (isDate && dateAsDays) {
- return value;
- } else if (isDate) {
- return LocalDate.ofEpochDay((long) value).toString();
- } else {
- return Double.NaN;
- }
- }
-
- private Object getMinimum() {
- double min = samplingReservoir.getPopulationMinimum();
- return formatNumericValue(min);
- }
-
- private Object getMaximum() {
- double max = samplingReservoir.getPopulationMaximum();
- return formatNumericValue(max);
- }
-
- private Object getAverage() {
- double average = samplingReservoir.getPopulationMean();
- return formatNumericValue(average);
- }
-
- private Object getStandardDeviation() {
- double stddev = samplingReservoir.getSampleStandardDeviation();
- return formatNumericValue(stddev, true);
- }
-
- private Object getQ1() {
- double q1 = samplingReservoir.getSampleQuartiles().get(0);
- return formatNumericValue(q1);
- }
-
- private Object getQ2() {
- double q2 = samplingReservoir.getSampleQuartiles().get(1);
- return formatNumericValue(q2);
- }
-
- private Object getQ3() {
- double q3 = samplingReservoir.getSampleQuartiles().get(2);
- return formatNumericValue(q3);
- }
-
- }
-
private Row addRow(Sheet sheet, Object... values) {
Row row = sheet.createRow(sheet.getPhysicalNumberOfRows());
for (Object value : values) {
diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit.ico b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit.ico
similarity index 100%
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit.ico
rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit.ico
diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit128.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit128.png
similarity index 100%
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit128.png
rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit128.png
diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit16.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit16.png
similarity index 100%
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit16.png
rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit16.png
diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit256.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit256.png
similarity index 100%
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit256.png
rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit256.png
diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit32.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit32.png
similarity index 100%
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit32.png
rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit32.png
diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit48.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit48.png
similarity index 100%
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit48.png
rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit48.png
diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit64.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit64.png
similarity index 100%
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit64.png
rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit64.png
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/GUITestExtension.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/GUITestExtension.java
new file mode 100644
index 00000000..12553f70
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/GUITestExtension.java
@@ -0,0 +1,66 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import org.assertj.swing.junit.runner.FailureScreenshotTaker;
+import org.assertj.swing.junit.runner.ImageFolderCreator;
+import org.junit.jupiter.api.extension.Extension;
+import org.junit.jupiter.api.extension.ExtensionContext;
+import org.junit.jupiter.api.extension.InvocationInterceptor;
+import org.junit.jupiter.api.extension.ReflectiveInvocationContext;
+
+import java.lang.reflect.Method;
+
+import static org.assertj.swing.annotation.GUITestFinder.isGUITest;
+import static org.assertj.swing.junit.runner.Formatter.testNameFrom;
+
+/**
+ * Understands a JUnit 5 extension that takes a screenshot of a failed GUI test.
+ * The Junit 4 runner is available in {@link org.assertj.swing.junit.runner.GUITestRunner}.
+ *
+ * @see assertj-swing #259
+ * @author William Bakker
+ */
+public class GUITestExtension implements Extension, InvocationInterceptor {
+ //private final FailureScreenshotTaker screenshotTaker;
+
+ public GUITestExtension() {
+ //screenshotTaker = new FailureScreenshotTaker(new ImageFolderCreator().createImageFolder());
+ }
+
+ @Override
+ public void interceptTestMethod(
+ Invocation invocation,
+ ReflectiveInvocationContext invocationContext,
+ ExtensionContext extensionContext)
+ throws Throwable {
+ try {
+ invocation.proceed();
+ } catch (Throwable t) {
+ //takeScreenshot(invocationContext.getExecutable());
+ throw t;
+ }
+ }
+
+ private void takeScreenshot(Method method) {
+ final Class> testClass = method.getDeclaringClass();
+ if (!(isGUITest(testClass, method)))
+ return;
+ //screenshotTaker.saveScreenshot(testNameFrom(testClass, method));
+ }
+}
\ No newline at end of file
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java
index 26776c48..19bf8b3b 100644
--- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java
@@ -1,127 +1,177 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
package org.ohdsi.whiterabbit.scan;
-import org.ohdsi.databases.DbType;
-import org.ohdsi.databases.RichConnection;
-import org.ohdsi.ooxml.ReadXlsxFileWithHeader;
-import org.ohdsi.utilities.files.Row;
-import org.ohdsi.utilities.files.RowUtilities;
-import org.ohdsi.whiteRabbit.DbSettings;
-import org.testcontainers.containers.PostgreSQLContainer;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.assertj.swing.timing.Condition;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.whiterabbit.Console;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
+import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.*;
+import java.util.stream.IntStream;
+import static org.assertj.swing.timing.Pause.pause;
+import static org.assertj.swing.timing.Timeout.timeout;
import static org.junit.jupiter.api.Assertions.*;
+import static org.ohdsi.databases.configuration.DbType.*;
public class ScanTestUtils {
- public static void verifyScanResultsFromXSLX(Path results, DbType dbType) {
- assertTrue(Files.exists(results));
- FileInputStream file = null;
- try {
- file = new FileInputStream(new File(results.toUri()));
- } catch (FileNotFoundException e) {
- throw new RuntimeException(String.format("File %s was expected to be found, but does not exist.", results), e);
- }
-
- ReadXlsxFileWithHeader sheet = new ReadXlsxFileWithHeader(file);
+ // Convenience for having the same scan parameters across tests
+ public static SourceDataScan createSourceDataScan() {
+ SourceDataScan sourceDataScan = new SourceDataScan();
+ sourceDataScan.setMinCellCount(5);
+ sourceDataScan.setScanValues(true);
+ sourceDataScan.setMaxValues(1000);
+ sourceDataScan.setNumStatsSamplerSize(500);
+ sourceDataScan.setCalculateNumericStats(false);
+ sourceDataScan.setSampleSize(100000);
+
+ return sourceDataScan;
+ }
- List<Row> data = new ArrayList<>();
- int i = 0;
- for (Row row : sheet) {
- data.add(row);
- i++;
- }
+ public static boolean scanResultsSheetMatchesReference(Path scanResults, Path referenceResults, DbType dbType) throws IOException {
+ Map<String, List<List<String>>> scanSheets = readXlsxAsStringValues(scanResults);
+ Map<String, List<List<String>>> referenceSheets = readXlsxAsStringValues(referenceResults);
- // apparently the order of rows in the generated xslx table is not fixed,
- // so they need to be sorted to be able to verify their contents
- RowUtilities.sort(data, "Table", "Field");
- assertEquals(42, i);
-
- // since the table is generated with empty lines between the different tables of the source database,
- // a number of empty lines is expected. Verify this, and the first non-empty line
- expectRowNIsLike(0, data, dbType, "", "", "", "", "", "");
- expectRowNIsLike(1, data, dbType, "", "", "", "", "", "");
- expectRowNIsLike(2, data, dbType, "cost", "amount_allowed", "", "numeric", "0", "34");
-
- // sample some other rows in the available range
- expectRowNIsLike(9, data,dbType, "cost", "drg_source_value", "", "character varying", "0", "34");
- expectRowNIsLike(23, data,dbType, "cost", "total_paid", "", "numeric", "0", "34");
- expectRowNIsLike(24, data,dbType, "person", "birth_datetime", "", "timestamp without time zone", "0", "30");
- expectRowNIsLike(41, data,dbType, "person", "year_of_birth", "", "integer", "0", "30");
+ return scanValuesMatchReferenceValues(scanSheets, referenceSheets, dbType);
}
- private static void expectRowNIsLike(int n, List<Row> rows, DbType dbType, String... expectedValues) {
- assert expectedValues.length == 6;
- testColumnValue(n, rows.get(n), "Table", expectedValues[0]);
- testColumnValue(n, rows.get(n), "Field", expectedValues[1]);
- testColumnValue(n, rows.get(n), "Description", expectedValues[2]);
- testColumnValue(n, rows.get(n), "Type", expectedTypeValue(expectedValues[3], dbType));
- testColumnValue(n, rows.get(n), "Max length", expectedValues[4]);
- testColumnValue(n, rows.get(n), "N rows", expectedValues[5]);
- }
+ public static boolean isScanReportGeneratedAndMatchesReference(Console console, Path expectedPath, Path referencePath, DbType dbType) throws IOException {
+ assertNotNull(console);
+ // wait for the "Scan report generated:" message in the Console text area
+ pause(new Condition("Label Timeout") {
+ public boolean test() {
+ return console.getText().contains("Scan report generated:");
+ }
- private static void testColumnValue(int i, Row row, String fieldName, String expected) {
- if (!expected.equalsIgnoreCase(row.get(fieldName))) {
- fail(String.format("In row %d, value '%s' was expected for column '%s', but '%s' was found",
- i, expected, fieldName, row.get(fieldName)));
- }
+ }, timeout(10000));
+ assertTrue(console.getText().contains(expectedPath.toString()));
+
+ return scanResultsSheetMatchesReference(expectedPath, referencePath, dbType);
}
- private static String expectedTypeValue(String columnName, DbType dbType) {
- /*
- * This is very pragmatical and may need to change when tests are added for more databases.
- * For now, PostgreSQL is used as the reference, and the expected types need to be adapted to match
- * for other database.
- */
- if (dbType == DbType.POSTGRESQL || columnName.equals("")) {
- return columnName;
- }
- else if (dbType == DbType.ORACLE){
- switch (columnName) {
- case "integer":
- return "NUMBER";
- case "numeric":
- return "FLOAT";
- case "character varying":
- return "VARCHAR2";
- case "timestamp without time zone":
- // seems a mismatch in the OMOP CMD v5.2 (Oracle defaults to WITH time zone)
- return "TIMESTAMP(6) WITH TIME ZONE";
- default:
- throw new RuntimeException("Unsupported column type: " + columnName);
+ public static boolean scanValuesMatchReferenceValues(Map<String, List<List<String>>> scanSheets, Map<String, List<List<String>>> referenceSheets, DbType dbType) {
+ assertEquals(scanSheets.size(), referenceSheets.size(), "Number of sheets does not match.");
+ for (String tabName: new String[]{"Field Overview", "Table Overview", "cost.csv", "person.csv"}) {
+ if (scanSheets.containsKey(tabName)) {
+ List<List<String>> scanSheet = scanSheets.get(tabName);
+ List<List<String>> referenceSheet = referenceSheets.get(tabName);
+ assertEquals(scanSheet.size(), referenceSheet.size(), String.format("Number of rows in sheet %s does not match.", tabName));
+ // in WhiteRabbit v0.10.7 and older, the order of tables is not defined, so this can result in differences due to the rows
+ // being in a different order. By sorting the rows in both sheets, these kinds of differences should not play a role.
+ scanSheet.sort(new RowsComparator());
+ referenceSheet.sort(new RowsComparator());
+ for (int i = 0; i < scanSheet.size(); ++i) {
+ final int fi = i;
+ IntStream.range(0, scanSheet.get(fi).size())
+ .parallel()
+ .forEach(j -> {
+ final String scanValue = scanSheet.get(fi).get(j);
+ final String referenceValue = referenceSheet.get(fi).get(j);
+ if (tabName.equals("Field Overview") && j == 3 && !scanValue.equalsIgnoreCase(referenceValue)) {
+ assertTrue(matchTypeName(scanValue, referenceValue, dbType),
+ String.format("Field type '%s' cannot be matched with reference type '%s' for DbType %s",
+ scanValue, referenceValue, dbType.name()));
+ } else {
+ assertTrue(scanValue.equalsIgnoreCase(referenceValue),
+ String.format("In sheet %s, value '%s' in scan results does not match '%s' in reference",
+ tabName, scanValue, referenceValue));
+ }
+ });
+ }
}
}
- else {
- throw new RuntimeException("Unsupported DBType: " + dbType);
- }
+
+ return true;
}
- static DbSettings getTestPostgreSQLSettings(PostgreSQLContainer<?> container) {
- DbSettings dbSettings = new DbSettings();
- dbSettings.dbType = DbType.POSTGRESQL;
- dbSettings.sourceType = DbSettings.SourceType.DATABASE;
- dbSettings.server = container.getJdbcUrl();
- dbSettings.database = "public"; // yes, really
- dbSettings.user = container.getUsername();
- dbSettings.password = container.getPassword();
- dbSettings.tables = getTableNamesPostgreSQL(dbSettings);
-
- return dbSettings;
+ private static boolean matchTypeName(String type, String reference, DbType dbType) {
+ if (dbType == ORACLE) {
+ switch (type) {
+ case "NUMBER": return reference.equals("integer");
+ case "VARCHAR2": return reference.equals("character varying");
+ case "FLOAT": return reference.equals("numeric");
+ // seems a mismatch in the OMOP CMD v5.2 (Oracle defaults to WITH time zone):
+ case "TIMESTAMP(6) WITH TIME ZONE": return reference.equals("timestamp without time zone");
+ default: throw new RuntimeException(String.format("Unsupported column type '%s' for DbType %s ", type, dbType.name()));
+ }
+ } else if (dbType == DbType.SNOWFLAKE) {
+ switch (type) {
+ case "NUMBER": return reference.equals("integer") || reference.equals("numeric");
+ case "VARCHAR": return reference.equals("character varying");
+ case "TIMESTAMPNTZ": return reference.equals("timestamp without time zone");
+ default: throw new RuntimeException(String.format("Unsupported column type '%s' for DbType %s ", type, dbType.name()));
+ }
+ } else {
+ throw new RuntimeException("Unsupported DbType: " + dbType.name());
+ }
}
- static List<String> getTableNamesPostgreSQL(DbSettings dbSettings) {
- try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
- return richConnection.getTableNames("public");
+ static class RowsComparator implements Comparator<List<String>> {
+ @Override
+ public int compare(List<String> o1, List<String> o2) {
+ String firstString_o1 = o1.get(0);
+ String firstString_o2 = o2.get(0);
+ return firstString_o1.compareToIgnoreCase(firstString_o2);
}
}
+ private static Map<String, List<List<String>>> readXlsxAsStringValues(Path xlsx) throws IOException {
+ assertTrue(Files.exists(xlsx), String.format("File %s does not exist.", xlsx));
+
+ Map<String, List<List<String>>> sheets = new HashMap<>();
+ FileInputStream file = null;
+ try {
+ file = new FileInputStream(new File(xlsx.toUri()));
+ } catch (FileNotFoundException e) {
+ throw new RuntimeException(String.format("File %s was expected to be found, but does not exist.", xlsx), e);
+ }
+ XSSFWorkbook xssfWorkbook = new XSSFWorkbook(file);
+
+ for (int i = 0; i < xssfWorkbook.getNumberOfSheets(); ++i) {
+ XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(i);
+
+ List<List<String>> sheet = new ArrayList<>();
+ for (org.apache.poi.ss.usermodel.Row row : xssfSheet) {
+ List<String> values = new ArrayList<>();
+ for (Cell cell: row) {
+ switch (cell.getCellType()) {
+ case NUMERIC: values.add(String.valueOf(cell.getNumericCellValue())); break;
+ case STRING: values.add(cell.getStringCellValue()); break;
+ default: throw new RuntimeException("Unsupported cell type: " + cell.getCellType().name());
+ };
+ }
+ sheet.add(values);
+ }
+ sheets.put(xssfSheet.getSheetName(), sheet);
+ }
+ return sheets;
+ }
}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanOracle.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanOracleIT.java
similarity index 72%
rename from whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanOracle.java
rename to whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanOracleIT.java
index 3f31dfe5..d05fd37c 100644
--- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanOracle.java
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanOracleIT.java
@@ -1,25 +1,42 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
package org.ohdsi.whiterabbit.scan;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
-import org.ohdsi.databases.DbType;
+import org.ohdsi.databases.configuration.DbSettings;
+import org.ohdsi.databases.configuration.DbType;
import org.ohdsi.databases.RichConnection;
-import org.ohdsi.whiteRabbit.DbSettings;
-import org.ohdsi.whiteRabbit.scan.SourceDataScan;
import org.testcontainers.containers.OracleContainer;
import org.testcontainers.junit.jupiter.Container;
-import org.testcontainers.junit.jupiter.Testcontainers;
import java.io.*;
import java.net.URISyntaxException;
+import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.nio.file.Paths;
import java.util.*;
import static org.junit.jupiter.api.Assertions.*;
-@Testcontainers(disabledWithoutDocker = true)
-class TestSourceDataScanOracle {
+class SourceDataScanOracleIT {
private final static String USER_NAME = "test_user";
private final static String SCHEMA_NAME = USER_NAME;
@@ -44,11 +61,16 @@ class TestSourceDataScanOracle {
.withDatabaseName("testDB")
.withInitScript("scan_data/create_data_oracle.sql");
+ @BeforeAll
+ public static void startContainer() {
+ oracleContainer.start();
+ }
+
@Test
public void connectToDatabase() {
// this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker
DbSettings dbSettings = getTestDbSettings();
- try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
+ try (RichConnection richConnection = new RichConnection(dbSettings)) {
// do nothing, connection will be closed automatically because RichConnection implements interface Closeable
}
}
@@ -61,14 +83,16 @@ public void testGetTableNames() {
assertEquals(2, tableNames.size());
}
@Test
- void testSourceDataScan(@TempDir Path tempDir) throws IOException {
+ void testSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException {
loadData();
Path outFile = tempDir.resolve("scanresult.xslx");
- SourceDataScan sourceDataScan = new SourceDataScan();
+ URL referenceScanReport = SourceDataScanOracleIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx");
+
+ SourceDataScan sourceDataScan = ScanTestUtils.createSourceDataScan();
DbSettings dbSettings = getTestDbSettings();
sourceDataScan.process(dbSettings, outFile.toString());
- ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType);
+ assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(outFile, Paths.get(referenceScanReport.toURI()), DbType.ORACLE));
}
private void loadData() {
@@ -78,15 +102,16 @@ private void loadData() {
private void insertDataFromCsv(String tableName) {
DbSettings dbSettings = getTestDbSettings();
- try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
+ try (RichConnection richConnection = new RichConnection(dbSettings)) {
try (BufferedReader reader = new BufferedReader(getResourcePath(tableName))) {
String line = null;
while ((line = reader.readLine()) != null) {
- String[] values = line.split("\t");
- if (line.endsWith("\t")) {
+ String[] values = line.split(",");
+ if (line.endsWith(",")) {
values = Arrays.copyOf(values, values.length + 1);
values[values.length - 1] = "";
}
+ // Oracle INSERT needs quotes around the values
String insertSql = String.format("INSERT INTO %s.%s VALUES('%s');", dbSettings.database, tableName, String.join("','", values));
richConnection.execute(insertSql);
}
@@ -97,7 +122,7 @@ private void insertDataFromCsv(String tableName) {
}
private InputStreamReader getResourcePath(String tableName) throws URISyntaxException, IOException {
- String resourceName = String.format("scan_data/%s.csv", tableName);
+ String resourceName = String.format("scan_data/%s-no-header.csv", tableName);
ClassLoader classLoader = getClass().getClassLoader();
File file = new File(Objects.requireNonNull(classLoader.getResource(resourceName)).toURI());
@@ -105,7 +130,7 @@ private InputStreamReader getResourcePath(String tableName) throws URISyntaxExce
}
private List<String> getTableNames(DbSettings dbSettings) {
- try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
+ try (RichConnection richConnection = new RichConnection(dbSettings)) {
return richConnection.getTableNames(SCHEMA_NAME);
}
}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLGuiIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLGuiIT.java
new file mode 100644
index 00000000..b205f108
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLGuiIT.java
@@ -0,0 +1,121 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import com.github.caciocavallosilano.cacio.ctc.junit.CacioTest;
+import org.assertj.swing.annotation.GUITest;
+import org.assertj.swing.core.GenericTypeMatcher;
+import org.assertj.swing.edt.GuiActionRunner;
+import org.assertj.swing.finder.WindowFinder;
+import org.assertj.swing.fixture.DialogFixture;
+import org.assertj.swing.fixture.FrameFixture;
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.whiterabbit.Console;
+import org.ohdsi.whiterabbit.WhiteRabbitMain;
+import org.ohdsi.whiterabbit.gui.LocationsPanel;
+import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+
+import javax.swing.*;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.ohdsi.databases.configuration.DbType.POSTGRESQL;
+import static org.ohdsi.whiterabbit.scan.SourceDataScanPostgreSQLIT.createPostgreSQLContainer;
+
+@ExtendWith(GUITestExtension.class)
+@CacioTest
+class SourceDataScanPostgreSQLGuiIT {
+
+ private static FrameFixture window;
+ private static Console console;
+
+ private final static int WIDTH = 1920;
+ private final static int HEIGHT = 1080;
+ @BeforeAll
+ public static void setupOnce() {
+ System.setProperty("cacio.managed.screensize", String.format("%sx%s", WIDTH, HEIGHT));
+ }
+
+ @BeforeEach
+ public void onSetUp() {
+ String[] args = {};
+ WhiteRabbitMain whiteRabbitMain = GuiActionRunner.execute(() -> new WhiteRabbitMain(true, args));
+ console = whiteRabbitMain.getConsole();
+ window = new FrameFixture(whiteRabbitMain.getFrame());
+ window.show(); // shows the frame to test
+ }
+
+ @Container
+ public static PostgreSQLContainer<?> postgreSQL = createPostgreSQLContainer();
+
+ @ExtendWith(GUITestExtension.class)
+ @Test
+ void testConnectionAndSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException {
+ URL referenceScanReport = TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx");
+ Path personCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/person-no-header.csv").toURI());
+ Path costCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/cost-no-header.csv").toURI());
+ Files.copy(personCsv, tempDir.resolve("person.csv"));
+ Files.copy(costCsv, tempDir.resolve("cost.csv"));
+ window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_LOCATIONS);
+ window.comboBox("SourceType").selectItem(DbType.POSTGRESQL.label());
+ window.textBox("FolderField").setText(tempDir.toAbsolutePath().toString());
+ // verify one tooltip text, assume that all other tooltip texts will be fine too (fingers crossed)
+ assertEquals(LocationsPanel.TOOLTIP_POSTGRESQL_SERVER, window.textBox(LocationsPanel.LABEL_SERVER_LOCATION).target().getToolTipText());
+ window.textBox(LocationsPanel.LABEL_SERVER_LOCATION).setText(String.format("%s:%s/%s",
+ postgreSQL.getHost(),
+ postgreSQL.getFirstMappedPort(),
+ postgreSQL.getDatabaseName()));
+ window.textBox(LocationsPanel.LABEL_USER_NAME).setText(postgreSQL.getUsername());
+ window.textBox(LocationsPanel.LABEL_PASSWORD).setText(postgreSQL.getPassword());
+ window.textBox(LocationsPanel.LABEL_DATABASE_NAME).setText("public");
+
+ // use the "Test connection" button
+ window.button(WhiteRabbitMain.LABEL_TEST_CONNECTION).click();
+ GenericTypeMatcher<JDialog> matcher = new GenericTypeMatcher<JDialog>(JDialog.class, true) {
+ protected boolean isMatching(JDialog frame) {
+ return WhiteRabbitMain.LABEL_CONNECTION_SUCCESSFUL.equals(frame.getTitle());
+ }
+ };
+ DialogFixture frame = WindowFinder.findDialog(matcher).using(window.robot());
+ frame.button().click();
+
+ // switch to the scan panel, add all tables found and run the scan
+ window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_SCAN).click();
+ window.button(WhiteRabbitMain.LABEL_ADD_ALL_IN_DB).click();
+ window.button(WhiteRabbitMain.LABEL_SCAN_TABLES).click();
+
+ // verify the generated scan report against the reference
+ assertTrue(ScanTestUtils.isScanReportGeneratedAndMatchesReference(
+ console,
+ tempDir.resolve("ScanReport.xlsx"),
+ Paths.get(referenceScanReport.toURI()),
+ POSTGRESQL));
+
+ //window.close();
+ }
+}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLIT.java
new file mode 100644
index 00000000..2892e3a8
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLIT.java
@@ -0,0 +1,107 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.ohdsi.databases.configuration.DbSettings;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.databases.RichConnection;
+import org.testcontainers.containers.BindMode;
+import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+
+class SourceDataScanPostgreSQLIT {
+
+ @Container
+ public static PostgreSQLContainer<?> postgreSQL = createPostgreSQLContainer();
+
+ @Test
+ public void connectToDatabase() {
+ // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker
+ DbSettings dbSettings = getTestDbSettings();
+ try (RichConnection richConnection = new RichConnection(dbSettings)) {
+ // do nothing, connection will be closed automatically because RichConnection implements interface Closeable
+ }
+ }
+
+ @Test
+ public void testGetTableNames() {
+ // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker
+ DbSettings dbSettings = getTestDbSettings();
+ List<String> tableNames = getTableNames(dbSettings);
+ assertEquals(2, tableNames.size());
+ }
+
+ public static PostgreSQLContainer<?> createPostgreSQLContainer() {
+ PostgreSQLContainer<?> postgreSQLContainer = new PostgreSQLContainer<>("postgres:13.1")
+ .withUsername("test")
+ .withPassword("test")
+ .withDatabaseName("test")
+ .withClasspathResourceMapping(
+ "scan_data",
+ "/scan_data",
+ BindMode.READ_ONLY)
+ .withInitScript("scan_data/create_data_postgresql.sql");
+
+ postgreSQLContainer.start();
+
+ return postgreSQLContainer;
+ }
+
+ @Test
+ void testSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException {
+ Path outFile = tempDir.resolve("scanresult.xslx");
+ URL referenceScanReport = SourceDataScanPostgreSQLIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx");
+
+ SourceDataScan sourceDataScan = ScanTestUtils.createSourceDataScan();
+ DbSettings dbSettings = getTestDbSettings();
+
+ sourceDataScan.process(dbSettings, outFile.toString());
+ assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(outFile, Paths.get(referenceScanReport.toURI()), DbType.POSTGRESQL));
+ }
+
+ private List<String> getTableNames(DbSettings dbSettings) {
+ try (RichConnection richConnection = new RichConnection(dbSettings)) {
+ return richConnection.getTableNames("public");
+ }
+ }
+
+ private DbSettings getTestDbSettings() {
+ DbSettings dbSettings = new DbSettings();
+ dbSettings.dbType = DbType.POSTGRESQL;
+ dbSettings.sourceType = DbSettings.SourceType.DATABASE;
+ dbSettings.server = postgreSQL.getJdbcUrl();
+ dbSettings.database = "public"; // always for PostgreSQL
+ dbSettings.user = postgreSQL.getUsername();
+ dbSettings.password = postgreSQL.getPassword();
+ dbSettings.tables = getTableNames(dbSettings);
+
+ return dbSettings;
+ }
+}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeGuiIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeGuiIT.java
new file mode 100644
index 00000000..880c8c2f
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeGuiIT.java
@@ -0,0 +1,142 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import com.github.caciocavallosilano.cacio.ctc.junit.CacioTest;
+import org.assertj.swing.annotation.GUITest;
+import org.assertj.swing.core.GenericTypeMatcher;
+import org.assertj.swing.edt.GuiActionRunner;
+import org.assertj.swing.finder.WindowFinder;
+import org.assertj.swing.fixture.DialogFixture;
+import org.assertj.swing.fixture.FrameFixture;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.ohdsi.databases.SnowflakeHandler.SnowflakeConfiguration;
+import org.ohdsi.databases.SnowflakeTestUtils;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.whiterabbit.Console;
+import org.ohdsi.whiterabbit.WhiteRabbitMain;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.junit.jupiter.Container;
+
+import javax.swing.*;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.ohdsi.databases.configuration.DbType.SNOWFLAKE;
+import static org.ohdsi.whiterabbit.scan.SourceDataScanSnowflakeIT.*;
+
+@ExtendWith(GUITestExtension.class)
+@CacioTest
+class SourceDataScanSnowflakeGuiIT {
+
+ private static FrameFixture window;
+ private static Console console;
+
+ private final static int WIDTH = 1920;
+ private final static int HEIGHT = 1080;
+ @BeforeAll
+ public static void setupOnce() {
+ System.setProperty("cacio.managed.screensize", String.format("%sx%s", WIDTH, HEIGHT));
+ }
+
+ @Container
+ public static GenericContainer<?> testContainer;
+
+ @BeforeEach
+ public void onSetUp() {
+ try {
+ testContainer = createPythonContainer();
+ prepareTestData(testContainer);
+ } catch (IOException | InterruptedException e) {
+ throw new RuntimeException("Creating python container failed.");
+ }
+ String[] args = {};
+ WhiteRabbitMain whiteRabbitMain = GuiActionRunner.execute(() -> new WhiteRabbitMain(true, args));
+ console = whiteRabbitMain.getConsole();
+ window = new FrameFixture(whiteRabbitMain.getFrame());
+ window.show(); // shows the frame to test
+ }
+
+ @ExtendWith(GUITestExtension.class)
+ @Test
+ void testConnectionAndSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException {
+ Assumptions.assumeTrue(new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker(), "Snowflake system properties file not available");
+ URL referenceScanReport = TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx");
+ Path personCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/person-no-header.csv").toURI());
+ Path costCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/cost-no-header.csv").toURI());
+ Files.copy(personCsv, tempDir.resolve("person.csv"));
+ Files.copy(costCsv, tempDir.resolve("cost.csv"));
+ window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_LOCATIONS);
+ window.comboBox("SourceType").selectItem(DbType.SNOWFLAKE.label());
+ window.textBox("FolderField").setText(tempDir.toAbsolutePath().toString());
+
+ // first use the test connection button, and expect a popup that informs us that several required fields are empty
+ // use the "Test connection" button
+ window.button(WhiteRabbitMain.LABEL_TEST_CONNECTION).click();
+ GenericTypeMatcher<JDialog> matcher = new GenericTypeMatcher<JDialog>(JDialog.class, true) {
+ protected boolean isMatching(JDialog frame) {
+ return WhiteRabbitMain.TITLE_ERRORS_IN_DATABASE_CONFIGURATION.equals(frame.getTitle());
+ }
+ };
+ DialogFixture frame = WindowFinder.findDialog(matcher).using(window.robot());
+ frame.button().click(); // close the popup
+
+ // fill in all the required values and try again
+ assertEquals(SnowflakeConfiguration.TOOLTIP_SNOWFLAKE_ACCOUNT, window.textBox(SnowflakeConfiguration.SNOWFLAKE_ACCOUNT).target().getToolTipText());
+ window.textBox(SnowflakeConfiguration.SNOWFLAKE_ACCOUNT).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_ACCOUNT"));
+ window.textBox(SnowflakeConfiguration.SNOWFLAKE_USER).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_USER"));
+ window.textBox(SnowflakeConfiguration.SNOWFLAKE_PASSWORD).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_PASSWORD"));
+ window.textBox(SnowflakeConfiguration.SNOWFLAKE_WAREHOUSE).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_WAREHOUSE"));
+ window.textBox(SnowflakeConfiguration.SNOWFLAKE_DATABASE).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_DATABASE"));
+ window.textBox(SnowflakeConfiguration.SNOWFLAKE_SCHEMA).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_SCHEMA"));
+
+ // use the "Test connection" button
+ window.button(WhiteRabbitMain.LABEL_TEST_CONNECTION).click();
+ matcher = new GenericTypeMatcher<JDialog>(JDialog.class, true) {
+ protected boolean isMatching(JDialog frame) {
+ return WhiteRabbitMain.LABEL_CONNECTION_SUCCESSFUL.equals(frame.getTitle());
+ }
+ };
+ frame = WindowFinder.findDialog(matcher).using(window.robot());
+ frame.button().click();
+
+ // switch to the scan panel, add all tables found and run the scan
+ window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_SCAN).click();
+ window.button(WhiteRabbitMain.LABEL_ADD_ALL_IN_DB).click();
+ window.button(WhiteRabbitMain.LABEL_SCAN_TABLES).click();
+
+ // verify the generated scan report against the reference
+ assertTrue(ScanTestUtils.isScanReportGeneratedAndMatchesReference(
+ console,
+ tempDir.resolve("ScanReport.xlsx"),
+ Paths.get(referenceScanReport.toURI()),
+ SNOWFLAKE));
+ }
+}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeIT.java
new file mode 100644
index 00000000..acfe3dec
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeIT.java
@@ -0,0 +1,159 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import org.apache.commons.lang.StringUtils;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.junit.jupiter.api.io.TempDir;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.databases.SnowflakeTestUtils;
+import org.ohdsi.whiterabbit.WhiteRabbitMain;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.BindMode;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.utility.DockerImageName;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+public class SourceDataScanSnowflakeIT {
+
+ public final static String SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE = "SNOWFLAKE_WR_TEST_ACCOUNT";
+ static Logger logger = LoggerFactory.getLogger(SourceDataScanSnowflakeIT.class);
+
+ final static String CONTAINER_DATA_PATH = "/scan_data";
+ @Container
+ public static GenericContainer<?> testContainer;
+
+ @BeforeEach
+ public void setUp() {
+ try {
+ testContainer = createPythonContainer();
+ prepareTestData(testContainer);
+ } catch (IOException | InterruptedException e) {
+ throw new RuntimeException("Creating python container failed.");
+ }
+ }
+
+ //@Test
+ void testWarnWhenRunningWithoutSnowflakeConfigured() {
+ String snowflakeWrTestAccunt = System.getenv(SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE);
+ assertFalse(StringUtils.isEmpty(snowflakeWrTestAccunt) && StringUtils.isEmpty(System.getProperty("ohdsi.org.whiterabbit.skip_snowflake_tests")),
+ String.format("\nTest class %s is being run without a Snowflake test instance configured.\n" +
+ "This is NOT a valid verification run.", SourceDataScanSnowflakeIT.class.getName()));
+ }
+
+ @Test
+ //@EnabledIfEnvironmentVariable(named = SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE, matches = ".+")
+ void testProcessSnowflakeFromIni(@TempDir Path tempDir) throws URISyntaxException, IOException {
+ Assumptions.assumeTrue(new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker(), "Snowflake system properties file not available");
+ Charset charset = StandardCharsets.UTF_8;
+ Path iniFile = tempDir.resolve("snowflake.ini");
+ URL iniTemplate = SourceDataScanSnowflakeIT.class.getClassLoader().getResource("scan_data/snowflake.ini.template");
+ URL referenceScanReport = SourceDataScanSnowflakeIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx");
+ assert iniTemplate != null;
+ String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset);
+ content = content.replaceAll("%WORKING_FOLDER%", tempDir.toString())
+ .replaceAll("%SNOWFLAKE_ACCOUNT%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_ACCOUNT"))
+ .replaceAll("%SNOWFLAKE_USER%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_USER"))
+ .replaceAll("%SNOWFLAKE_PASSWORD%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_PASSWORD"))
+ .replaceAll("%SNOWFLAKE_WAREHOUSE%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_WAREHOUSE"))
+ .replaceAll("%SNOWFLAKE_DATABASE%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_DATABASE"))
+ .replaceAll("%SNOWFLAKE_SCHEMA%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_SCHEMA"));
+ Files.write(iniFile, content.getBytes(charset));
+ WhiteRabbitMain wrMain = new WhiteRabbitMain(true, new String[]{"-ini", iniFile.toAbsolutePath().toString()});
+ assert referenceScanReport != null;
+ assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.SNOWFLAKE));
+ }
+
+ static void prepareTestData(GenericContainer<?> container) throws IOException, InterruptedException {
+ SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker checker = new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker();
+ if (checker.getAsBoolean()) {
+ prepareTestData(container, new SnowflakeTestUtils.PropertyReader());
+ }
+ }
+
+ static void prepareTestData(GenericContainer<?> container, SnowflakeTestUtils.ReaderInterface reader) throws IOException, InterruptedException {
+ // snowsql is used for initializing the database
+
+ // add some packages needed for the installation of snowsql
+ execAndVerifyCommand(container, "/bin/sh", "-c", "apt update; apt -y install wget unzip");
+ // download snowsql
+ execAndVerifyCommand(container, "/bin/bash", "-c",
+ "wget -q https://sfc-repo.snowflakecomputing.com/snowsql/bootstrap/1.2/linux_x86_64/snowsql-1.2.29-linux_x86_64.bash;");
+ // install snowsql
+ execAndVerifyCommand(container, "/bin/bash", "-c",
+ "echo -e \"/tmp\\nN\" | bash snowsql-1.2.29-linux_x86_64.bash ");
+
+ // run the sql script needed to initialize the test data
+ execAndVerifyCommand(container, "/bin/bash", "-c",
+ String.format("(cd %s; SNOWSQL_PWD='%s' /tmp/snowsql -a %s -u %s -d %s -s %s -f %s/create_data_snowflake.sql)",
+ CONTAINER_DATA_PATH,
+ reader.getOrFail("SNOWFLAKE_WR_TEST_PASSWORD"),
+ reader.getOrFail(SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE),
+ reader.getOrFail("SNOWFLAKE_WR_TEST_USER"),
+ reader.getOrFail("SNOWFLAKE_WR_TEST_DATABASE"),
+ reader.getOrFail("SNOWFLAKE_WR_TEST_SCHEMA"),
+ CONTAINER_DATA_PATH
+ ));
+ }
+
+ public static GenericContainer<?> createPythonContainer() throws IOException, InterruptedException {
+ GenericContainer<?> testContainer = new GenericContainer<>(DockerImageName.parse("ubuntu:22.04"))
+ .withCommand("/bin/sh", "-c", "tail -f /dev/null") // keeps the container running until it is explicitly stopped
+ .withClasspathResourceMapping(
+ "scan_data",
+ CONTAINER_DATA_PATH,
+ BindMode.READ_ONLY);
+
+ testContainer.start();
+
+ return testContainer;
+ }
+
+ private static void execAndVerifyCommand(GenericContainer<?> container, String... command) throws IOException, InterruptedException {
+ execAndVerifyCommand(container, 0, command);
+ }
+ private static void execAndVerifyCommand(GenericContainer<?> container, int expectedExitValue, String... command) throws IOException, InterruptedException {
+ org.testcontainers.containers.Container.ExecResult result;
+
+ result = container.execInContainer(command);
+ if (result.getExitCode() != expectedExitValue) {
+ logger.error("stdout: {}", result.getStdout());
+ logger.error("stderr: {}", result.getStderr());
+ // hide the password, if present, so it won't appear in logs (pragmatic)
+ String message = ("Command failed: " + String.join(" ", command))
+ .replace(SnowflakeTestUtils.getEnvOrFail("SNOWFLAKE_WR_TEST_PASSWORD"), "xxxxx");
+ assertEquals(expectedExitValue, result.getExitCode(), message);
+ }
+ }
+}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java
deleted file mode 100644
index c36e99ad..00000000
--- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java
+++ /dev/null
@@ -1,200 +0,0 @@
-package org.ohdsi.whiterabbit.scan;
-
-import org.apache.commons.io.FileUtils;
-import org.junit.jupiter.api.Tag;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.ohdsi.databases.DbType;
-import org.ohdsi.databases.RichConnection;
-import org.ohdsi.ooxml.ReadXlsxFileWithHeader;
-import org.ohdsi.utilities.files.Row;
-import org.ohdsi.utilities.files.RowUtilities;
-import org.ohdsi.whiteRabbit.DbSettings;
-import org.ohdsi.whiteRabbit.scan.SourceDataScan;
-import org.testcontainers.containers.BindMode;
-import org.testcontainers.containers.PostgreSQLContainer;
-import org.testcontainers.junit.jupiter.Container;
-import org.testcontainers.junit.jupiter.Testcontainers;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.lang.reflect.Field;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.*;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-
-@Testcontainers
-@Tag("DockerRequired")
-class TestSourceDataScan {
-
- @Container
- public static PostgreSQLContainer<?> postgreSQL;
-
- static {
- /*
- * Since the database is only read, setting it up once suffices.
- *
- * Note that the init script is read locally, but accesses the CSV files from
- * the resource mapped into the container.
- *
- * The data used in this test are actually OMOP data. One reason for this is convenience: the DDL
- * for this data is know and could simply be copied instead of composed.
- * Also, for the technical correctness of WhiteRabbit (does it open the database, get the table
- * names and scan those tables), the actual nature of the source data does not matter.
- */
- try {
- postgreSQL = new PostgreSQLContainer<>("postgres:13.1")
- .withUsername("test")
- .withPassword("test")
- .withDatabaseName("test")
- .withClasspathResourceMapping(
- "scan_data",
- "/scan_data",
- BindMode.READ_ONLY)
- .withInitScript("scan_data/create_data_postgresql.sql");
-
- postgreSQL.start();
-
- } finally {
- if (postgreSQL != null) {
- postgreSQL.stop();
- }
- }
- }
-
- void testProcess(Path tempDir) throws IOException {
- Path outFile = tempDir.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME);
- SourceDataScan sourceDataScan = new SourceDataScan();
- DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL);
-
- sourceDataScan.process(dbSettings, outFile.toString());
- ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType);
- }
-
- @Test
- void testApachePoiTmpFileProblemWithAutomaticResolution(@TempDir Path tempDir) throws IOException, ReflectiveOperationException {
- // intends to verify solution of this bug: https://github.com/OHDSI/WhiteRabbit/issues/293
-
- /*
- * This tests a fix that assumes that the bug referenced here occurs in a multi-user situation where the
- * first user running the scan, and causing /tmp/poifiles to created, does so by creating it read-only
- * for everyone else. This directory is not automatically cleaned up, so every following user on the same
- * system running the scan encounters the problem that /tmp/poifiles already exists and is read-only,
- * causing a crash when the Apacho poi library attemps to create the xslx file.
- *
- * The class SourceDataScan has been extended with a static method, called implicitly once through a static{}
- * block, to create a TempDir strategy that will create a unique directory for each instance/run of WhiteRabbit.
- * This effectively solves the assumed error situation.
- *
- * This test does not execute a multi-user situation, but emulates it by leaving the tmp directory in a
- * read-only state after the first scan, and then confirming that a second scan fails. After that,
- * a new unique tmp dir is enforced by invoking SourceDataScan.setUniqueTempDirStrategyForApachePoi(),
- * and a new scan now runs successfully.
- */
-
- // Make sure the scenarios are tested without a user configured tmp dir, so set environment variable and
- // system property to an empty value
- System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, "");
- updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, "");
- Path defaultTmpPath = SourceDataScan.getDefaultPoiTmpPath(tempDir);
-
- if (!Files.exists(defaultTmpPath)) {
- Files.createDirectory(defaultTmpPath);
- } else {
- if (Files.exists(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME))) {
- Files.delete(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME));
- }
- }
-
- // process should pass without problem, and afterwards the default tmp dir should exist
- testProcess(defaultTmpPath);
- assertTrue(Files.exists(defaultTmpPath));
-
- // provoke the problem situation. make the default tmp dir readonly, try to process again
- assertTrue(Files.deleteIfExists(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME))); // or Apache Poi will happily reuse it
- assertTrue(defaultTmpPath.toFile().setReadOnly());
- RuntimeException thrown = assertThrows(RuntimeException.class, () -> {
- testProcess(defaultTmpPath);
- });
- assertTrue(thrown.getMessage().contains("Permission denied"));
-
- // invoke the static method to set a new tmp dir, process again (should succeed) and verify that
- // the new tmpdir is indeed different from the default
- String myTmpDir = SourceDataScan.setUniqueTempDirStrategyForApachePoi();
- testProcess(Paths.get(myTmpDir));
- assertNotEquals(defaultTmpPath.toFile().getAbsolutePath(), myTmpDir);
-
- // we might have left behind an unworkable situation; attempt to solve that
- if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) {
- assertTrue(defaultTmpPath.toFile().setWritable(true));
- }
- }
-
- @Test
- void testApachePoiTmpFileProblemWithUserConfiguredResolution(@TempDir Path tempDir) throws IOException, ReflectiveOperationException {
- // 1. Verify that the poi tmp dir property is used, if set
- Path tmpDirFromProperty = tempDir.resolve("setByProperty");
- System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, tmpDirFromProperty.toFile().getAbsolutePath());
- Files.createDirectories(tmpDirFromProperty);
-
- SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the property
- testProcess(tmpDirFromProperty);
- assertTrue(Files.exists(tmpDirFromProperty));
-
- cleanTmpDir(tmpDirFromProperty);
-
- // 2. Verify that the poi tmp dir environment variable is used, if set, and overrules the property set above
- Path tmpDirFromEnvironmentVariable = tempDir.resolve("setByEnvVar");
- updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, tmpDirFromEnvironmentVariable.toFile().getAbsolutePath());
- Files.createDirectories(tmpDirFromEnvironmentVariable);
-
- SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the env. var.
- testProcess(tmpDirFromEnvironmentVariable);
- assertFalse(Files.exists(tmpDirFromProperty));
- assertTrue(Files.exists(tmpDirFromEnvironmentVariable));
- cleanTmpDir(tmpDirFromEnvironmentVariable);
- }
-
- @SuppressWarnings({ "unchecked" })
- private static void updateEnv(String name, String val) throws ReflectiveOperationException {
- Map<String, String> env = System.getenv();
- Field field = env.getClass().getDeclaredField("m");
- field.setAccessible(true);
- ((Map<String, String>) field.get(env)).put(name, val);
- }
- private List<String> getTableNames(DbSettings dbSettings) {
- try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
- return richConnection.getTableNames("public");
- }
- }
-
- private static void cleanTmpDir(Path path) {
- if (Files.exists(path)) {
- if (!Files.isWritable(path)) {
- assertTrue(path.toFile().setWritable(true),
- String.format("This test cannot run properly if %s exists but is not writeable. Either remove it or make it writeable",
- path.toFile().getAbsolutePath()));
- }
- assertTrue(deleteDir(path.toFile()));
- }
- }
- private static boolean deleteDir(File file) {
- if (Files.exists(file.toPath())) {
- File[] contents = file.listFiles();
- if (contents != null) {
- for (File f : contents) {
- if (!Files.isSymbolicLink(f.toPath())) {
- deleteDir(f);
- }
- }
- }
- return file.delete();
- }
- return true;
- }
-}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvGui.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvGui.java
new file mode 100644
index 00000000..20c5d188
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvGui.java
@@ -0,0 +1,86 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import com.github.caciocavallosilano.cacio.ctc.junit.CacioTest;
+import org.assertj.swing.edt.GuiActionRunner;
+import org.assertj.swing.fixture.FrameFixture;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.io.TempDir;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.whiterabbit.Console;
+import org.ohdsi.whiterabbit.WhiteRabbitMain;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.ohdsi.whiterabbit.gui.LocationsPanel;
+
+@ExtendWith(GUITestExtension.class)
+@CacioTest
+public class TestSourceDataScanCsvGui {
+ private static FrameFixture window;
+ private static Console console;
+
+ private final static int WIDTH = 1920;
+ private final static int HEIGHT = 1080;
+ @BeforeAll
+ public static void setupOnce() {
+ System.setProperty("cacio.managed.screensize", String.format("%sx%s", WIDTH, HEIGHT));
+ }
+
+ @BeforeEach
+ public void onSetUp() {
+ String[] args = {};
+ WhiteRabbitMain whiteRabbitMain = GuiActionRunner.execute(() -> new WhiteRabbitMain(true, args));
+ console = whiteRabbitMain.getConsole();
+ window = new FrameFixture(whiteRabbitMain.getFrame());
+ window.show(); // shows the frame to test
+ }
+
+ @Test
+ void testSourceDataScanFromGui(@TempDir Path tempDir) throws IOException, URISyntaxException {
+ URL referenceScanReport = TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-csv.xlsx");
+ Path personCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/person-header.csv").toURI());
+ Path costCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/cost-header.csv").toURI());
+ Files.copy(personCsv, tempDir.resolve("person.csv"));
+ Files.copy(costCsv, tempDir.resolve("cost.csv"));
+ window.tabbedPane("TabbedPane").selectTab(WhiteRabbitMain.LABEL_LOCATIONS);
+ window.comboBox("SourceType").selectItem(DbType.DELIMITED_TEXT_FILES.label());
+ window.textBox(LocationsPanel.NAME_DELIMITER).setText(",");
+ window.textBox("FolderField").setText(tempDir.toAbsolutePath().toString());
+ window.tabbedPane("TabbedPane").selectTab("Scan");
+ window.button("Add").click();
+ window.fileChooser("FileChooser").fileNameTextBox().setText("\"cost.csv\" \"person.csv\"");
+ window.fileChooser("FileChooser").approveButton().click();
+ window.button(WhiteRabbitMain.LABEL_SCAN_TABLES).click();
+
+ assertTrue(ScanTestUtils.isScanReportGeneratedAndMatchesReference(
+ console,
+ tempDir.resolve("ScanReport.xlsx"),
+ Paths.get(referenceScanReport.toURI()),
+ DbType.DELIMITED_TEXT_FILES));
+ }
+}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvIniFile.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvIniFile.java
new file mode 100644
index 00000000..0e6f21c9
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvIniFile.java
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.io.TempDir;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.whiterabbit.WhiteRabbitMain;
+import org.opentest4j.AssertionFailedError;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class TestSourceDataScanCsvIniFile {
+ @Test
+ void testSourceDataScanFromIniFile(@TempDir Path tempDir) throws URISyntaxException, IOException {
+ Charset charset = StandardCharsets.UTF_8;
+ Path iniFile = tempDir.resolve("tsv.ini");
+ URL iniTemplate = TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/tsv.ini.template");
+ URL referenceScanReport = TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-csv.xlsx");
+ Path personCsv = Paths.get(TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/person-header.csv").toURI());
+ Path costCsv = Paths.get(TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/cost-header.csv").toURI());
+ assertNotNull(iniTemplate);
+ String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset);
+ content = content.replaceAll("%WORKING_FOLDER%", tempDir.toString());
+ Files.write(iniFile, content.getBytes(charset));
+ Files.copy(personCsv, tempDir.resolve("person.csv"));
+ Files.copy(costCsv, tempDir.resolve("cost.csv"));
+ WhiteRabbitMain wrMain = new WhiteRabbitMain(false, new String[]{"-ini", iniFile.toAbsolutePath().toString()});
+ assertNotNull(referenceScanReport);
+ assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.DELIMITED_TEXT_FILES));
+ }
+
+ @Test
+ // minimal test to verify comparing ScanReports: test the tester :-) (and no, this test strictly speaking does not belong here, it should be in its own class)
+ void testCompareSheets() {
+ // confirm that ScanTestUtils.compareSheets does know how to compare scan results (same, different)
+ Map<String, List<List<String>>> sheets1 = Collections.singletonMap("Field Overview", Collections.singletonList(Arrays.asList("one", "two", "three")));
+ Map<String, List<List<String>>> sheets2 = Collections.singletonMap("Field Overview", Collections.singletonList(Arrays.asList("one", "two", "three")));
+ Map<String, List<List<String>>> sheets3 = Collections.singletonMap("Field Overview", Collections.singletonList(Arrays.asList("two", "three", "four")));
+ AssertionFailedError thrown = Assertions.assertThrows(AssertionFailedError.class, () -> {
+ ScanTestUtils.scanValuesMatchReferenceValues(sheets1, sheets3, DbType.POSTGRESQL);
+ }, "AssertionFailedError was expected");
+ ScanTestUtils.scanValuesMatchReferenceValues(sheets1, sheets2, DbType.POSTGRESQL);
+ }
+}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java
deleted file mode 100644
index 0678d308..00000000
--- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package org.ohdsi.whiterabbit.scan;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.ohdsi.databases.RichConnection;
-import org.ohdsi.whiteRabbit.DbSettings;
-import org.ohdsi.whiteRabbit.scan.SourceDataScan;
-import org.testcontainers.containers.BindMode;
-import org.testcontainers.containers.PostgreSQLContainer;
-import org.testcontainers.junit.jupiter.Container;
-import org.testcontainers.junit.jupiter.Testcontainers;
-
-import java.io.IOException;
-import java.nio.file.Path;
-import java.util.List;
-
-import static org.junit.jupiter.api.Assertions.*;
-
-
-@Testcontainers(disabledWithoutDocker = true)
-class TestSourceDataScanPostgreSQL {
-
- @Container
- public static PostgreSQLContainer<?> postgreSQL;
-
- static {
- /*
- * Since the database is only read, setting it up once suffices.
- *
- * Note that the init script is read locally, but accesses the CSV files from
- * the resource mapped into the container.
- *
- * The data used in this test are actually OMOP data. One reason for this is convenience: the DDL
- * for this data is know and could simply be copied instead of composed.
- * Also, for the technical correctness of WhiteRabbit (does it open the database, get the table
- * names and scan those tables), the actual nature of the source data does not matter.
- */
- try {
- postgreSQL = new PostgreSQLContainer<>("postgres:13.1")
- .withUsername("test")
- .withPassword("test")
- .withDatabaseName("test")
- .withClasspathResourceMapping(
- "scan_data",
- "/scan_data",
- BindMode.READ_ONLY)
- .withInitScript("scan_data/create_data_postgresql.sql");
-
- postgreSQL.start();
-
- } finally {
- if (postgreSQL != null) {
- postgreSQL.stop();
- }
- }
- }
-
- @Test
- public void connectToDatabase() {
- // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker
- DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL);
- try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) {
- // do nothing, connection will be closed automatically because RichConnection implements interface Closeable
- }
- }
-
- @Test
- public void testGetTableNames() {
- // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker
- DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL);
- List<String> tableNames = ScanTestUtils.getTableNamesPostgreSQL(dbSettings);
- assertEquals(2, tableNames.size());
- }
- @Test
- void testSourceDataScan(@TempDir Path tempDir) throws IOException {
- Path outFile = tempDir.resolve("scanresult.xslx");
- SourceDataScan sourceDataScan = new SourceDataScan();
- DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL);
-
- sourceDataScan.process(dbSettings, outFile.toString());
- ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType);
- }
-}
diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/VerifyDistributionIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/VerifyDistributionIT.java
new file mode 100644
index 00000000..dbad9b2f
--- /dev/null
+++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/VerifyDistributionIT.java
@@ -0,0 +1,223 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import org.apache.commons.lang.StringUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.runners.Parameterized;
+import org.ohdsi.databases.DBConnector;
+import org.ohdsi.databases.SnowflakeTestUtils;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.utilities.files.IniFile;
+import org.ohdsi.whiterabbit.WhiteRabbitMain;
+import org.testcontainers.containers.BindMode;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.Container.ExecResult;
+import org.testcontainers.utility.DockerImageName;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.*;
+import java.util.function.BooleanSupplier;
+import java.util.stream.Collectors;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.ohdsi.whiterabbit.scan.SourceDataScanSnowflakeIT.createPythonContainer;
+import static org.ohdsi.whiterabbit.scan.SourceDataScanSnowflakeIT.prepareTestData;
+
+/**
+ * Intent: "deploy" the distributed application in a docker container (TestContainer) containing a Java runtime
+ * of a specified version, and runs a test of WhiteRabbit that aims to verify that the distribution is complete,
+ * i.e. no dependencies are missing. Data for a scan on csv files is used to run whiterabbit.
+ *
+ * Note that this does not test any of the JDBC driver dependencies, unless these databases are actually used.
+ */
+public class VerifyDistributionIT {
+
+ @TempDir
+ static Path tempDir;
+
+ private static final String WORKDIR_IN_CONTAINER = "/whiterabbit";
+ private static final String APPDIR_IN_CONTAINER = "/app";
+
+ @Test
+ void testDistributionWithJava8() throws IOException, URISyntaxException, InterruptedException {
+ testWhiteRabbitInContainer("eclipse-temurin:8", "openjdk version \"1.8.");
+ }
+
+ @Test
+ void testDistributionWithJava11() throws IOException, URISyntaxException, InterruptedException {
+ testWhiteRabbitInContainer("eclipse-temurin:11", "openjdk version \"11.0.");
+ }
+ @Test
+ void testDistributionWithJava17() throws IOException, URISyntaxException, InterruptedException {
+ testWhiteRabbitInContainer("eclipse-temurin:17", "openjdk version \"17.0.");
+ }
+
+ @Test
+ void verifyAllJDBCDriversLoadable() throws IOException, InterruptedException {
+ try (GenericContainer<?> javaContainer = createJavaContainer("eclipse-temurin:11")) {
+ javaContainer.start();
+ ExecResult execResult = javaContainer.execInContainer("sh", "-c",
+ String.format("cd %s/repo; java -classpath '*' org.ohdsi.databases.DBConnector", APPDIR_IN_CONTAINER));
+ if (execResult.getExitCode() != 0) {
+ System.out.println("stdout:" + execResult.getStdout());
+ System.out.println("stderr:" + execResult.getStderr());
+ }
+ assertTrue(execResult.getStdout().contains(DBConnector.ALL_JDBC_DRIVERS_LOADABLE), "Not all supported JDBC drivers could be loaded");
+ javaContainer.execInContainer("sh", "-c", "rm /app/repo/snowflake*"); // sabotage, confirms that test breaks if driver missing
+ execResult = javaContainer.execInContainer("sh", "-c",
+ String.format("cd %s/repo; java -classpath '*' org.ohdsi.databases.DBConnector", APPDIR_IN_CONTAINER));
+ assertFalse(execResult.getStdout().contains(DBConnector.ALL_JDBC_DRIVERS_LOADABLE), "Not all supported JDBC drivers could be loaded");
+ }
+ }
+
+ //@Test // useful while developing/debugging, leaving in place to test again after Snowflake JDBC driver update
+ void verifySnowflakeFailureInJava17() throws IOException, URISyntaxException, InterruptedException {
+ /*
+ * There is an issue with Snowflake JDBC that causes a failure in Java 16 and later
+ * (see https://community.snowflake.com/s/article/JDBC-Driver-Compatibility-Issue-With-JDK-16-and-Later)
+ * A flag can be passed to the JVM to work around this: --add-opens=java.base/java.nio=ALL-UNNAMED
+ *
+ * The whiteRabbit script in the distribution passes this flag.
+ *
+ * The tests below verify that:
+ * - the flag does not cause problems when running with Java 8 (1.8) or 11
+ * - without the flag, a failure occurs when running with Java 17
+ * - passing the flag fixes the failure with Java 17
+ *
+ * As the flag is in the distributed script, it needs to be edited out of the script.
+ *
+ * Note that we only test with the LTS versions of Java. This leaves Java 16 untested and unfixed.
+ *
+ * Once a fix is available in a newer version of the Snowflake JDBC jar, and it is used in WhiteRabbit,
+ * the test that now confirms the issue by expecting an Assertion error should start to fail.
+ * Then it is time to remove the flag (it is in the pom.xml for the whiterabbit module), and remove these tests,
+ * or normalize them to simply verify that all works well.
+ */
+ String patchingFlag = "--add-opens=java.base/java.nio=ALL-UNNAMED";
+ String javaOpts = String.format("JAVA_OPTS='%s'", patchingFlag);
+
+ // verify that the flag as set in the whiteRabbit script does not have an adverse effect when running with Java 11
+ // note that this flag is not supported by Java 8 (1.8)
+ runDistributionWithSnowflake("eclipse-temurin:11",javaOpts);
+
+ // verify that the failure occurs when running with Java 17, without the flag
+ AssertionError ignoredError = Assertions.assertThrows(org.opentest4j.AssertionFailedError.class, () -> {
+ runDistributionWithSnowflake("eclipse-temurin:17","");
+ });
+
+ // finally, verify that passing the flag fixes the failure when running with Java 17
+ runDistributionWithSnowflake("eclipse-temurin:17",javaOpts);
+ }
+
+ void runDistributionWithSnowflake(String javaImageName, String javaOpts) throws IOException, InterruptedException, URISyntaxException {
+ // test only run when there are settings available for Snowflake; otherwise it should be skipped
+ Assumptions.assumeTrue(new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker(), "Snowflake system properties file not available");
+ SnowflakeTestUtils.PropertyReader reader = new SnowflakeTestUtils.PropertyReader();
+ try (GenericContainer<?> testContainer = createPythonContainer()) {
+ prepareTestData(testContainer, reader);
+ testContainer.stop();
+
+ try (GenericContainer<?> javaContainer = createJavaContainer(javaImageName)) {
+ javaContainer.start();
+ Charset charset = StandardCharsets.UTF_8;
+ Path iniFile = tempDir.resolve("snowflake.ini");
+ URL iniTemplate = VerifyDistributionIT.class.getClassLoader().getResource("scan_data/snowflake.ini.template");
+ URL referenceScanReport = SourceDataScanSnowflakeIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx");
+ assert iniTemplate != null;
+ String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset);
+ content = content.replaceAll("%WORKING_FOLDER%", WORKDIR_IN_CONTAINER)
+ .replaceAll("%SNOWFLAKE_ACCOUNT%", reader.getOrFail("SNOWFLAKE_WR_TEST_ACCOUNT"))
+ .replaceAll("%SNOWFLAKE_USER%", reader.getOrFail("SNOWFLAKE_WR_TEST_USER"))
+ .replaceAll("%SNOWFLAKE_PASSWORD%", reader.getOrFail("SNOWFLAKE_WR_TEST_PASSWORD"))
+ .replaceAll("%SNOWFLAKE_WAREHOUSE%", reader.getOrFail("SNOWFLAKE_WR_TEST_WAREHOUSE"))
+ .replaceAll("%SNOWFLAKE_DATABASE%", reader.getOrFail("SNOWFLAKE_WR_TEST_DATABASE"))
+ .replaceAll("%SNOWFLAKE_SCHEMA%", reader.getOrFail("SNOWFLAKE_WR_TEST_SCHEMA"));
+ Files.write(iniFile, content.getBytes(charset));
+ // verify that the distribution of whiterabbit has been generated and is available inside the container
+ ExecResult execResult = javaContainer.execInContainer("sh", "-c", String.format("ls %s", APPDIR_IN_CONTAINER));
+ assertTrue(execResult.getStdout().contains("repo"), "WhiteRabbit distribution is not accessible inside container");
+
+ // run whiterabbit and verify the result
+ execResult = javaContainer.execInContainer("sh", "-c",
+ String.format("%s /app/bin/whiteRabbit -ini %s/snowflake.ini", javaOpts, WORKDIR_IN_CONTAINER));
+ assertTrue(execResult.getStdout().contains("Started new scan of 2 tables..."));
+ assertTrue(execResult.getStdout().contains("Scanning table PERSON"));
+ assertTrue(execResult.getStdout().contains("Scanning table COST"));
+ assertTrue(execResult.getStdout().contains("Scan report generated: /whiterabbit/ScanReport.xlsx"));
+
+ assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.SNOWFLAKE));
+ }
+ }
+ }
+
+ private void testWhiteRabbitInContainer(String imageName, String expectedVersion) throws IOException, InterruptedException, URISyntaxException {
+ try (GenericContainer<?> javaContainer = createJavaContainer(imageName)) {
+ javaContainer.start();
+
+ Charset charset = StandardCharsets.UTF_8;
+ Path iniFile = tempDir.resolve("tsv.ini");
+ URL iniTemplate = VerifyDistributionIT.class.getClassLoader().getResource("scan_data/tsv.ini.template");
+ URL referenceScanReport = VerifyDistributionIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-csv.xlsx");
+ Path personCsv = Paths.get(VerifyDistributionIT.class.getClassLoader().getResource("scan_data/person-header.csv").toURI());
+ Path costCsv = Paths.get(VerifyDistributionIT.class.getClassLoader().getResource("scan_data/cost-header.csv").toURI());
+ assertNotNull(iniTemplate);
+ String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset);
+ content = content.replaceAll("%WORKING_FOLDER%", WORKDIR_IN_CONTAINER);
+ Files.write(iniFile, content.getBytes(charset));
+ Files.copy(personCsv, tempDir.resolve("person.csv"), StandardCopyOption.REPLACE_EXISTING);
+ Files.copy(costCsv, tempDir.resolve("cost.csv"), StandardCopyOption.REPLACE_EXISTING);
+
+ // verify that the default java version in the container matches the expected version
+ ExecResult execResult = javaContainer.execInContainer("sh", "-c", "java -version");
+ assertTrue(execResult.getStderr().startsWith(expectedVersion), "default java version in container should match version " + expectedVersion);
+
+ // verify that the distribution of whiterabbit has been generated and is available inside the container
+ execResult = javaContainer.execInContainer("sh", "-c", String.format("ls %s", APPDIR_IN_CONTAINER));
+ assertTrue(execResult.getStdout().contains("repo"), "WhiteRabbit distribution is not accessible inside container");
+
+ // run whiterabbit and verify the result
+ execResult = javaContainer.execInContainer("sh", "-c", String.format("/app/bin/whiteRabbit -ini %s/tsv.ini", WORKDIR_IN_CONTAINER));
+ assertTrue(execResult.getStdout().contains("Started new scan of 2 tables..."));
+ assertTrue(execResult.getStdout().contains("Scanning table /whiterabbit/person.csv"));
+ assertTrue(execResult.getStdout().contains("Scanning table /whiterabbit/cost.csv"));
+ assertTrue(execResult.getStdout().contains("Scan report generated: /whiterabbit/ScanReport.xlsx"));
+
+ assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.DELIMITED_TEXT_FILES));
+
+ javaContainer.stop();
+ }
+ }
+
+ private GenericContainer<?> createJavaContainer(String imageName) {
+ return new GenericContainer<>(
+ DockerImageName.parse(imageName))
+ .withCommand("sh", "-c", "tail -f /dev/null")
+ .withFileSystemBind(Paths.get("../dist").toAbsolutePath().toString(), APPDIR_IN_CONTAINER)
+ .withFileSystemBind(tempDir.toString(), WORKDIR_IN_CONTAINER, BindMode.READ_WRITE);
+ }
+}
diff --git a/whiterabbit/src/test/resources/scan_data/README.md b/whiterabbit/src/test/resources/scan_data/README.md
new file mode 100644
index 00000000..19c7fe13
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/README.md
@@ -0,0 +1,6 @@
+The ScanReport-reference-v0.10.7-{csv,sql}.xlsx files in this directory were generated using the last version
+of WhiteRabbit that did not have any unit or integration tests, and serve as the reference for smoke/regression
+tests.
+
+Note that the order in which files/tables are generated into these xlsx files was (is) not entirely predictable,
+so some sorting is done in the tests to match the version under test.
\ No newline at end of file
diff --git a/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-csv.xlsx b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-csv.xlsx
new file mode 100644
index 00000000..b8d0d441
Binary files /dev/null and b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-csv.xlsx differ
diff --git a/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-sql.xlsx b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-sql.xlsx
new file mode 100644
index 00000000..ea091dfe
Binary files /dev/null and b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-sql.xlsx differ
diff --git a/whiterabbit/src/test/resources/scan_data/cost-header.csv b/whiterabbit/src/test/resources/scan_data/cost-header.csv
new file mode 100644
index 00000000..f6825044
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/cost-header.csv
@@ -0,0 +1,35 @@
+cost_id,cost_event_id,cost_domain_id,cost_type_concept_id,currency_concept_id,total_charge,total_cost,total_paid,paid_by_payer,paid_by_patient,paid_patient_copay,paid_patient_coinsurance,paid_patient_deductible,paid_by_primary,paid_ingredient_cost,paid_dispensing_fee,payer_plan_period_id,amount_allowed,revenue_code_concept_id,reveue_code_source_value,drg_concept_id,drg_source_value
+10791,1,Drug,0,44818668,,,180,,0,,0,,,,,,,0,,0,
+10792,2,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0,
+10793,3,Drug,0,44818668,,,60,,0,,0,,,,,,,0,,0,
+10794,4,Drug,0,44818668,,,130,,40,,40,,,,,,,0,,0,
+10795,6,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10796,8,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10797,10,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0,
+10798,11,Drug,0,44818668,,,40,,10,,10,,,,,,,0,,0,
+10799,12,Drug,0,44818668,,,110,,40,,40,,,,,,,0,,0,
+10800,14,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10801,18,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10802,19,Drug,0,44818668,,,10,,0,,0,,,,,,,0,,0,
+10803,21,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10804,25,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10805,27,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10806,28,Drug,0,44818668,,,0,,10,,10,,,,,,,0,,0,
+10807,29,Drug,0,44818668,,,30,,10,,10,,,,,,,0,,0,
+10808,31,Drug,0,44818668,,,350,,0,,0,,,,,,,0,,0,
+10809,33,Drug,0,44818668,,,10,,10,,10,,,,,,,0,,0,
+10810,35,Drug,0,44818668,,,570,,80,,80,,,,,,,0,,0,
+10811,37,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10812,38,Drug,0,44818668,,,150,,0,,0,,,,,,,0,,0,
+10813,41,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10814,42,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10815,45,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0,
+10816,51,Drug,0,44818668,,,80,,0,,0,,,,,,,0,,0,
+10817,52,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0,
+10818,53,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0,
+10819,55,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10820,56,Drug,0,44818668,,,70,,170,,170,,,,,,,0,,0,
+10821,58,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0,
+10822,61,Drug,0,44818668,,,160,,0,,0,,,,,,,0,,0,
+10823,62,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10824,63,Drug,0,44818668,,,350,,10,,10,,,,,,,0,,0,
diff --git a/whiterabbit/src/test/resources/scan_data/cost-no-header.csv b/whiterabbit/src/test/resources/scan_data/cost-no-header.csv
new file mode 100644
index 00000000..fa8fa46a
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/cost-no-header.csv
@@ -0,0 +1,34 @@
+10791,1,Drug,0,44818668,,,180,,0,,0,,,,,,,0,,0,
+10792,2,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0,
+10793,3,Drug,0,44818668,,,60,,0,,0,,,,,,,0,,0,
+10794,4,Drug,0,44818668,,,130,,40,,40,,,,,,,0,,0,
+10795,6,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10796,8,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10797,10,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0,
+10798,11,Drug,0,44818668,,,40,,10,,10,,,,,,,0,,0,
+10799,12,Drug,0,44818668,,,110,,40,,40,,,,,,,0,,0,
+10800,14,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10801,18,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10802,19,Drug,0,44818668,,,10,,0,,0,,,,,,,0,,0,
+10803,21,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10804,25,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10805,27,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10806,28,Drug,0,44818668,,,0,,10,,10,,,,,,,0,,0,
+10807,29,Drug,0,44818668,,,30,,10,,10,,,,,,,0,,0,
+10808,31,Drug,0,44818668,,,350,,0,,0,,,,,,,0,,0,
+10809,33,Drug,0,44818668,,,10,,10,,10,,,,,,,0,,0,
+10810,35,Drug,0,44818668,,,570,,80,,80,,,,,,,0,,0,
+10811,37,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10812,38,Drug,0,44818668,,,150,,0,,0,,,,,,,0,,0,
+10813,41,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10814,42,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0,
+10815,45,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0,
+10816,51,Drug,0,44818668,,,80,,0,,0,,,,,,,0,,0,
+10817,52,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0,
+10818,53,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0,
+10819,55,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0,
+10820,56,Drug,0,44818668,,,70,,170,,170,,,,,,,0,,0,
+10821,58,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0,
+10822,61,Drug,0,44818668,,,160,,0,,0,,,,,,,0,,0,
+10823,62,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0,
+10824,63,Drug,0,44818668,,,350,,10,,10,,,,,,,0,,0,
diff --git a/whiterabbit/src/test/resources/scan_data/cost.csv b/whiterabbit/src/test/resources/scan_data/cost.csv
deleted file mode 100644
index 7904c62e..00000000
--- a/whiterabbit/src/test/resources/scan_data/cost.csv
+++ /dev/null
@@ -1,34 +0,0 @@
-10791 1 Drug 0 44818668 180 0 0 0 0
-10792 2 Drug 0 44818668 70 70 70 0 0
-10793 3 Drug 0 44818668 60 0 0 0 0
-10794 4 Drug 0 44818668 130 40 40 0 0
-10795 6 Drug 0 44818668 30 0 0 0 0
-10796 8 Drug 0 44818668 20 0 0 0 0
-10797 10 Drug 0 44818668 120 0 0 0 0
-10798 11 Drug 0 44818668 40 10 10 0 0
-10799 12 Drug 0 44818668 110 40 40 0 0
-10800 14 Drug 0 44818668 30 0 0 0 0
-10801 18 Drug 0 44818668 0 0 0 0 0
-10802 19 Drug 0 44818668 10 0 0 0 0
-10803 21 Drug 0 44818668 30 0 0 0 0
-10804 25 Drug 0 44818668 20 0 0 0 0
-10805 27 Drug 0 44818668 20 0 0 0 0
-10806 28 Drug 0 44818668 0 10 10 0 0
-10807 29 Drug 0 44818668 30 10 10 0 0
-10808 31 Drug 0 44818668 350 0 0 0 0
-10809 33 Drug 0 44818668 10 10 10 0 0
-10810 35 Drug 0 44818668 570 80 80 0 0
-10811 37 Drug 0 44818668 0 0 0 0 0
-10812 38 Drug 0 44818668 150 0 0 0 0
-10813 41 Drug 0 44818668 0 0 0 0 0
-10814 42 Drug 0 44818668 20 0 0 0 0
-10815 45 Drug 0 44818668 70 0 0 0 0
-10816 51 Drug 0 44818668 80 0 0 0 0
-10817 52 Drug 0 44818668 120 0 0 0 0
-10818 53 Drug 0 44818668 70 70 70 0 0
-10819 55 Drug 0 44818668 0 0 0 0 0
-10820 56 Drug 0 44818668 70 170 170 0 0
-10821 58 Drug 0 44818668 70 0 0 0 0
-10822 61 Drug 0 44818668 160 0 0 0 0
-10823 62 Drug 0 44818668 30 0 0 0 0
-10824 63 Drug 0 44818668 350 10 10 0 0
diff --git a/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql b/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql
index 23cd38f3..f7c7d66e 100644
--- a/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql
+++ b/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql
@@ -51,5 +51,5 @@ CREATE TABLE cost
;
-COPY COST FROM '/scan_data/cost.csv' DELIMITER E'\t' CSV ENCODING 'UTF8';
-COPY PERSON FROM '/scan_data/person.csv' DELIMITER E'\t' CSV ENCODING 'UTF8';
+COPY COST FROM '/scan_data/cost-no-header.csv' DELIMITER ',' CSV ENCODING 'UTF8';
+COPY PERSON FROM '/scan_data/person-no-header.csv' DELIMITER ',' CSV ENCODING 'UTF8';
diff --git a/whiterabbit/src/test/resources/scan_data/create_data_snowflake.sql b/whiterabbit/src/test/resources/scan_data/create_data_snowflake.sql
new file mode 100644
index 00000000..3b53cf1e
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/create_data_snowflake.sql
@@ -0,0 +1,32 @@
+//
+// To be able to use the configured snowflake test environment, make sure that the role and grant
+// statements below have been executed, using the correct snowflake username for <>
+//
+//create role if not exists testrole;
+//grant usage on database test to role testrole;
+//grant usage on schema test.wr_test to role testrole;
+//grant ALL PRIVILEGES on schema test.wr_test to role testrole;
+//grant role testrole to user <>;
+
+//use schema test.wr_test;
+
+DROP TABLE IF EXISTS wr_test.person;
+DROP TABLE IF EXISTS wr_test.cost;
+
+CREATE TABLE wr_test.cost (cost_id BIGINT, cost_event_id BIGINT, cost_domain_id STRING, cost_type_concept_id BIGINT, currency_concept_id BIGINT, total_charge NUMERIC, total_cost NUMERIC, total_paid NUMERIC, paid_by_payer NUMERIC, paid_by_patient NUMERIC, paid_patient_copay NUMERIC, paid_patient_coinsurance NUMERIC, paid_patient_deductible NUMERIC, paid_by_primary NUMERIC, paid_ingredient_cost NUMERIC, paid_dispensing_fee NUMERIC, payer_plan_period_id BIGINT, amount_allowed NUMERIC, revenue_code_concept_id BIGINT, reveue_code_source_value STRING, drg_concept_id BIGINT, drg_source_value STRING);
+
+CREATE TABLE wr_test.person (person_id BIGINT, gender_concept_id BIGINT, year_of_birth BIGINT, month_of_birth BIGINT, day_of_birth BIGINT, birth_datetime TIMESTAMP, race_concept_id BIGINT, ethnicity_concept_id BIGINT, location_id BIGINT, provider_id BIGINT, care_site_id BIGINT, person_source_value STRING, gender_source_value STRING, gender_source_concept_id BIGINT, race_source_value STRING, race_source_concept_id BIGINT, ethnicity_source_value STRING, ethnicity_source_concept_id BIGINT);
+
+REMOVE @~ pattern=".*csv.gz";
+
+put file:///scan_data/cost-no-header.csv @~;
+
+put file:///scan_data/person-no-header.csv @~;
+
+CREATE OR REPLACE FILE FORMAT my_csv_format TYPE = 'csv' FIELD_DELIMITER = ',';
+
+COPY INTO cost from @~/cost-no-header.csv.gz FILE_FORMAT = (FORMAT_NAME = 'my_csv_format');
+
+COPY INTO person from @~/person-no-header.csv.gz FILE_FORMAT = (FORMAT_NAME = 'my_csv_format');
+
+REMOVE @~ pattern=".*csv.gz";
\ No newline at end of file
diff --git a/whiterabbit/src/test/resources/scan_data/person-header.csv b/whiterabbit/src/test/resources/scan_data/person-header.csv
new file mode 100644
index 00000000..2661396a
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/person-header.csv
@@ -0,0 +1,31 @@
+person_id,gender_concept_id,year_of_birth,month_of_birth,day_of_birth,birth_datetime,race_concept_id,ethnicity_concept_id,location_id,provider_id,care_site_id,person_source_value,gender_source_value,gender_source_concept_id,race_source_value,race_source_concept_id,ethnicity_source_value,ethnicity_source_concept_id
+1,8507,1923,5,1,,8527,38003564,1,,,00013D2EFD8E45D1,1,,1,,1,
+2,8507,1943,1,1,,8527,38003564,2,,,00016F745862898F,1,,1,,1,
+3,8532,1936,9,1,,8527,38003564,3,,,0001FDD721E223DC,2,,1,,1,
+4,8507,1941,6,1,,0,38003563,4,,,00021CA6FF03E670,1,,5,,5,
+5,8507,1936,8,1,,8527,38003564,5,,,00024B3D2352D2D0,1,,1,,1,
+6,8507,1943,10,1,,8516,38003564,6,,,0002DAE1C81CC70D,1,,2,,2,
+7,8507,1922,7,1,,8527,38003564,7,,,0002F28CE057345B,1,,1,,1,
+8,8507,1935,9,1,,8527,38003564,8,,,000308435E3E5B76,1,,1,,1,
+9,8532,1976,9,1,,8527,38003564,9,,,000345A39D4157C9,2,,1,,1,
+10,8532,1938,10,1,,8516,38003564,10,,,00036A21B65B0206,2,,2,,2,
+11,8532,1934,2,1,,8527,38003564,11,,,000489E7EAAD463F,2,,1,,1,
+12,8507,1929,6,1,,8527,38003564,12,,,00048EF1F4791C68,1,,1,,1,
+13,8532,1936,7,1,,8527,38003564,13,,,0004F0ABD505251D,2,,1,,1,
+14,8507,1934,5,1,,8527,38003564,14,,,00052705243EA128,1,,1,,1,
+15,8532,1936,3,1,,8527,38003564,15,,,00070B63745BE497,2,,1,,1,
+16,8507,1934,1,1,,8527,38003564,16,,,0007E57CC13CE880,1,,1,,1,
+17,8532,1919,9,1,,8516,38003564,17,,,0007F12A492FD25D,2,,2,,2,
+18,8532,1919,10,1,,8516,38003564,18,,,000A005BA0BED3EA,2,,2,,2,
+19,8532,1942,7,1,,8527,38003564,19,,,000B4662348C35B4,2,,1,,1,
+20,8507,1938,4,1,,8527,38003564,20,,,000B97BA2314E971,1,,1,,1,
+21,8507,1932,8,1,,8516,38003564,21,,,000C7486B11E7030,1,,2,,2,
+23,8507,1932,7,1,,8527,38003564,23,,,000DDD364C46E2C6,1,,1,,1,
+25,8507,1965,4,1,,8527,38003564,25,,,00108066CA1FACCE,1,,1,,1,
+26,8532,1939,12,1,,8527,38003564,26,,,0010D6F80D245D62,2,,1,,1,
+27,8532,1940,4,1,,8527,38003564,27,,,0011714C14B52EEB,2,,1,,1,
+28,8507,1937,10,1,,8527,38003564,28,,,0011CB1FE23E91AF,1,,1,,1,
+29,8507,1938,4,1,,8527,38003564,29,,,0012AFEEC379A69D,1,,1,,1,
+30,8532,1959,11,1,,8527,38003564,30,,,00131C35661B2926,2,,1,,1,
+31,8532,1922,10,1,,8527,38003564,31,,,00139C345A104F72,2,,1,,1,
+32,8532,1953,12,1,,8527,38003564,32,,,0013E139F1F37264,2,,1,,1,
diff --git a/whiterabbit/src/test/resources/scan_data/person-no-header.csv b/whiterabbit/src/test/resources/scan_data/person-no-header.csv
new file mode 100644
index 00000000..63d4629b
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/person-no-header.csv
@@ -0,0 +1,30 @@
+1,8507,1923,5,1,,8527,38003564,1,,,00013D2EFD8E45D1,1,,1,,1,
+2,8507,1943,1,1,,8527,38003564,2,,,00016F745862898F,1,,1,,1,
+3,8532,1936,9,1,,8527,38003564,3,,,0001FDD721E223DC,2,,1,,1,
+4,8507,1941,6,1,,0,38003563,4,,,00021CA6FF03E670,1,,5,,5,
+5,8507,1936,8,1,,8527,38003564,5,,,00024B3D2352D2D0,1,,1,,1,
+6,8507,1943,10,1,,8516,38003564,6,,,0002DAE1C81CC70D,1,,2,,2,
+7,8507,1922,7,1,,8527,38003564,7,,,0002F28CE057345B,1,,1,,1,
+8,8507,1935,9,1,,8527,38003564,8,,,000308435E3E5B76,1,,1,,1,
+9,8532,1976,9,1,,8527,38003564,9,,,000345A39D4157C9,2,,1,,1,
+10,8532,1938,10,1,,8516,38003564,10,,,00036A21B65B0206,2,,2,,2,
+11,8532,1934,2,1,,8527,38003564,11,,,000489E7EAAD463F,2,,1,,1,
+12,8507,1929,6,1,,8527,38003564,12,,,00048EF1F4791C68,1,,1,,1,
+13,8532,1936,7,1,,8527,38003564,13,,,0004F0ABD505251D,2,,1,,1,
+14,8507,1934,5,1,,8527,38003564,14,,,00052705243EA128,1,,1,,1,
+15,8532,1936,3,1,,8527,38003564,15,,,00070B63745BE497,2,,1,,1,
+16,8507,1934,1,1,,8527,38003564,16,,,0007E57CC13CE880,1,,1,,1,
+17,8532,1919,9,1,,8516,38003564,17,,,0007F12A492FD25D,2,,2,,2,
+18,8532,1919,10,1,,8516,38003564,18,,,000A005BA0BED3EA,2,,2,,2,
+19,8532,1942,7,1,,8527,38003564,19,,,000B4662348C35B4,2,,1,,1,
+20,8507,1938,4,1,,8527,38003564,20,,,000B97BA2314E971,1,,1,,1,
+21,8507,1932,8,1,,8516,38003564,21,,,000C7486B11E7030,1,,2,,2,
+23,8507,1932,7,1,,8527,38003564,23,,,000DDD364C46E2C6,1,,1,,1,
+25,8507,1965,4,1,,8527,38003564,25,,,00108066CA1FACCE,1,,1,,1,
+26,8532,1939,12,1,,8527,38003564,26,,,0010D6F80D245D62,2,,1,,1,
+27,8532,1940,4,1,,8527,38003564,27,,,0011714C14B52EEB,2,,1,,1,
+28,8507,1937,10,1,,8527,38003564,28,,,0011CB1FE23E91AF,1,,1,,1,
+29,8507,1938,4,1,,8527,38003564,29,,,0012AFEEC379A69D,1,,1,,1,
+30,8532,1959,11,1,,8527,38003564,30,,,00131C35661B2926,2,,1,,1,
+31,8532,1922,10,1,,8527,38003564,31,,,00139C345A104F72,2,,1,,1,
+32,8532,1953,12,1,,8527,38003564,32,,,0013E139F1F37264,2,,1,,1,
diff --git a/whiterabbit/src/test/resources/scan_data/person.csv b/whiterabbit/src/test/resources/scan_data/person.csv
deleted file mode 100644
index e10b61b0..00000000
--- a/whiterabbit/src/test/resources/scan_data/person.csv
+++ /dev/null
@@ -1,30 +0,0 @@
-1 8507 1923 5 1 8527 38003564 1 00013D2EFD8E45D1 1 1 1
-2 8507 1943 1 1 8527 38003564 2 00016F745862898F 1 1 1
-3 8532 1936 9 1 8527 38003564 3 0001FDD721E223DC 2 1 1
-4 8507 1941 6 1 0 38003563 4 00021CA6FF03E670 1 5 5
-5 8507 1936 8 1 8527 38003564 5 00024B3D2352D2D0 1 1 1
-6 8507 1943 10 1 8516 38003564 6 0002DAE1C81CC70D 1 2 2
-7 8507 1922 7 1 8527 38003564 7 0002F28CE057345B 1 1 1
-8 8507 1935 9 1 8527 38003564 8 000308435E3E5B76 1 1 1
-9 8532 1976 9 1 8527 38003564 9 000345A39D4157C9 2 1 1
-10 8532 1938 10 1 8516 38003564 10 00036A21B65B0206 2 2 2
-11 8532 1934 2 1 8527 38003564 11 000489E7EAAD463F 2 1 1
-12 8507 1929 6 1 8527 38003564 12 00048EF1F4791C68 1 1 1
-13 8532 1936 7 1 8527 38003564 13 0004F0ABD505251D 2 1 1
-14 8507 1934 5 1 8527 38003564 14 00052705243EA128 1 1 1
-15 8532 1936 3 1 8527 38003564 15 00070B63745BE497 2 1 1
-16 8507 1934 1 1 8527 38003564 16 0007E57CC13CE880 1 1 1
-17 8532 1919 9 1 8516 38003564 17 0007F12A492FD25D 2 2 2
-18 8532 1919 10 1 8516 38003564 18 000A005BA0BED3EA 2 2 2
-19 8532 1942 7 1 8527 38003564 19 000B4662348C35B4 2 1 1
-20 8507 1938 4 1 8527 38003564 20 000B97BA2314E971 1 1 1
-21 8507 1932 8 1 8516 38003564 21 000C7486B11E7030 1 2 2
-23 8507 1932 7 1 8527 38003564 23 000DDD364C46E2C6 1 1 1
-25 8507 1965 4 1 8527 38003564 25 00108066CA1FACCE 1 1 1
-26 8532 1939 12 1 8527 38003564 26 0010D6F80D245D62 2 1 1
-27 8532 1940 4 1 8527 38003564 27 0011714C14B52EEB 2 1 1
-28 8507 1937 10 1 8527 38003564 28 0011CB1FE23E91AF 1 1 1
-29 8507 1938 4 1 8527 38003564 29 0012AFEEC379A69D 1 1 1
-30 8532 1959 11 1 8527 38003564 30 00131C35661B2926 2 1 1
-31 8532 1922 10 1 8527 38003564 31 00139C345A104F72 2 1 1
-32 8532 1953 12 1 8527 38003564 32 0013E139F1F37264 2 1 1
diff --git a/whiterabbit/src/test/resources/scan_data/snowflake.ini.template b/whiterabbit/src/test/resources/scan_data/snowflake.ini.template
new file mode 100644
index 00000000..ab12cd68
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/snowflake.ini.template
@@ -0,0 +1,16 @@
+# Usage: dist/bin/whiteRabbit -ini <path-to-ini-file>
+WORKING_FOLDER = %WORKING_FOLDER% # Path to the folder where all output will be written
+DATA_TYPE = Snowflake # "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", "Redshift", "BigQuery", "Azure", "Teradata", "SAS7bdat"
+SNOWFLAKE_ACCOUNT = %SNOWFLAKE_ACCOUNT% # Name or address of the server. For Postgres, add the database name
+SNOWFLAKE_USER = %SNOWFLAKE_USER% # User name for the database
+SNOWFLAKE_PASSWORD = %SNOWFLAKE_PASSWORD% # Password for the database
+SNOWFLAKE_WAREHOUSE = %SNOWFLAKE_WAREHOUSE% # Name of the warehouse to use
+SNOWFLAKE_DATABASE = %SNOWFLAKE_DATABASE%
+SNOWFLAKE_SCHEMA = %SNOWFLAKE_SCHEMA%
+TABLES_TO_SCAN = * # Comma-delimited list of table names to scan. Use "*" (asterisk) to include all tables in the database
+SCAN_FIELD_VALUES = yes # Include the frequency of field values in the scan report? "yes" or "no"
+MIN_CELL_COUNT = 5 # Minimum frequency for a field value to be included in the report
+MAX_DISTINCT_VALUES = 1000 # Maximum number of distinct values per field to be reported
+ROWS_PER_TABLE = 100000 # Maximum number of rows per table to be scanned for field values
+CALCULATE_NUMERIC_STATS = no # Include average, standard deviation and quartiles in the scan report? "yes" or "no"
+NUMERIC_STATS_SAMPLER_SIZE = 500 # Maximum number of rows used to calculate numeric statistics
diff --git a/whiterabbit/src/test/resources/scan_data/tsv.ini.template b/whiterabbit/src/test/resources/scan_data/tsv.ini.template
new file mode 100644
index 00000000..2e287355
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/tsv.ini.template
@@ -0,0 +1,14 @@
+WORKING_FOLDER = %WORKING_FOLDER% # Path to the folder where all output will be written
+DATA_TYPE = Delimited text files # "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", "Redshift", "BigQuery", "Azure", "Teradata", "SAS7bdat"
+SERVER_LOCATION = 127.0.0.1/data_base_name # Name or address of the server. For Postgres, add the database name
+USER_NAME = joe # User name for the database
+PASSWORD = supersecret # Password for the database
+DATABASE_NAME = schema_name # Name of the data schema used
+DELIMITER = , # The delimiter that separates values
+TABLES_TO_SCAN = * # Comma-delimited list of table names to scan. Use "*" (asterisk) to include all tables in the database
+SCAN_FIELD_VALUES = yes # Include the frequency of field values in the scan report? "yes" or "no"
+MIN_CELL_COUNT = 5 # Minimum frequency for a field value to be included in the report
+MAX_DISTINCT_VALUES = 1000 # Maximum number of distinct values per field to be reported
+ROWS_PER_TABLE = 100000 # Maximum number of rows per table to be scanned for field values
+CALCULATE_NUMERIC_STATS = no # Include average, standard deviation and quartiles in the scan report? "yes" or "no"
+NUMERIC_STATS_SAMPLER_SIZE = 500 # Maximum number of rows used to calculate numeric statistics