diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 65a5c4fb..f1c4cd36 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,7 +21,7 @@ jobs: - uses: actions/setup-java@v3 with: distribution: temurin - java-version: 8 + java-version: 17 cache: maven # Compile the code diff --git a/.gitignore b/.gitignore index 30e7e2f5..75067728 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ dist/ .idea/ target/ *.class +*.log # jenv file(s) .java-version @@ -20,3 +21,6 @@ Try* /examples/ .DS_Store data/ + +# contains authentication data for a Snowflake instance +snowflake.env diff --git a/README.md b/README.md index e2e907aa..853e66f4 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Requires Java 1.8 or higher, and read access to the database to be scanned. Java Dependencies ============ -For the distributable packages, the only requirement is Java 8. For building the package, also Maven is needed. +For the distributable packages, the only requirement is Java 8. For building the package, Java 17 and Maven are needed. Getting Started =============== @@ -90,22 +90,42 @@ Development =========== White Rabbit and Rabbit in a Hat are structured as a Maven package and can be developed in Eclipse. Contributions are welcome. +While the software in the project can be executed with Java 1.8, for development Java 17 is needed. +This has to do with test and verification dependencies that are not available in a version compatible with Java 1.8. + +Please note that when using an IDE for development, source and target release must still be Java 1.8. This is enforced +in the maven build file (pom.xml). + To generate the files ready for distribution, run `mvn install`. ### Testing -Some newer code has unit and/or integration tests. Tests that depend on external resources being available, -such as a database, should be excluded from executing automatically. 
You can use the @Tag annotation to combine -such tests in a group with the same tag, and exclude that tag from being run automatically by maven (this -should be done in the configuration of the surefire plugin in pom.xml of the module involved). - -An exception to the above are tests that depend on Docker. Currently, these tests are implemented with the -TestContainers library, and are configured to check for Docker being present. If not, these tests will not -be run, but the tests as a whole will still succeed. However, it is recommended that these tests are run since -these tests verify essential functionality for WhiteRabbit, like the database interface. +A limited number of unit and integration tests exist. The integration tests run only in the maven verification phase, +(`mvn verify`) and depend on docker being available to the user running the verification. If docker is not available, the +integration tests will fail. Also, GitHub actions have been configured to run the test suite automatically. +#### Snowflake + +There are automated tests for Snowflake, but since it is not (yet?) possible to have a local +Snowflake instance in a Docker container, these tests will only run if the following information +is provided through environment variables: + + SNOWFLAKE_WR_TEST_ACCOUNT + SNOWFLAKE_WR_TEST_USER + SNOWFLAKE_WR_TEST_PASSWORD + SNOWFLAKE_WR_TEST_WAREHOUSE + SNOWFLAKE_WR_TEST_DATABASE + SNOWFLAKE_WR_TEST_SCHEMA + +It is recommended that user, password, database and schema are created for these tests only, +and do not relate in any way to any production environment. +The schema should not contain any tables when the test is started. + +It is possible to skip the Snowflake tests without failing the build by passing +`-Dohdsi.org.whiterabbit.skip_snowflake_tests=1` to maven. + ### Development status Production. This program is being used by many people. 
diff --git a/docs/images/riah_stem_table.png b/docs/images/riah_stem_table.png index 73104a4a..e6de0bdd 100644 Binary files a/docs/images/riah_stem_table.png and b/docs/images/riah_stem_table.png differ diff --git a/pom.xml b/pom.xml index e357586d..05d85f73 100644 --- a/pom.xml +++ b/pom.xml @@ -15,6 +15,18 @@ Leporidae https://www.ohdsi.org/analytic-tools/whiterabbit-for-etl-design/ + + OHDSI + https://www.ohdsi.org + + + + + Apache License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + + + central @@ -78,7 +90,13 @@ 1.8 1.8 + 1.8 UTF-8 + + false + ${skipTests} + ${skipTests} + 1.8 @@ -106,8 +124,42 @@ org.apache.maven.plugins maven-surefire-plugin - - 3.0.0-M8 + 3.1.2 + + ${skipUnitTests} + 1 + false + + false + com.github.caciocavallosilano.cacio.ctc.CTCToolkit + com.github.caciocavallosilano.cacio.ctc.CTCGraphicsEnvironment + + + -Doracle.jdbc.timezoneAsRegion=false + + --add-exports=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + + --add-opens java.base/java.lang.reflect=ALL-UNNAMED + --add-exports java.base/java.lang.reflect=ALL-UNNAMED + --add-exports=java.desktop/java.awt=ALL-UNNAMED + --add-exports=java.desktop/java.awt.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.image=ALL-UNNAMED + --add-exports=java.desktop/sun.java2d=ALL-UNNAMED + --add-exports=java.desktop/java.awt.dnd.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.event=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.datatransfer=ALL-UNNAMED + --add-exports=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.desktop/java.awt=ALL-UNNAMED + --add-opens=java.desktop/sun.java2d=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens java.base/java.lang=ALL-UNNAMED + + + org.junit.jupiter @@ -116,14 +168,41 @@ - + + org.apache.maven.plugins + maven-failsafe-plugin + + + **/*IT.java + + ${skipIntegrationTests} + + 
+ + + integration-test + verify + + + + + + org.honton.chas + license-maven-plugin + 0.0.3 + + + org.apache.maven.plugins + maven-clean-plugin + 3.3.1 + maven-clean-plugin - 3.1.0 + 3.3.1 @@ -146,6 +225,125 @@ maven-project-info-reports-plugin 3.0.0 + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.4.1 + + + enforce-java + + enforce + + + + + 17 + + + + + + + + org.honton.chas + license-maven-plugin + 0.0.3 + + + + Oracle Free Use Terms and Conditions \(FUTC\) + + + The GNU General Public License, v2 with Universal FOSS Exception, v1.0 + + + Plexus + https://github.com/dom4j/dom4j/blob/master/LICENSE + + + (The )?Apache( )?(Software )?(License)?(,)? (Version )?(2.0|v2) + https://www.apache.org/licenses/LICENSE-2.0.txt + + + BSD-2-Clause + https://jdbc.postgresql.org/about/license.html + + + HSQLDB License, a BSD open source license + https://hsqldb.org/web/hsqlLicense.html + + + MIT License + https://www.opensource.org/licenses/mit-license.php + + + MIT + https://opensource.org/licenses/MIT + + + Eclipse Public License 1.0 + https://www.eclipse.org/legal/epl-v10.html + + + Eclipse Public License v2.0 + https://www.eclipse.org/legal/epl-v20.html + + + GPL2 with classpath exception + https://openjdk.java.net/legal/gplv2+ce.html + + + + + com.microsoft.sqlserver:sqljdbc4 + + + com.teradata.jdbc:terajdbc4 + com.teradata.tdgss:tdgssconfig + com.simba.googlebigquery.jdbc:GoogleBigQueryJDBC + com.simba.googlebigquery.jdbc:google-api-client + com.simba.googlebigquery.jdbc:google-http-client + com.simba.googlebigquery.jdbc:gax + com.simba.googlebigquery.jdbc:google-http-client-jackson2 + com.simba.googlebigquery.jdbc:google-oauth-client + com.simba.googlebigquery.jdbc:google-auth-library-oauth2-http + com.simba.googlebigquery.jdbc:google-auth-library-credentials + com.simba.googlebigquery.jdbc:jackson-core + com.simba.googlebigquery.jdbc:guava + com.simba.googlebigquery.jdbc:google-api-service-bigquery + com.simba.googlebigquery.jdbc:opencensus-api + 
com.simba.googlebigquery.jdbc:opencensus-contrib-http-util + com.simba.googlebigquery.jdbc:grpc-context + com.simba.googlebigquery.jdbc:joda-time + + + + + + + compliance + + + + + + com.github.ferstl + depgraph-maven-plugin + 4.0.2 + + diff --git a/rabbit-core/pom.xml b/rabbit-core/pom.xml index dd527e4a..4fdb1ad4 100644 --- a/rabbit-core/pom.xml +++ b/rabbit-core/pom.xml @@ -14,9 +14,26 @@ UTF-8 - 4.1.2 + 5.2.4 + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + test-jar + + + + + + + com.oracle.ojdbc @@ -38,50 +55,39 @@ dom4j 2.1.4 + + org.slf4j + slf4j-simple + 1.7.36 + + org.apache.poi poi - 4.1.2 - - - org.apache.ant - ant - - + ${apache-poi-version} org.apache.poi poi-ooxml - 4.1.2 - - - org.apache.commons - commons-compress - - + ${apache-poi-version} org.apache.poi poi-excelant - 4.1.2 - - - org.apache.ant - ant - - + ${apache-poi-version} + org.apache.poi - poi-ooxml-schemas - 4.1.2 + poi-ooxml-lite + ${apache-poi-version} org.apache.xmlbeans xmlbeans - 3.1.0 + 5.1.1 org.postgresql @@ -113,11 +119,6 @@ commons-compress 1.24.0 - - org.hsqldb - hsqldb - 2.7.2 - com.healthmarketscience.jackcess jackcess @@ -127,6 +128,18 @@ net.sf.ucanaccess ucanaccess 5.0.1 + + + org.hsqldb + hsqldb + + + + + org.hsqldb + hsqldb + 2.7.2 + jdk8 com.amazon.redshift @@ -202,6 +215,12 @@ com.simba.googlebigquery.jdbc opencensus-api 0.18.0 + + + com.simba.googlebigquery.jdbc + avro + + com.simba.googlebigquery.jdbc @@ -218,10 +237,17 @@ joda-time 2.10.1 + - com.simba.googlebigquery.jdbc + org.apache.avro avro - 1.8.2 + 1.11.3 + + + com.fasterxml.jackson.core + jackson-core + + com.epam @@ -234,5 +260,30 @@ ant 1.10.14 + + + + net.snowflake + snowflake-jdbc + 3.14.3 + + + org.junit.jupiter + junit-jupiter + RELEASE + test + + + org.apache.httpcomponents + httpclient + 4.5.13 + compile + + + one.util + streamex + 0.8.2 + compile + diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/DBConnection.java 
b/rabbit-core/src/main/java/org/ohdsi/databases/DBConnection.java new file mode 100644 index 00000000..749c4e76 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/DBConnection.java @@ -0,0 +1,374 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases; + +import org.apache.commons.lang.StringUtils; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.utilities.files.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.*; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.List; + +/* + * DBConnection is a wrapper for java.sql.Connection + * + * + * The latter one instantiates a java.sql.Connection instance itself. 
+ * The constructors of DBConnection ensure that one of the following is true: + * - a java.sql.Connection implementing object is provided, and used it its methods + * - a StorageHandler implementing object is provided, and used to create a java.sql.Connection interface + * - if neither of the above is valid at construction, a RuntimeException is thrown + * + * DBConnection provides a partial subset of the java.sql.Connection interface, just enough to satisfy the + * needs of WhiteRabbit + */ +public class DBConnection { + Logger logger = LoggerFactory.getLogger(DBConnection.class); + + private final Connection connection; + private final DbType dbType; + private boolean verbose; + private final StorageHandler connectorInterface; + private static DecimalFormat decimalFormat = new DecimalFormat("#.#"); + + + public DBConnection(Connection connection, DbType dbType, boolean verbose) { + this.connection = connection; + this.dbType = dbType; + this.connectorInterface = null; + this.verbose = verbose; + } + + public DBConnection(StorageHandler connectorInterface, DbType dbType, boolean verbose) { + this.connectorInterface = connectorInterface; + connectorInterface.checkInitialised(); + this.connection = connectorInterface.getDBConnection().getConnection(); + this.dbType = dbType; + this.verbose = verbose; + } + + public Connection getConnection() { + return this.connection; + } + + public StorageHandler getStorageHandler() { + this.connectorInterface.checkInitialised(); + return this.connectorInterface; + } + + public void setVerbose(boolean verbose) { + this.verbose = verbose; + } + + public boolean isVerbose() { + return verbose; + } + + public boolean hasStorageHandler() { + return this.connectorInterface != null; + } + + public Statement createStatement(int typeForwardOnly, int concurReadOnly) throws SQLException { + return this.connection.createStatement(typeForwardOnly, concurReadOnly); + } + + public DatabaseMetaData getMetaData() throws SQLException { + return 
this.connection.getMetaData(); + } + + public void use(String database, DbType dbType) { + if (this.hasStorageHandler()) { + this.getStorageHandler().use(database); + } else { + if (database == null || dbType == DbType.MS_ACCESS || dbType == DbType.BIGQUERY || dbType == DbType.AZURE) { + return; + } + + if (dbType == DbType.ORACLE) { + execute("ALTER SESSION SET current_schema = " + database); + } else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) { + execute("SET search_path TO " + database); + } else if (dbType == DbType.TERADATA) { + execute("database " + database); + } else { + execute("USE " + database); + } + } + } + + public void execute(String sql) { + execute(sql, false); + } + + public void execute(String sql, boolean verbose) { + Statement statement = null; + try { + if (StringUtils.isEmpty(sql)) { + return; + } + + statement = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + for (String subQuery : sql.split(";")) { + if (verbose) { + String abbrSQL = subQuery.replace('\n', ' ').replace('\t', ' ').trim(); + if (abbrSQL.length() > 100) + abbrSQL = abbrSQL.substring(0, 100).trim() + "..."; + logger.info("Adding query to batch: " + abbrSQL); + } + + statement.addBatch(subQuery); + } + long start = System.currentTimeMillis(); + if (verbose) { + logger.info("Executing batch"); + } + statement.executeBatch(); + if (verbose) { + outputQueryStats(statement, System.currentTimeMillis() - start); + } + } catch (SQLException e) { + logger.error(sql); + logger.error(e.getMessage(), e); + } finally { + if (statement != null) { + try { + statement.close(); + } catch (SQLException e) { + logger.error(e.getMessage()); + } + } + } + } + + void outputQueryStats(Statement statement, long ms) throws SQLException { + Throwable warning = statement.getWarnings(); + if (warning != null) + logger.info("- SERVER: " + warning.getMessage()); + String timeString; + if (ms < 1000) + timeString = ms + " ms"; + else if (ms < 60000) + 
timeString = decimalFormat.format(ms / 1000d) + " seconds"; + else if (ms < 3600000) + timeString = decimalFormat.format(ms / 60000d) + " minutes"; + else + timeString = decimalFormat.format(ms / 3600000d) + " hours"; + logger.info("- Query completed in " + timeString); + } + + public List getTableNames(String database) { + if (this.hasStorageHandler()) { + return this.getStorageHandler().getTableNames(); + } else { + return getTableNamesClassic(database); + } + } + + public List fetchTableStructure(RichConnection connection, String database, String table, ScanParameters scanParameters) { + List fieldInfos = new ArrayList<>(); + + if (dbType.supportsStorageHandler()) { + fieldInfos = dbType.getStorageHandler().fetchTableStructure(table, scanParameters); + } else if (dbType == DbType.MS_ACCESS) { + ResultSet rs = getFieldNamesFromJDBC(table); + try { + while (rs.next()) { + FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME")); + fieldInfo.type = rs.getString("TYPE_NAME"); + fieldInfo.rowCount = connection.getTableSize(table); + fieldInfos.add(fieldInfo); + } + } catch (SQLException e) { + throw new RuntimeException(e.getMessage()); + } + } else { + String query = null; + if (dbType == DbType.ORACLE) + query = "SELECT COLUMN_NAME,DATA_TYPE FROM ALL_TAB_COLUMNS WHERE table_name = '" + table + "' AND owner = '" + database.toUpperCase() + "'"; + else if (dbType == DbType.SQL_SERVER || dbType == DbType.PDW) { + String trimmedDatabase = database; + if (database.startsWith("[") && database.endsWith("]")) + trimmedDatabase = database.substring(1, database.length() - 1); + String[] parts = table.split("\\."); + query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_CATALOG='" + trimmedDatabase + "' AND TABLE_SCHEMA='" + parts[0] + + "' AND TABLE_NAME='" + parts[1] + "';"; + } else if (dbType == DbType.AZURE) { + String[] parts = table.split("\\."); + query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS 
WHERE TABLE_SCHEMA='" + parts[0] + + "' AND TABLE_NAME='" + parts[1] + "';"; + } else if (dbType == DbType.MYSQL) + query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database + "' AND TABLE_NAME = '" + table + + "';"; + else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) + query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database.toLowerCase() + "' AND TABLE_NAME = '" + + table.toLowerCase() + "' ORDER BY ordinal_position;"; + else if (dbType == DbType.TERADATA) { + query = "SELECT ColumnName, ColumnType FROM dbc.columns WHERE DatabaseName= '" + database.toLowerCase() + "' AND TableName = '" + + table.toLowerCase() + "';"; + } else if (dbType == DbType.BIGQUERY) { + query = "SELECT column_name AS COLUMN_NAME, data_type as DATA_TYPE FROM " + database + ".INFORMATION_SCHEMA.COLUMNS WHERE table_name = \"" + table + "\";"; + } + + if (StringUtils.isEmpty(query)) { + throw new RuntimeException("No query was specified to obtain the table structure for DbType = " + dbType.name()); + } + + for (org.ohdsi.utilities.files.Row row : connection.query(query)) { + row.upperCaseFieldNames(); + org.ohdsi.databases.FieldInfo fieldInfo; + if (dbType == DbType.TERADATA) { + fieldInfo = new org.ohdsi.databases.FieldInfo(scanParameters, row.get("COLUMNNAME")); + } else { + fieldInfo = new org.ohdsi.databases.FieldInfo(scanParameters, row.get("COLUMN_NAME")); + } + if (dbType == DbType.TERADATA) { + fieldInfo.type = row.get("COLUMNTYPE"); + } else { + fieldInfo.type = row.get("DATA_TYPE"); + } + fieldInfo.rowCount = connection.getTableSize(table); + fieldInfos.add(fieldInfo); + } + } + return fieldInfos; + } + + public ResultSet getFieldNamesFromJDBC(String table) { + if (dbType == DbType.MS_ACCESS) { + try { + DatabaseMetaData metadata = connection.getMetaData(); + return metadata.getColumns(null, null, table, null); + } catch (SQLException e) { + throw new 
RuntimeException(e.getMessage()); + } + } else { + throw new RuntimeException("DB is not of supported type"); + } + } + + public QueryResult fetchRowsFromTable(String table, long rowCount, ScanParameters scanParameters) { + String query = null; + int sampleSize = scanParameters.getSampleSize(); + + if (dbType.supportsStorageHandler()) { + query = dbType.getStorageHandler().getRowSampleQuery(table, rowCount, sampleSize); + } else if (sampleSize == -1) { + if (dbType == DbType.MS_ACCESS) + query = "SELECT * FROM [" + table + "]"; + else if (dbType == DbType.SQL_SERVER || dbType == DbType.PDW || dbType == DbType.AZURE) + query = "SELECT * FROM [" + table.replaceAll("\\.", "].[") + "]"; + else + query = "SELECT * FROM " + table; + } else { + if (dbType == DbType.SQL_SERVER || dbType == DbType.AZURE) + query = "SELECT * FROM [" + table.replaceAll("\\.", "].[") + "] TABLESAMPLE (" + sampleSize + " ROWS)"; + else if (dbType == DbType.MYSQL) + query = "SELECT * FROM " + table + " ORDER BY RAND() LIMIT " + sampleSize; + else if (dbType == DbType.PDW) + query = "SELECT TOP " + sampleSize + " * FROM [" + table.replaceAll("\\.", "].[") + "] ORDER BY RAND()"; + else if (dbType == DbType.ORACLE) { + if (sampleSize < rowCount) { + double percentage = 100 * sampleSize / (double) rowCount; + if (percentage < 100) + query = "SELECT * FROM " + table + " SAMPLE(" + percentage + ")"; + } else { + query = "SELECT * FROM " + table; + } + } else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) { + query = "SELECT * FROM " + table + " ORDER BY RANDOM() LIMIT " + sampleSize; + } + else if (dbType == DbType.MS_ACCESS) { + query = "SELECT " + "TOP " + sampleSize + " * FROM [" + table + "]"; + } + else if (dbType == DbType.BIGQUERY) { + query = "SELECT * FROM " + table + " ORDER BY RAND() LIMIT " + sampleSize; + } + } + + + if (StringUtils.isEmpty(query)) { + throw new RuntimeException("No query was generated for database type " + dbType.name()); + } + + return 
createQueryResult(query); + } + + + private List getTableNamesClassic(String database) { + List names = new ArrayList<>(); + String query = null; + if (dbType == DbType.MYSQL) { + query = "SHOW TABLES IN " + database; + } else if (dbType == DbType.SQL_SERVER || dbType == DbType.PDW || dbType == DbType.AZURE) { + query = "SELECT CONCAT(schemas.name, '.', tables_views.name) FROM " + + "(SELECT schema_id, name FROM %1$s.sys.tables UNION ALL SELECT schema_id, name FROM %1$s.sys.views) tables_views " + + "INNER JOIN %1$s.sys.schemas ON tables_views.schema_id = schemas.schema_id " + + "ORDER BY schemas.name, tables_views.name"; + query = String.format(query, database); + logger.info(query); + } else if (dbType == DbType.ORACLE) { + query = "SELECT table_name FROM " + + "(SELECT table_name, owner FROM all_tables UNION ALL SELECT view_name, owner FROM all_views) tables_views " + + "WHERE owner='" + database.toUpperCase() + "'"; + } else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) { + query = "SELECT table_name FROM information_schema.tables WHERE table_schema = '" + database.toLowerCase() + "' ORDER BY table_name"; + } else if (dbType == DbType.MS_ACCESS) { + query = "SELECT Name FROM sys.MSysObjects WHERE (Type=1 OR Type=5) AND Flags=0;"; + } else if (dbType == DbType.TERADATA) { + query = "SELECT TableName from dbc.tables WHERE tablekind IN ('T','V') and databasename='" + database + "'"; + } else if (dbType == DbType.BIGQUERY) { + query = "SELECT table_name from " + database + ".INFORMATION_SCHEMA.TABLES ORDER BY table_name;"; + } + + for (Row row : createQueryResult(query)) + names.add(row.get(row.getFieldNames().get(0))); + return names; + } + + private QueryResult createQueryResult(String sql) { + return new QueryResult(sql, this, verbose); + } + + public void close() throws SQLException { + if (this.hasStorageHandler()) { + this.getStorageHandler().close(); + } else { + this.connection.close(); + } + } + + public void setAutoCommit(boolean b) throws 
SQLException { + this.connection.setAutoCommit(b); + } + + public PreparedStatement prepareStatement(String statement) throws SQLException { + return this.connection.prepareStatement(statement); + } + + public void commit() throws SQLException { + this.connection.commit(); + } + + public void clearWarnings() throws SQLException { + this.connection.clearWarnings(); + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/DBConnector.java b/rabbit-core/src/main/java/org/ohdsi/databases/DBConnector.java index 33bc159a..4c3c8f63 100644 --- a/rabbit-core/src/main/java/org/ohdsi/databases/DBConnector.java +++ b/rabbit-core/src/main/java/org/ohdsi/databases/DBConnector.java @@ -18,37 +18,46 @@ package org.ohdsi.databases; import java.sql.Connection; +import java.sql.Driver; import java.sql.DriverManager; import java.sql.SQLException; +import java.util.Enumeration; import java.util.regex.Matcher; import java.util.regex.Pattern; import oracle.jdbc.pool.OracleDataSource; -import org.apache.tools.ant.types.selectors.SelectSelector; +import org.ohdsi.databases.configuration.DbSettings; +import org.ohdsi.databases.configuration.DbType; public class DBConnector { - public static void main(String[] args) { + public static DBConnection connect(DbSettings dbSettings, boolean verbose) { + assert dbSettings.dbType != null; + if (dbSettings.dbType.supportsStorageHandler()) { + return new DBConnection(dbSettings.dbType.getStorageHandler().getInstance(dbSettings), dbSettings.dbType, verbose); + } else { + return connect(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType, verbose); + } } // If dbType.BIGQUERY: domain field has been replaced with database field - public static Connection connect(String server, String domain, String user, String password, DbType dbType) { - if (dbType.equals(DbType.MYSQL)) - return DBConnector.connectToMySQL(server, user, password); - else if (dbType.equals(DbType.MSSQL) || dbType.equals(DbType.PDW) || 
dbType.equals(DbType.AZURE)) - return DBConnector.connectToMSSQL(server, domain, user, password); - else if (dbType.equals(DbType.ORACLE)) - return DBConnector.connectToOracle(server, domain, user, password); - else if (dbType.equals(DbType.POSTGRESQL)) - return DBConnector.connectToPostgreSQL(server, user, password); - else if (dbType.equals(DbType.MSACCESS)) - return DBConnector.connectToMsAccess(server, user, password); - else if (dbType.equals(DbType.REDSHIFT)) - return DBConnector.connectToRedshift(server, user, password); - else if (dbType.equals(DbType.TERADATA)) - return DBConnector.connectToTeradata(server, user, password); - else if (dbType.equals(DbType.BIGQUERY)) - return DBConnector.connectToBigQuery(server, domain, user, password); + private static DBConnection connect(String server, String domain, String user, String password, DbType dbType, boolean verbose) { + if (dbType.equalsDbType(DbType.MYSQL)) + return new DBConnection(DBConnector.connectToMySQL(server, user, password), dbType, verbose); + else if (dbType.equalsDbType(DbType.SQL_SERVER) || dbType.equalsDbType(DbType.PDW) || dbType.equalsDbType(DbType.AZURE)) + return new DBConnection(DBConnector.connectToMSSQL(server, domain, user, password), dbType, verbose); + else if (dbType.equalsDbType(DbType.ORACLE)) + return new DBConnection(DBConnector.connectToOracle(server, domain, user, password), dbType, verbose); + else if (dbType.equalsDbType(DbType.POSTGRESQL)) + return new DBConnection(DBConnector.connectToPostgreSQL(server, user, password), dbType, verbose); + else if (dbType.equalsDbType(DbType.MS_ACCESS)) + return new DBConnection(DBConnector.connectToMsAccess(server, user, password), dbType, verbose); + else if (dbType.equalsDbType(DbType.REDSHIFT)) + return new DBConnection(DBConnector.connectToRedshift(server, user, password), dbType, verbose); + else if (dbType.equalsDbType(DbType.TERADATA)) + return new DBConnection(DBConnector.connectToTeradata(server, user, password), dbType, 
verbose); + else if (dbType.equalsDbType(DbType.BIGQUERY)) + return new DBConnection(DBConnector.connectToBigQuery(server, domain, user, password), dbType, verbose); else return null; } @@ -110,10 +119,9 @@ public static Connection connectToPostgreSQL(String server, String user, String final String jdbcProtocol = "jdbc:postgresql://"; String url = (!server.startsWith(jdbcProtocol) ? jdbcProtocol : "") + server; try { - System.out.printf("DriverManager.getConnection(%s, %s, %s)%n", url, user, password); return DriverManager.getConnection(url, user, password); } catch (SQLException e1) { - throw new RuntimeException("Cannot connect to DB server: " + e1.getMessage() + " for url: " + url); + throw new RuntimeException("Cannot connect to DB server: " + e1.getMessage()); } } @@ -253,5 +261,45 @@ public static Connection connectToBigQuery(String server, String domain, String throw new RuntimeException("Simba URL failed: Cannot connect to DB server: " + e1.getMessage()); } } + + /* + * main() can be run to verify that all configured JDBC drivers are loadable + */ + public static void main(String[] args) { + verifyDrivers(); + } + + public static final String ALL_JDBC_DRIVERS_LOADABLE = "All configured JDBC drivers could be loaded."; + static void verifyDrivers() { + // verify that a JDBC driver that is not included/supported cannot be loaded + String notSupportedDriver = "org.sqlite.JDBC"; // change this if WhiteRabbit starts supporting SQLite + if (DbType.driverNames().contains(notSupportedDriver)) { + throw new RuntimeException("Cannot run this test for a supported driver."); + } + try { + testJDBCDriverAndVersion(notSupportedDriver); + throw new RuntimeException(String.format("JDBC driver was not expected to be loaded: %s", notSupportedDriver)); + } catch (ClassNotFoundException ignored) {} + + DbType.driverNames().forEach(driver -> { + try { + testJDBCDriverAndVersion(driver); + } catch (ClassNotFoundException e) { + throw new RuntimeException(String.format("JDBC 
driver class could not be loaded: %s", driver)); + } + }); + System.out.println(ALL_JDBC_DRIVERS_LOADABLE); + } + + static void testJDBCDriverAndVersion(String driverName) throws ClassNotFoundException { + Enumeration drivers = DriverManager.getDrivers(); + while (drivers.hasMoreElements()) { + Driver driver = drivers.nextElement(); + Class driverClass = Class.forName(driverName); + if (driver.getClass().isAssignableFrom(driverClass)) { + int ignoredMajorVersion = driver.getMajorVersion(); + } + } + } } diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/DBRowIterator.java b/rabbit-core/src/main/java/org/ohdsi/databases/DBRowIterator.java new file mode 100644 index 00000000..20a6687d --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/DBRowIterator.java @@ -0,0 +1,123 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.databases; + +import org.ohdsi.utilities.files.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +/* Iterator that executes a SQL query on construction and yields each result row as a Row of column name / string value pairs. */ class DBRowIterator implements Iterator { + static Logger logger = LoggerFactory.getLogger(DBRowIterator.class); + + private ResultSet resultSet; + + private boolean hasNext; + + private Set columnNames = new HashSet<>(); + + public DBRowIterator(String sql, RichConnection richConnection) { + this(sql, richConnection.getConnection(), richConnection.isVerbose()); /* delegate to the main constructor; a bare "new DBRowIterator(...)" here would build and discard a second iterator, leaving this one without a result set */ + } + public DBRowIterator(String sql, DBConnection dbConnection, boolean verbose) { + Statement statement; + try { + sql = sql.trim(); /* String.trim() returns a new string; assign it so a trailing ";" followed by whitespace is still detected below */ + if (sql.endsWith(";")) + sql = sql.substring(0, sql.length() - 1); + if (verbose) { + String abbrSQL = sql.replace('\n', ' ').replace('\t', ' ').trim(); + if (abbrSQL.length() > 100) + abbrSQL = abbrSQL.substring(0, 100).trim() + "..."; + logger.info("Executing query: {}", abbrSQL); + } + long start = System.currentTimeMillis(); + statement = dbConnection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + resultSet = statement.executeQuery(sql); + hasNext = resultSet.next(); + if (verbose) + dbConnection.outputQueryStats(statement, System.currentTimeMillis() - start); + } catch (SQLException e) { + logger.error("{} - {}", sql, e.getMessage()); /* the SQL text must be an argument, not the format string, otherwise the exception message is never rendered */ + throw new RuntimeException(e); + } + } + + public void close() { + if (resultSet != null) { + try { + resultSet.close(); + } catch (SQLException e) { + e.printStackTrace(); + } + resultSet = null; + hasNext = false; + } + } + + @Override + public boolean hasNext() { + return hasNext; + } + + @Override + public Row next() { + try { + Row row = new Row(); + ResultSetMetaData metaData; + metaData = 
resultSet.getMetaData(); + columnNames.clear(); + + for (int i = 1; i < metaData.getColumnCount() + 1; i++) { + String columnName = metaData.getColumnName(i); + if (columnNames.add(columnName)) { /* only the first column carrying a given name is added to the row */ + String value; + try { + value = resultSet.getString(i); + } catch (Exception e) { + value = ""; + } + if (value == null) + value = ""; + + row.add(columnName, value.replace(" 00:00:00", "")); + } + } + hasNext = resultSet.next(); /* advance now so hasNext() can answer without touching the result set */ + if (!hasNext) { + resultSet.close(); + resultSet = null; + } + return row; + } catch (SQLException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/DataType.java b/rabbit-core/src/main/java/org/ohdsi/databases/DataType.java new file mode 100644 index 00000000..0b82546e --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/DataType.java @@ -0,0 +1,22 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/
+package org.ohdsi.databases;
+
+/**
+ * Labels for the data type of a scanned field. The constant names are what
+ * FieldInfo.getTypeDescription() returns when it derives a type from the scanned values.
+ */
+public enum DataType {
+    /** Reported when every processed value was empty. */
+    EMPTY,
+    /** Reported for fields classified as free text. */
+    TEXT,
+    /** Reported when all non-empty values were recognised as dates. */
+    DATE,
+    /** Reported when all non-empty values were recognised as integers. */
+    INT,
+    /** Reported when all non-empty values were recognised as numbers. */
+    REAL,
+    /** Reported when none of the other types apply. */
+    VARCHAR
+}
diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/DbType.java b/rabbit-core/src/main/java/org/ohdsi/databases/DbType.java
deleted file mode 100644
index 4942deb0..00000000
--- a/rabbit-core/src/main/java/org/ohdsi/databases/DbType.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*******************************************************************************
- * Copyright 2019 Observational Health Data Sciences and Informatics
- *
- * This file is part of WhiteRabbit
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/ -package org.ohdsi.databases; - -public class DbType { - public static DbType MYSQL = new DbType("mysql"); - public static DbType MSSQL = new DbType("mssql"); - public static DbType PDW = new DbType("pdw"); - public static DbType ORACLE = new DbType("oracle"); - public static DbType POSTGRESQL = new DbType("postgresql"); - public static DbType MSACCESS = new DbType("msaccess"); - public static DbType REDSHIFT = new DbType("redshift"); - public static DbType TERADATA = new DbType("teradata"); - public static DbType BIGQUERY = new DbType("bigquery"); - public static DbType AZURE = new DbType("azure"); - - private enum Type { - MYSQL, MSSQL, PDW, ORACLE, POSTGRESQL, MSACCESS, REDSHIFT, TERADATA, BIGQUERY, AZURE - }; - - private Type type; - - public DbType(String type) { - this.type = Type.valueOf(type.toUpperCase()); - } - - public boolean equals(Object other) { - if (other instanceof DbType && ((DbType) other).type == type) - return true; - else - return false; - } - - public String getTypeName() { - return this.type.name(); - } -} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/FieldInfo.java b/rabbit-core/src/main/java/org/ohdsi/databases/FieldInfo.java new file mode 100644 index 00000000..8d19fcb7 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/FieldInfo.java @@ -0,0 +1,255 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.databases;
+
+import org.ohdsi.utilities.DateUtilities;
+import org.ohdsi.utilities.StringUtilities;
+import org.ohdsi.utilities.collections.CountingSet;
+import org.ohdsi.utilities.collections.Pair;
+
+import java.time.LocalDate;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Accumulates statistics for one field while its values are processed one by one: value counts,
+ * lengths, empty and unique counts, the derived data type and, when enabled in the scan
+ * parameters, numeric statistics based on a sampling reservoir.
+ *
+ * Typical use: call {@link #processValue(String)} for every value, then {@link #trim()} once to
+ * finalise the statistics.
+ */
+public class FieldInfo {
+    private final ScanParameters scanParameters;
+    public String type;
+    public String name;
+    public String label;
+    public CountingSet<String> valueCounts = new CountingSet<>();
+    public long sumLength = 0;
+    public int maxLength = 0;
+    public long nProcessed = 0;
+    public long emptyCount = 0;
+    public long uniqueCount = 0;
+    public long rowCount = -1;
+    // The three type flags start out true and are cleared by the first value that contradicts them.
+    public boolean isInteger = true;
+    public boolean isReal = true;
+    public boolean isDate = true;
+    public boolean isFreeText = false;
+    public boolean tooManyValues = false;
+    // Only created when numeric statistics are requested; set to null by trim().
+    public UniformSamplingReservoir samplingReservoir;
+    public Object average;
+    public Object stdev;
+    public Object minimum;
+    public Object maximum;
+    public Object q1;
+    public Object q2;
+    public Object q3;
+
+    /**
+     * @param scanParameters settings that control value limits and numeric statistics
+     * @param name           the name of the field
+     */
+    public FieldInfo(ScanParameters scanParameters, String name) {
+        this.scanParameters = scanParameters;
+        this.name = name;
+        if (scanParameters.doCalculateNumericStats()) {
+            this.samplingReservoir = new UniformSamplingReservoir(scanParameters.getNumStatsSamplerSize());
+        }
+    }
+
+    /**
+     * Finalises this field: reduces the value counts to the number of values used in the scan
+     * report, computes the numeric statistics (when enabled) and releases the sampling reservoir.
+     * Calling it again afterwards leaves the already computed statistics untouched.
+     */
+    public void trim() {
+        trimValueCounts();
+
+        // Calculate numeric stats and dereference sampling reservoir to save memory.
+        // The null check makes a repeated call harmless instead of a NullPointerException.
+        if (scanParameters.doCalculateNumericStats() && samplingReservoir != null) {
+            average = getAverage();
+            stdev = getStandardDeviation();
+            minimum = getMinimum();
+            maximum = getMaximum();
+            q1 = getQ1();
+            q2 = getQ2();
+            q3 = getQ3();
+        }
+        samplingReservoir = null;
+    }
+
+    // Only keep values that are used in scan report
+    private void trimValueCounts() {
+        if (valueCounts.size() > scanParameters.getMaxValues()) {
+            valueCounts.keepTopN(scanParameters.getMaxValues());
+        }
+    }
+
+    /**
+     * @return true when the number of distinct values exceeded ScanParameters.MAX_VALUES_IN_MEMORY
+     *         during processing, at which point the value counts were cut back
+     */
+    public boolean hasValuesTrimmed() {
+        return tooManyValues;
+    }
+
+    /**
+     * @return the fraction of processed values that were empty after trimming whitespace;
+     *         1 when nothing has been processed
+     */
+    public Double getFractionEmpty() {
+        if (nProcessed == 0)
+            return 1d;
+        else
+            return emptyCount / (double) nProcessed;
+    }
+
+    /**
+     * @return the explicitly assigned type when there is one; otherwise "" when values are not
+     *         scanned, or else the name of the DataType derived from the processed values
+     */
+    public String getTypeDescription() {
+        if (type != null)
+            return type;
+        else if (!scanParameters.doScanValues()) // If not type assigned and not values scanned, do not derive
+            return "";
+        else if (nProcessed == emptyCount)
+            return DataType.EMPTY.name();
+        else if (isFreeText)
+            return DataType.TEXT.name();
+        else if (isDate)
+            return DataType.DATE.name();
+        else if (isInteger)
+            return DataType.INT.name();
+        else if (isReal)
+            return DataType.REAL.name();
+        else
+            return DataType.VARCHAR.name();
+    }
+
+    /**
+     * @return the number of unique values divided by the number of processed values;
+     *         0 when nothing was processed or when there is exactly one unique value
+     */
+    public Double getFractionUnique() {
+        if (nProcessed == 0 || uniqueCount == 1) {
+            return 0d;
+        } else {
+            return uniqueCount / (double) nProcessed;
+        }
+
+    }
+
+    /**
+     * Updates all statistics with one value of this field.
+     *
+     * @param value the raw value; must not be null
+     */
+    public void processValue(String value) {
+        nProcessed++;
+        sumLength += value.length();
+        if (value.length() > maxLength)
+            maxLength = value.length();
+
+        String trimValue = value.trim();
+        if (trimValue.length() == 0)
+            emptyCount++;
+
+        if (!isFreeText) {
+            boolean newlyAdded = valueCounts.add(value);
+            if (newlyAdded) uniqueCount++;
+
+            if (trimValue.length() != 0) {
+                evaluateDataType(trimValue);
+            }
+
+            if (nProcessed == ScanParameters.N_FOR_FREE_TEXT_CHECK && !isInteger && !isReal && !isDate) {
+                doFreeTextCheck();
+            }
+        } else {
+            // Free text is counted per word instead of per value.
+            valueCounts.addAll(StringUtilities.mapToWords(trimValue.toLowerCase()));
+        }
+
+        // if over this large constant number, then trimmed back to size used in report (maxValues).
+        // Only the value counts are reduced here. Calling trim() at this point would also discard
+        // the sampling reservoir, which is still needed for this and all following values.
+        if (!tooManyValues && valueCounts.size() > ScanParameters.MAX_VALUES_IN_MEMORY) {
+            tooManyValues = true;
+            trimValueCounts();
+        }
+
+        // The reservoir is null when numeric statistics are disabled or after trim() has finalised them.
+        if (samplingReservoir != null && scanParameters.doCalculateNumericStats() && !trimValue.isEmpty()) {
+            if (isInteger || isReal) {
+                samplingReservoir.add(Double.parseDouble(trimValue));
+            } else if (isDate) {
+                samplingReservoir.add(DateUtilities.parseDate(trimValue));
+            }
+        }
+
+    }
+
+    /**
+     * @return the values with a count of at least the minimum cell count, ordered by descending
+     *         count and limited to the maximum number of values; a final ("List truncated...", -1)
+     *         entry is appended when not all counted values are included
+     */
+    public List<Pair<String, Integer>> getSortedValuesWithoutSmallValues() {
+        List<Pair<String, Integer>> result = valueCounts.key2count.entrySet().stream()
+                .filter(e -> e.getValue().count >= scanParameters.getMinCellCount())
+                .sorted(Comparator.<Map.Entry<String, CountingSet.Count>>comparingInt(e -> e.getValue().count).reversed())
+                .limit(scanParameters.getMaxValues())
+                .map(e -> new Pair<>(e.getKey(), e.getValue().count))
+                .collect(Collectors.toCollection(ArrayList::new));
+
+        if (result.size() < valueCounts.key2count.size()) {
+            result.add(new Pair<>("List truncated...", -1));
+        }
+        return result;
+    }
+
+    // Clears every type flag that the given (non-empty, trimmed) value contradicts.
+    private void evaluateDataType(String value) {
+        if (isReal && !StringUtilities.isNumber(value))
+            isReal = false;
+        if (isInteger && !StringUtilities.isLong(value))
+            isInteger = false;
+        if (isDate && !StringUtilities.isDate(value))
+            isDate = false;
+    }
+
+    // Switches the field to free text when the average length of the non-empty values is large enough.
+    private void doFreeTextCheck() {
+        double averageLength = sumLength / (double) (nProcessed - emptyCount);
+        if (averageLength >= ScanParameters.MIN_AVERAGE_LENGTH_FOR_FREE_TEXT) {
+            isFreeText = true;
+            // Reset value count to word count
+            CountingSet<String> wordCounts = new CountingSet<>();
+            for (Map.Entry<String, CountingSet.Count> entry : valueCounts.key2count.entrySet())
+                for (String word : StringUtilities.mapToWords(entry.getKey().toLowerCase()))
+                    wordCounts.add(word, entry.getValue().count);
+            valueCounts = wordCounts;
+        }
+    }
+
+    private Object formatNumericValue(double value) {
+        return formatNumericValue(value, false);
+    }
+
+    // Returns the statistic in a form that fits the field type: the number itself for numeric
+    // fields, an ISO date string for date fields (or the number of days when dateAsDays is set),
+    // and NaN when no meaningful value exists.
+    private Object formatNumericValue(double value, boolean dateAsDays) {
+        if (nProcessed == 0) {
+            return Double.NaN;
+        } else if (getTypeDescription().equals(DataType.EMPTY.name())) {
+            return Double.NaN;
+        } else if (isInteger || isReal) {
+            return value;
+        } else if (isDate && dateAsDays) {
+            return value;
+        } else if (isDate) {
+            return LocalDate.ofEpochDay((long) value).toString();
+        } else {
+            return Double.NaN;
+        }
+    }
+
+    private Object getMinimum() {
+        double min = samplingReservoir.getPopulationMinimum();
+        return formatNumericValue(min);
+    }
+
+    private Object getMaximum() {
+        double max = samplingReservoir.getPopulationMaximum();
+        return formatNumericValue(max);
+    }
+
+    private Object getAverage() {
+        double average = samplingReservoir.getPopulationMean();
+        return formatNumericValue(average);
+    }
+
+    private Object getStandardDeviation() {
+        double stddev = samplingReservoir.getSampleStandardDeviation();
+        // A standard deviation of dates is a duration, so it is reported in days, not as a date.
+        return formatNumericValue(stddev, true);
+    }
+
+    private Object getQ1() {
+        double q1 = samplingReservoir.getSampleQuartiles().get(0);
+        return formatNumericValue(q1);
+    }
+
+    private Object getQ2() {
+        double q2 = samplingReservoir.getSampleQuartiles().get(1);
+        return formatNumericValue(q2);
+    }
+
+    private Object getQ3() {
+        double q3 = samplingReservoir.getSampleQuartiles().get(2);
+        return formatNumericValue(q3);
+    }
+}
diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/QueryResult.java b/rabbit-core/src/main/java/org/ohdsi/databases/QueryResult.java
new file mode 100644
index 00000000..e44de6f5
--- /dev/null
+++ b/rabbit-core/src/main/java/org/ohdsi/databases/QueryResult.java
@@ -0,0 +1,53 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.databases;
+
+import org.ohdsi.utilities.files.Row;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * The result of a SQL query as an Iterable of rows. The query is not executed on construction:
+ * every call to {@link #iterator()} executes it again through a new DBRowIterator.
+ * {@link #close()} closes all iterators handed out so far.
+ */
+public class QueryResult implements Iterable<Row> {
+    private final String sql;
+
+    private final List<DBRowIterator> iterators = new ArrayList<>();
+    private final DBConnection dbConnection;
+    private final boolean verbose;
+
+    /**
+     * Creates a non-verbose query result.
+     *
+     * @param sql          the query to execute when iterating
+     * @param dbConnection the connection to execute the query on
+     */
+    public QueryResult(String sql, DBConnection dbConnection) {
+        this(sql, dbConnection, false);
+    }
+
+    /**
+     * @param sql          the query to execute when iterating
+     * @param dbConnection the connection to execute the query on
+     * @param verbose      passed on to every DBRowIterator created by {@link #iterator()}
+     */
+    public QueryResult(String sql, DBConnection dbConnection, boolean verbose) {
+        this.sql = sql;
+        this.dbConnection = dbConnection;
+        // Previously this argument was dropped and iterators were always created non-verbose.
+        this.verbose = verbose;
+    }
+
+    /**
+     * Executes the query and returns an iterator over its rows.
+     */
+    @Override
+    public Iterator<Row> iterator() {
+        DBRowIterator iterator = new DBRowIterator(sql, dbConnection, verbose);
+        iterators.add(iterator);
+        return iterator;
+    }
+
+    /**
+     * Closes every iterator created by this query result and forgets them.
+     */
+    public void close() {
+        for (DBRowIterator iterator : iterators) {
+            iterator.close();
+        }
+        // Closed iterators are of no further use; dropping them avoids holding on to them forever.
+        iterators.clear();
+    }
+}
diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/RichConnection.java b/rabbit-core/src/main/java/org/ohdsi/databases/RichConnection.java
index 17f691e7..77b563d1 100644
--- a/rabbit-core/src/main/java/org/ohdsi/databases/RichConnection.java
+++ b/rabbit-core/src/main/java/org/ohdsi/databases/RichConnection.java
@@ -19,168 +19,76 @@
 import java.io.Closeable;
 import java.sql.BatchUpdateException;
-import java.sql.Connection;
 import java.sql.DatabaseMetaData;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
-import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
-import java.sql.Statement;
 import java.sql.Types;
-import
java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; +import org.ohdsi.databases.configuration.DbSettings; +import org.ohdsi.databases.configuration.DbType; import org.ohdsi.utilities.SimpleCounter; import org.ohdsi.utilities.StringUtilities; import org.ohdsi.utilities.files.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class RichConnection implements Closeable { - public static int INSERT_BATCH_SIZE = 100000; - private Connection connection; - private boolean verbose = false; - private static DecimalFormat decimalFormat = new DecimalFormat("#.#"); - private DbType dbType; + Logger logger = LoggerFactory.getLogger(RichConnection.class); - public RichConnection(String server, String domain, String user, String password, DbType dbType) { - this.connection = DBConnector.connect(server, domain, user, password, dbType); - this.dbType = dbType; + public static int INSERT_BATCH_SIZE = 100000; + private DBConnection connection; + private boolean verbose = false; + private DbType dbType; + + public RichConnection(DbSettings dbSettings) { + this.connection = DBConnector.connect(dbSettings, verbose); + this.dbType = dbSettings.dbType; } /** * Execute the given SQL statement. 
- * + * * @param sql */ public void execute(String sql) { - Statement statement = null; - try { - if (sql.length() == 0) - return; - - statement = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - for (String subQuery : sql.split(";")) { - if (verbose) { - String abbrSQL = subQuery.replace('\n', ' ').replace('\t', ' ').trim(); - if (abbrSQL.length() > 100) - abbrSQL = abbrSQL.substring(0, 100).trim() + "..."; - System.out.println("Adding query to batch: " + abbrSQL); - } - - statement.addBatch(subQuery); - } - long start = System.currentTimeMillis(); - if (verbose) - System.out.println("Executing batch"); - statement.executeBatch(); - if (verbose) - outputQueryStats(statement, System.currentTimeMillis() - start); - } catch (SQLException e) { - System.err.println(sql); - e.printStackTrace(); - } finally { - if (statement != null) { - try { - statement.close(); - } catch (SQLException e) { - // TODO Auto-generated catch block - System.err.println(e.getMessage()); - } - } - } - } - - private void outputQueryStats(Statement statement, long ms) throws SQLException { - Throwable warning = statement.getWarnings(); - if (warning != null) - System.out.println("- SERVER: " + warning.getMessage()); - String timeString; - if (ms < 1000) - timeString = ms + " ms"; - else if (ms < 60000) - timeString = decimalFormat.format(ms / 1000d) + " seconds"; - else if (ms < 3600000) - timeString = decimalFormat.format(ms / 60000d) + " minutes"; - else - timeString = decimalFormat.format(ms / 3600000d) + " hours"; - System.out.println("- Query completed in " + timeString); + connection.execute(sql, verbose); } /** * Query the database using the provided SQL statement. - * + * * @param sql * @return */ public QueryResult query(String sql) { - return new QueryResult(sql); + return new QueryResult(sql, connection, verbose); } /** * Switch the database to use. 
- * + * * @param database */ public void use(String database) { - if (database == null || dbType == DbType.MSACCESS || dbType == DbType.BIGQUERY || dbType == DbType.AZURE) { - return; - } - - if (dbType == DbType.ORACLE) { - execute("ALTER SESSION SET current_schema = " + database); - } else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) { - execute("SET search_path TO " + database); - } else if (dbType == DbType.TERADATA) { - execute("database " + database); - } else { - execute("USE " + database); - } + connection.use(database, dbType); } public List getTableNames(String database) { - List names = new ArrayList<>(); - String query = null; - if (dbType == DbType.MYSQL) { - query = "SHOW TABLES IN " + database; - } else if (dbType == DbType.MSSQL || dbType == DbType.PDW || dbType == DbType.AZURE) { - query = "SELECT CONCAT(schemas.name, '.', tables_views.name) FROM " + - "(SELECT schema_id, name FROM %1$s.sys.tables UNION ALL SELECT schema_id, name FROM %1$s.sys.views) tables_views " + - "INNER JOIN %1$s.sys.schemas ON tables_views.schema_id = schemas.schema_id " + - "ORDER BY schemas.name, tables_views.name"; - query = String.format(query, database); - System.out.println(query); - } else if (dbType == DbType.ORACLE) { - query = "SELECT table_name FROM " + - "(SELECT table_name, owner FROM all_tables UNION ALL SELECT view_name, owner FROM all_views) tables_views " + - "WHERE owner='" + database.toUpperCase() + "'"; - } else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) { - query = "SELECT table_name FROM information_schema.tables WHERE table_schema = '" + database.toLowerCase() + "' ORDER BY table_name"; - } else if (dbType == DbType.MSACCESS) { - query = "SELECT Name FROM sys.MSysObjects WHERE (Type=1 OR Type=5) AND Flags=0;"; - } else if (dbType == DbType.TERADATA) { - query = "SELECT TableName from dbc.tables WHERE tablekind IN ('T','V') and databasename='" + database + "'"; - } else if (dbType == DbType.BIGQUERY) { - query = 
"SELECT table_name from " + database + ".INFORMATION_SCHEMA.TABLES ORDER BY table_name;"; - } + return connection.getTableNames(database); + } - for (Row row : query(query)) - names.add(row.get(row.getFieldNames().get(0))); - return names; + public List fetchTableStructure(RichConnection connection, String database, String table, ScanParameters scanParameters) { + return this.connection.fetchTableStructure(this, database, table, scanParameters); } - public ResultSet getMsAccessFieldNames(String table) { - if (dbType == DbType.MSACCESS) { - try { - DatabaseMetaData metadata = connection.getMetaData(); - return metadata.getColumns(null, null, table, null); - } catch (SQLException e) { - throw new RuntimeException(e.getMessage()); - } - } else - throw new RuntimeException("DB is not of type MS Access"); + public QueryResult fetchRowsFromTable(String table, long rowCount, ScanParameters scanParameters) { + return this.connection.fetchRowsFromTable(table, rowCount, scanParameters); } /** @@ -192,9 +100,9 @@ public ResultSet getMsAccessFieldNames(String table) { public long getTableSize(String tableName) { QueryResult qr; long returnVal; - if (dbType == DbType.MSSQL || dbType == DbType.PDW || dbType == DbType.AZURE) + if (dbType == DbType.SQL_SERVER || dbType == DbType.PDW || dbType == DbType.AZURE) qr = query("SELECT COUNT_BIG(*) FROM [" + tableName.replaceAll("\\.", "].[") + "];"); - else if (dbType == DbType.MSACCESS) + else if (dbType == DbType.MS_ACCESS) qr = query("SELECT COUNT(*) FROM [" + tableName + "];"); else qr = query("SELECT COUNT(*) FROM " + tableName + ";"); @@ -217,37 +125,19 @@ public void close() { try { connection.close(); } catch (SQLException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); } } public void setVerbose(boolean verbose) { - this.verbose = verbose; + this.connection.setVerbose(verbose); } - public class QueryResult implements Iterable { - private String sql; - - private List iterators = new ArrayList<>(); - - public 
QueryResult(String sql) { - this.sql = sql; - } - - @Override - public Iterator iterator() { - DBRowIterator iterator = new DBRowIterator(sql); - iterators.add(iterator); - return iterator; - } - - public void close() { - for (DBRowIterator iterator : iterators) { - iterator.close(); - } - } + public DBConnection getConnection() { + return connection; } + /** * Inserts the rows into a table in the database. * @@ -263,8 +153,9 @@ public void insertIntoTable(Iterator iterator, String table, boolean create SimpleCounter counter = new SimpleCounter(1000000, true); while (iterator.hasNext()) { if (batch.size() == INSERT_BATCH_SIZE) { - if (first && create) + if (first && create) { createTable(table, batch); + } insert(table, batch); batch.clear(); first = false; @@ -272,24 +163,29 @@ public void insertIntoTable(Iterator iterator, String table, boolean create batch.add(iterator.next()); counter.count(); } - if (batch.size() != 0) { - if (first && create) + if (!batch.isEmpty()) { + if (first && create) { createTable(table, batch); + } insert(table, batch); } } + boolean isVerbose() { + return connection.isVerbose(); + } + private void insert(String tableName, List rows) { List columns; columns = rows.get(0).getFieldNames(); - for (int i = 0; i < columns.size(); i++) - columns.set(i, columnNameToSqlName(columns.get(i))); + columns.replaceAll(this::columnNameToSqlName); StringBuilder sql = new StringBuilder("INSERT INTO " + tableName); sql.append(" (").append(StringUtilities.join(columns, ",")).append(")"); sql.append(" VALUES (?"); - for (int i = 1; i < columns.size(); i++) + for (int i = 1; i < columns.size(); i++) { sql.append(",?"); + } sql.append(")"); try { connection.setAutoCommit(false); @@ -297,16 +193,16 @@ private void insert(String tableName, List rows) { for (Row row : rows) { for (int i = 0; i < columns.size(); i++) { String value = row.get(columns.get(i)); - if (value == null) - System.out.println(row.toString()); - else if (value.length() == 0) + if (value 
== null) { + logger.info(row.toString()); + } else if (value.isEmpty()) { value = null; - // System.out.println(value); - if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) // PostgreSQL does not allow unspecified types + } + if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) {// PostgreSQL does not allow unspecified types statement.setObject(i + 1, value, Types.OTHER); + } else if (dbType == DbType.ORACLE) { if (isDate(value)) { - // System.out.println(value); statement.setDate(i + 1, java.sql.Date.valueOf(value)); } else @@ -322,9 +218,9 @@ else if (dbType == DbType.ORACLE) { connection.setAutoCommit(true); connection.clearWarnings(); } catch (SQLException e) { - e.printStackTrace(); + logger.error(e.getMessage(), e); if (e instanceof BatchUpdateException) { - System.err.println(e.getNextException().getMessage()); + logger.error(e.getNextException().getMessage()); } } } @@ -351,25 +247,25 @@ private static boolean isDate(String string) { private Set createTable(String tableName, List rows) { Set numericFields = new HashSet<>(); Row firstRow = rows.get(0); - List fields = new ArrayList<>(rows.size()); + List fields = new ArrayList<>(rows.size()); for (String field : firstRow.getFieldNames()) - fields.add(new FieldInfo(field)); + fields.add(new NumericFieldInfo(field)); for (Row row : rows) { - for (FieldInfo fieldInfo : fields) { - String value = row.get(fieldInfo.name); - if (fieldInfo.isNumeric && !StringUtilities.isInteger(value)) - fieldInfo.isNumeric = false; - if (value.length() > fieldInfo.maxLength) - fieldInfo.maxLength = value.length(); + for (NumericFieldInfo numericFieldInfo : fields) { + String value = row.get(numericFieldInfo.name); + if (numericFieldInfo.isNumeric && !StringUtilities.isInteger(value)) + numericFieldInfo.isNumeric = false; + if (value.length() > numericFieldInfo.maxLength) + numericFieldInfo.maxLength = value.length(); } } StringBuilder sql = new StringBuilder(); sql.append("CREATE TABLE 
").append(tableName).append(" (\n"); - for (FieldInfo fieldInfo : fields) { - sql.append(" ").append(fieldInfo.toString()).append(",\n"); - if (fieldInfo.isNumeric) - numericFields.add(fieldInfo.name); + for (NumericFieldInfo numericFieldInfo : fields) { + sql.append(" ").append(numericFieldInfo.toString()).append(",\n"); + if (numericFieldInfo.isNumeric) + numericFields.add(numericFieldInfo.name); } sql.append(");"); execute(sql.toString()); @@ -380,12 +276,12 @@ private String columnNameToSqlName(String name) { return name.replaceAll(" ", "_").replace("-", "_").replace(",", "_").replaceAll("_+", "_"); } - private class FieldInfo { + private class NumericFieldInfo { public String name; public boolean isNumeric = true; public int maxLength = 0; - public FieldInfo(String name) { + public NumericFieldInfo(String name) { this.name = name; } @@ -397,7 +293,7 @@ else if (maxLength > 255) return columnNameToSqlName(name) + " text"; else return columnNameToSqlName(name) + " varchar(255)"; - } else if (dbType == DbType.MSSQL || dbType == DbType.PDW || dbType == DbType.AZURE) { + } else if (dbType == DbType.SQL_SERVER || dbType == DbType.PDW || dbType == DbType.AZURE) { if (isNumeric) { if (maxLength < 10) return columnNameToSqlName(name) + " int"; @@ -411,94 +307,4 @@ else if (maxLength > 255) throw new RuntimeException("Create table syntax not specified for type " + dbType); } } - - private class DBRowIterator implements Iterator { - - private ResultSet resultSet; - - private boolean hasNext; - - private Set columnNames = new HashSet<>(); - - public DBRowIterator(String sql) { - Statement statement; - try { - sql.trim(); - if (sql.endsWith(";")) - sql = sql.substring(0, sql.length() - 1); - if (verbose) { - String abbrSQL = sql.replace('\n', ' ').replace('\t', ' ').trim(); - if (abbrSQL.length() > 100) - abbrSQL = abbrSQL.substring(0, 100).trim() + "..."; - System.out.println("Executing query: " + abbrSQL); - } - long start = System.currentTimeMillis(); - statement = 
connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); - resultSet = statement.executeQuery(sql); - hasNext = resultSet.next(); - if (verbose) - outputQueryStats(statement, System.currentTimeMillis() - start); - } catch (SQLException e) { - System.err.println(sql); - System.err.println(e.getMessage()); - throw new RuntimeException(e); - } - } - - public void close() { - if (resultSet != null) { - try { - resultSet.close(); - } catch (SQLException e) { - e.printStackTrace(); - } - resultSet = null; - hasNext = false; - } - } - - @Override - public boolean hasNext() { - return hasNext; - } - - @Override - public Row next() { - try { - Row row = new Row(); - ResultSetMetaData metaData; - metaData = resultSet.getMetaData(); - columnNames.clear(); - - for (int i = 1; i < metaData.getColumnCount() + 1; i++) { - String columnName = metaData.getColumnName(i); - if (columnNames.add(columnName)) { - String value; - try { - value = resultSet.getString(i); - } catch (Exception e) { - value = ""; - } - if (value == null) - value = ""; - - row.add(columnName, value.replace(" 00:00:00", "")); - } - } - hasNext = resultSet.next(); - if (!hasNext) { - resultSet.close(); - resultSet = null; - } - return row; - } catch (SQLException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - @Override - public void remove() { - } - } } diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/ScanParameters.java b/rabbit-core/src/main/java/org/ohdsi/databases/ScanParameters.java new file mode 100644 index 00000000..8cbe5669 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/ScanParameters.java @@ -0,0 +1,38 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.databases;
+
+/**
+ * Settings and fixed limits that control a scan. Interface members are implicitly public, and
+ * interface fields implicitly static and final, so those modifiers are not repeated.
+ */
+public interface ScanParameters {
+
+    /** Number of distinct values above which FieldInfo cuts its value counts back. */
+    int MAX_VALUES_IN_MEMORY = 100000;
+
+    // NOTE(review): not referenced in the code visible here; meaning to be confirmed.
+    int MIN_CELL_COUNT_FOR_CSV = 1000000;
+
+    /** Number of processed values at which FieldInfo performs its free text check. */
+    int N_FOR_FREE_TEXT_CHECK = 1000;
+
+    /** Average value length from which FieldInfo classifies a field as free text. */
+    int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100;
+
+    /** Whether numeric statistics are collected; FieldInfo only creates its sampling reservoir when true. */
+    boolean doCalculateNumericStats();
+
+    /** The size passed to the sampling reservoir used for numeric statistics. */
+    int getNumStatsSamplerSize();
+
+    /** The maximum number of distinct values kept and reported per field. */
+    int getMaxValues();
+
+    /** Whether field values are scanned; when false, FieldInfo does not derive a type. */
+    boolean doScanValues();
+
+    /** The minimum count a value needs to be included in the sorted value list. */
+    int getMinCellCount();
+
+    // NOTE(review): presumably the number of rows to sample per table - confirm against the callers.
+    int getSampleSize();
+}
diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/SnowflakeHandler.java b/rabbit-core/src/main/java/org/ohdsi/databases/SnowflakeHandler.java
new file mode 100644
index 00000000..1c32670b
--- /dev/null
+++ b/rabbit-core/src/main/java/org/ohdsi/databases/SnowflakeHandler.java
@@ -0,0 +1,289 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases; + +import org.apache.commons.lang.StringUtils; + +import java.sql.*; +import java.util.Arrays; +import java.util.List; + +import org.ohdsi.databases.configuration.*; +import org.ohdsi.utilities.collections.Pair; +import org.ohdsi.utilities.files.IniFile; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.ohdsi.databases.SnowflakeHandler.SnowflakeConfiguration.*; + +/* + * SnowflakeHandler implements all Snowflake specific logic required to connect to, and query, a Snowflake instance. + * + * It is implemented as a Singleton, using the enum pattern es described here: https://www.baeldung.com/java-singleton + */ +public enum SnowflakeHandler implements StorageHandler { + INSTANCE(); + + final static Logger logger = LoggerFactory.getLogger(SnowflakeHandler.class); + + DBConfiguration configuration = new SnowflakeConfiguration(); + private DBConnection snowflakeConnection = null; + + private final DbType dbType = DbType.SNOWFLAKE; + public static final String ERROR_NO_FIELD_OF_TYPE = "No value was specified for type"; + public static final String ERROR_INCORRECT_SCHEMA_SPECIFICATION = + "Database should be specified as 'warehouse.database.schema', " + + "e.g. 
'computewh.snowflake_sample_data.weather"; + public static final String ERROR_CONNECTION_NOT_INITIALIZED = + "Snowflake Database connection has not been initialized."; + + SnowflakeHandler() { + } + + public void resetConnection() throws SQLException { + if (this.snowflakeConnection != null) { + this.snowflakeConnection.close(); + } + this.snowflakeConnection = null; + } + + @Override + public StorageHandler getInstance(DbSettings dbSettings) { + if (snowflakeConnection == null) { + snowflakeConnection = connectToSnowflake(dbSettings); + } + + return INSTANCE; + } + + public static Pair getConfiguration(IniFile iniFile, ValidationFeedback feedback) { + SnowflakeConfiguration configuration = new SnowflakeConfiguration(); + ValidationFeedback currentFeedback = configuration.loadAndValidateConfiguration(iniFile); + if (feedback != null) { + feedback.add(currentFeedback); + } + + String warehouse = configuration.getValue(SNOWFLAKE_WAREHOUSE); + DbSettings dbSettings = new DbSettings(); + dbSettings.dbType = DbType.SNOWFLAKE; + dbSettings.server = String.format("https://%s.snowflakecomputing.com", configuration.getValue(SNOWFLAKE_ACCOUNT)); + dbSettings.database = String.format("%s.%s.%s", + warehouse, + configuration.getValue(SNOWFLAKE_DATABASE), + configuration.getValue(SNOWFLAKE_SCHEMA)); + dbSettings.domain = dbSettings.database; + dbSettings.user = configuration.getValue(SNOWFLAKE_USER); + dbSettings.password = configuration.getValue(SNOWFLAKE_PASSWORD); + dbSettings.sourceType = DbSettings.SourceType.DATABASE; + + return new Pair<>(configuration, dbSettings); + } + + public DBConnection getDBConnection() { + this.checkInitialised(); + return this.snowflakeConnection; + } + + @Override + public String getTableSizeQuery(String tableName) { + return String.format("SELECT COUNT(*) FROM %s.%s.%s;", this.getDatabase(), this.getSchema(), tableName); + } + + public String getRowSampleQuery(String table, long rowCount, long sampleSize) { + return String.format("SELECT * 
FROM %s ORDER BY RANDOM() LIMIT %s", table, sampleSize); + } + + public String getTablesQuery(String database) { + return String.format("SELECT TABLE_NAME FROM %s.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '%s'", this.getDatabase().toUpperCase(), this.getSchema().toUpperCase()); + } + + @Override + public void checkInitialised() throws DBConfigurationException { + if (this.snowflakeConnection == null) { + throw new DBConfigurationException("Snowflake DB/connection was not initialized"); + } + } + + public DbType getDbType() { + return this.dbType; + } + + private static DBConnection connectToSnowflake(DbSettings dbSettings) { + try { + Class.forName("net.snowflake.client.jdbc.SnowflakeDriver"); + } catch (ClassNotFoundException ex) { + throw new RuntimeException("Cannot find JDBC driver. Make sure the file snowflake-jdbc-x.xx.xx.jar is in the path: " + ex.getMessage()); + } + String url = buildUrl(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, INSTANCE.configuration.getValue(SNOWFLAKE_AUTHENTICATOR)); + try { + return new DBConnection(DriverManager.getConnection(url), DbType.SNOWFLAKE, false); + } catch (SQLException ex) { + throw new RuntimeException("Cannot connect to Snowflake server: " + ex.getMessage()); + } + } + + public ResultSet getFieldNames(String table) { + try { + DatabaseMetaData metadata = this.snowflakeConnection.getMetaData(); + return metadata.getColumns(null, null, table, null); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage()); + } + } + + public DBConfiguration getDBConfiguration() { + + return this.configuration; + } + public static class SnowflakeConfiguration extends DBConfiguration { + public static final String SNOWFLAKE_ACCOUNT = "SNOWFLAKE_ACCOUNT"; + public static final String TOOLTIP_SNOWFLAKE_ACCOUNT = "Account for the Snowflake instance"; + public static final String SNOWFLAKE_USER = "SNOWFLAKE_USER"; + public static final String SNOWFLAKE_PASSWORD = "SNOWFLAKE_PASSWORD"; + 
public static final String SNOWFLAKE_AUTHENTICATOR = "SNOWFLAKE_AUTHENTICATOR"; + public static final String SNOWFLAKE_WAREHOUSE = "SNOWFLAKE_WAREHOUSE"; + public static final String SNOWFLAKE_DATABASE = "SNOWFLAKE_DATABASE"; + public static final String SNOWFLAKE_SCHEMA = "SNOWFLAKE_SCHEMA"; + public static final String ERROR_MUST_SET_PASSWORD_OR_AUTHENTICATOR = "Either password or authenticator must be specified for Snowflake"; + public static final String ERROR_MUST_NOT_SET_PASSWORD_AND_AUTHENTICATOR = "Specify only one of password or authenticator Snowflake"; + public static final String ERROR_VALUE_CAN_ONLY_BE_ONE_OF = "Error can only be one of "; + public SnowflakeConfiguration() { + super( + ConfigurationField.create( + SNOWFLAKE_ACCOUNT, + "Account", + TOOLTIP_SNOWFLAKE_ACCOUNT) + .required(), + ConfigurationField.create( + SNOWFLAKE_USER, + "User", + "User for the Snowflake instance") + .required(), + ConfigurationField.create( + SNOWFLAKE_PASSWORD, + "Password", + "Password for the Snowflake instance"), + ConfigurationField.create( + SNOWFLAKE_WAREHOUSE, + "Warehouse", + "Warehouse for the Snowflake instance") + .required(), + ConfigurationField.create( + SNOWFLAKE_DATABASE, + "Database", + "Database for the Snowflake instance") + .required(), + ConfigurationField.create( + SNOWFLAKE_SCHEMA, + "Schema", + "Schema for the Snowflake instance") + .required(), + ConfigurationField.create( + SNOWFLAKE_AUTHENTICATOR, + "Authenticator method", + "Snowflake JDBC authenticator method (only 'externalbrowser' is currently supported)") + .addValidator(new FieldValidator() { + private final List allowedValues = Arrays.asList("externalbrowser"); + @Override + public ValidationFeedback validate(ConfigurationField field) { + ValidationFeedback feedback = new ValidationFeedback(); + if (StringUtils.isNotEmpty(field.getValue())) { + if (!allowedValues.contains(field.getValue().toLowerCase())) { + feedback.addError(String.format("%s (%s)", ERROR_VALUE_CAN_ONLY_BE_ONE_OF, + 
String.join(", ", allowedValues)), field); + } else { + field.setValue(field.getValue().toLowerCase()); + } + } + return feedback; + } + }) + ); + this.configurationFields.addValidator(new PasswordXORAuthenticatorValidator()); + } + + static class PasswordXORAuthenticatorValidator implements ConfigurationValidator { + + @Override + public ValidationFeedback validate(ConfigurationFields fields) { + ValidationFeedback feedback = new ValidationFeedback(); + String password = fields.getValue(SNOWFLAKE_PASSWORD); + String authenticator = fields.getValue(SNOWFLAKE_AUTHENTICATOR); + if (StringUtils.isEmpty(password) && StringUtils.isEmpty(authenticator)) { + feedback.addError(ERROR_MUST_SET_PASSWORD_OR_AUTHENTICATOR, fields.get(SNOWFLAKE_PASSWORD)); + feedback.addError(ERROR_MUST_SET_PASSWORD_OR_AUTHENTICATOR, fields.get(SNOWFLAKE_AUTHENTICATOR)); + } else if (!StringUtils.isEmpty(password) && !StringUtils.isEmpty(authenticator)) { + feedback.addError(ERROR_MUST_NOT_SET_PASSWORD_AND_AUTHENTICATOR, fields.get(SNOWFLAKE_PASSWORD)); + feedback.addError(ERROR_MUST_NOT_SET_PASSWORD_AND_AUTHENTICATOR, fields.get(SNOWFLAKE_AUTHENTICATOR)); + } + + return feedback; + } + } + @Override + public DbSettings toDbSettings(ValidationFeedback feedback) { + return getConfiguration(this.toIniFile(),feedback ).getItem2(); + } + + } + + private static String buildUrl(String server, String schema, String user, String password, String authenticator) { + final String jdbcPrefix = "jdbc:snowflake://"; + String url = (!server.startsWith(jdbcPrefix) ? 
jdbcPrefix : "") + server; + if (!url.contains("?")) { + url += "?"; + } + + String[] parts = splitDatabaseName(schema); + url = appendParameterIfSet(url, "warehouse", parts[0]); + url = appendParameterIfSet(url, "db", parts[1]); + url = appendParameterIfSet(url, "schema", parts[2]); + url = appendParameterIfSet(url, "user", user); + if (!StringUtils.isEmpty(authenticator)) { + url = appendParameterIfSet(url, "authenticator", authenticator); + } else { + url = appendParameterIfSet(url, "password", password); + } + + return url; + } + private static String appendParameterIfSet(String url, String name, String value) { + if (!StringUtils.isEmpty(value)) { + return String.format("%s%s%s=%s", url, (url.endsWith("?") ? "" : "&"), name, value); + } + else { + throw new RuntimeException(String.format(ERROR_NO_FIELD_OF_TYPE + " %s", name)); + } + } + private static String[] splitDatabaseName(String databaseName) { + String[] parts = databaseName.split("\\."); + if (parts.length != 3) { + throw new RuntimeException(ERROR_INCORRECT_SCHEMA_SPECIFICATION); + } + + return parts; + } + + public String getDatabase() { + return this.configuration.getValue(SNOWFLAKE_DATABASE); + } + + private String getSchema() { + return this.configuration.getValue(SNOWFLAKE_SCHEMA); + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/StorageHandler.java b/rabbit-core/src/main/java/org/ohdsi/databases/StorageHandler.java new file mode 100644 index 00000000..f241cd5d --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/StorageHandler.java @@ -0,0 +1,233 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases; + +import org.ohdsi.databases.configuration.*; +import org.ohdsi.utilities.files.IniFile; +import org.ohdsi.utilities.files.Row; + +import java.io.PrintStream; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * StorageHandler defines the interface that a database connection class must implement. + * + */ +public interface StorageHandler { + + /** + * Creates an instance of the implementing class, or returns the existing instance when the implementation is a singleton. + * + * @param dbSettings Configuration parameters for the implemented database + * @return instance of a StorageHandler implementing class + */ + StorageHandler getInstance(DbSettings dbSettings); + + /** + * Returns the DBConnection object associated with the database connection + * + * @return DBConnection object + */ + DBConnection getDBConnection(); + + /** + * @return the DbType enum constant associated with the implementation + */ + DbType getDbType(); + + /** + * Returns the query that is used to obtain the size (number of rows) of a table. + * @param tableName name of the table to get the size (number of rows) for + * @return Implementation specific query to get the size of the table + */ + String getTableSizeQuery(String tableName); + + /** + * Verifies if the implementing object was properly configured for use. Should throw a DBConfigurationException + * if this is not the case. 
+ * + * @throws DBConfigurationException Object not ready for use + */ + void checkInitialised() throws DBConfigurationException; + + /** + * Returns the row count of the specified table. + * + * @param tableName name of table + * @return size of table in rows + */ + default long getTableSize(String tableName ) { + long returnVal; + QueryResult qr = new QueryResult(getTableSizeQuery(tableName), getDBConnection()); + try { + returnVal = Long.parseLong(qr.iterator().next().getCells().get(0)); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + qr.close(); + } + return returnVal; + } + + /** + * Executes an SQL use statement (or similar) if the underlying database requires it. + * + * No-op by default. + * + * @param ignoredDatabase provided for compatibility + */ + default void use(String ignoredDatabase) {} + + /** + * closes the connection to the database. No-op by default. + */ + default void close() { + // no-op by default, so singletons don't need to implement it + } + + /** + * Returns the name of the database the connection was initiated for. + * + * @return name of (current) database + */ + String getDatabase(); + + /** + * + * @return List of table names in current database + */ + default List getTableNames() { + List names = new ArrayList<>(); + String query = this.getTablesQuery(getDatabase()); + + for (Row row : new QueryResult(query, new DBConnection(this, getDbType(), false))) { + names.add(row.getCells().get(0)); + } + + return names; + } + + /** + * Fetches the structure of a table as a list of FieldInfo objects. + * + * The default implementation should work for some/most/all JDBC databases and only needs to be overridden + * for databases where this is not the case. 
+ * + * @param table name of the table to fetch the structure for + * @param scanParameters parameters that are to be used for scanning the table + * @return list with one FieldInfo object for each column that the JDBC metadata reports for the table + */ + default List fetchTableStructure(String table, ScanParameters scanParameters) { + List fieldInfos = new ArrayList<>(); + /* try-with-resources makes sure that the metadata ResultSet is closed, also when an exception is thrown */ + try (ResultSet rs = getFieldNamesFromJDBC(table)) { + while (rs.next()) { + FieldInfo fieldInfo = new FieldInfo(scanParameters, rs.getString("COLUMN_NAME")); + fieldInfo.type = rs.getString("TYPE_NAME"); + fieldInfo.rowCount = getTableSize(table); /* NOTE(review): evaluated for every column; the default getTableSize() executes a query on each call */ + fieldInfos.add(fieldInfo); + } + } catch ( + SQLException e) { + throw new RuntimeException(e.getMessage()); + } + return fieldInfos; + } + + /** + * Retrieves column names (fields) for a table. + * + * The default implementation uses the JDBC metadata. Should only be overridden if this approach does not work + * for the underlying database. + * + * @param table name of the table to get the column names for + * @return java.sql.ResultSet with the column metadata; the caller is responsible for closing it + */ + default ResultSet getFieldNamesFromJDBC(String table) { + try { + DatabaseMetaData metadata = getDBConnection().getMetaData(); + return metadata.getColumns(null, null, table, null); + } catch (SQLException e) { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * Returns the database specific query to obtain the table names in the database. + * See getTableNames(), which calls this method + * + * @param database name of the database to list the tables of + * @return query that yields one row per table, with the table name in the first column + */ + String getTablesQuery(String database); + + /** + * Returns the database specific query that should be used to obtain a sample of rows from a table. 
+ * + * @param table table to get sample from + * @param rowCount known rowcount for the table + * @param sampleSize size of the sample + * @return Database specific SQL query + */ + String getRowSampleQuery(String table, long rowCount, long sampleSize); + + /** + * @return the DbSettings object used to initialize the database connection + */ + default DbSettings getDbSettings(ValidationFeedback feedback) { + return getDBConfiguration().toDbSettings(feedback); + } + + /** + * Returns a validated DbSettings object with values based on the IniFile object + * + * @param iniFile IniFile object containing database configuration values for the class that implements the StorageHandler + * @param feedback if not null, the validation feedback is added to this object + * @param outStream if not null, validation errors and warnings are printed to this stream + * @return DbSettings object + */ + default DbSettings getDbSettings(IniFile iniFile, ValidationFeedback feedback, PrintStream outStream) { + ValidationFeedback validationFeedback = getDBConfiguration().loadAndValidateConfiguration(iniFile); + if (feedback != null) { + feedback.add(validationFeedback); + } + if (outStream != null) { + if (validationFeedback.hasErrors()) { + outStream.println("There are errors for the configuration file:"); + validationFeedback.getErrors().forEach((error, fields) -> + outStream.printf("\t%s (%s)%n", error, fields.stream().map(f -> f.name).collect(Collectors.joining(",")))); + } + if (validationFeedback.hasWarnings()) { + outStream.println("There are warnings for the configuration file:"); + validationFeedback.getWarnings().forEach((warning, fields) -> + outStream.printf("\t%s (%s)%n", warning, fields.stream().map(f -> f.name).collect(Collectors.joining(",")))); + } + } + return getDBConfiguration().toDbSettings(feedback); + } + + /** + * Returns the DBConfiguration object for the implementing class + */ + DBConfiguration getDBConfiguration(); +} diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/UniformSamplingReservoir.java b/rabbit-core/src/main/java/org/ohdsi/databases/UniformSamplingReservoir.java similarity index 79% rename from 
whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/UniformSamplingReservoir.java rename to rabbit-core/src/main/java/org/ohdsi/databases/UniformSamplingReservoir.java index d7b8a068..8680d989 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/UniformSamplingReservoir.java +++ b/rabbit-core/src/main/java/org/ohdsi/databases/UniformSamplingReservoir.java @@ -1,4 +1,24 @@ -package org.ohdsi.whiteRabbit.scan; +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.math.BigDecimal; import java.math.RoundingMode; @@ -18,13 +38,15 @@ * calculated exactly. */ public class UniformSamplingReservoir { + static Logger logger = LoggerFactory.getLogger(UniformSamplingReservoir.class); + private double[] samples; private int maxSize; private long populationCount; private BigDecimal populationSum; private double populationMinimum = Double.POSITIVE_INFINITY; private double populationMaximum = Double.NEGATIVE_INFINITY; - private transient int currentSampleLength; + private int currentSampleLength; /** * Create an empty reservoir. 
@@ -211,14 +233,16 @@ public static void main(String[] args) { us.add(i); } - System.out.println(us.getSamples().toString()); - System.out.println(us.getCount()); - System.out.println(us.getSampleQuartiles().toString()); - System.out.println(us.populationSum.doubleValue()); - System.out.println(us.getPopulationMean()); - System.out.println(us.getPopulationMinimum()); - System.out.println(us.getPopulationMaximum()); - System.out.println(us.getSampleMean()); - System.out.println(us.getSampleStandardDeviation()); + if (logger.isInfoEnabled()) { + logger.info(us.getSamples().toString()); + logger.info(String.valueOf(us.getCount())); + logger.info(us.getSampleQuartiles().toString()); + logger.info(String.valueOf(us.populationSum.doubleValue())); + logger.info(String.valueOf(us.getPopulationMean())); + logger.info(String.valueOf(us.getPopulationMinimum())); + logger.info(String.valueOf(us.getPopulationMaximum())); + logger.info(String.valueOf(us.getSampleMean())); + logger.info(String.valueOf(us.getSampleStandardDeviation())); + } } } \ No newline at end of file diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationField.java b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationField.java new file mode 100644 index 00000000..63cb2254 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationField.java @@ -0,0 +1,137 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases.configuration; + +import org.apache.commons.lang.StringUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Pattern; + +public class ConfigurationField { + public final String name; + public final String label; + public final String toolTip; + private String value; + private String defaultValue; + + public static final String VALUE_REQUIRED_FORMAT_STRING = "A non-empty value is required for field %s (name %s)"; + public static final String INTEGER_VALUE_REQUIRED_FORMAT_STRING = "An integer value is allowed for field %s (name %s)"; + public static final String ONLY_YESNO_ALLOWED_FORMAT_STRING = "Only the values 'yes' or 'no' are allowed for field %s (name %s)"; + + List validators = new ArrayList<>(); + + private static final FieldValidator fieldRequiredValidator = new FieldRequiredValidator(); + private static final FieldValidator integerValueValidator = new IntegerValueValidator(); + private static final FieldValidator onlyYesNoAllowed = new YesNoValidator(); + + private ConfigurationField(String name, String label, String toolTip) { + this.name = name; + this.label = label; + this.toolTip = toolTip; + this.defaultValue = null; + this.value = null; + } + + public static ConfigurationField create(String name, String label, String toolTip) { + return new ConfigurationField(name, label, toolTip); + } + + public ConfigurationField required() { + this.addValidator(fieldRequiredValidator); + return this; + } + 
public ConfigurationField integerValue() { + this.addValidator(integerValueValidator); + return this; + } + public ConfigurationField yesNoValue() { + this.addValidator(onlyYesNoAllowed); + return this; + } + + public ConfigurationField defaultValue(String value) { + this.defaultValue = value; + return this; + } + + public ConfigurationField addValidator(FieldValidator validator) { + this.validators.add(validator); + return this; + } + + public ConfigurationField setValue(String value) { + this.value = value; + return this; + } + + public String getValue() { + return this.value; + } + + public String getDefaultValue() { + return this.defaultValue; + } + + public String getValueOrDefault() { + if (this.value != null) { + return this.value; + } else if (this.defaultValue != null){ + return this.defaultValue; + } + return null; + } + + private static class FieldRequiredValidator implements FieldValidator { + public ValidationFeedback validate(ConfigurationField field) { + ValidationFeedback feedback = new ValidationFeedback(); + if (StringUtils.isEmpty(field.getValue())) { + feedback.addError(String.format(VALUE_REQUIRED_FORMAT_STRING, field.label, field.name), field); + } + + return feedback; + } + } + + private static class IntegerValueValidator implements FieldValidator { + static Pattern integerPattern = Pattern.compile("^\\d*$"); + public ValidationFeedback validate(ConfigurationField field) { + ValidationFeedback feedback = new ValidationFeedback(); + if (StringUtils.isNotEmpty(field.getValue()) && (!integerPattern.matcher(field.getValue()).matches())) { + feedback.addError(String.format(INTEGER_VALUE_REQUIRED_FORMAT_STRING, field.label, field.name), field); + } + + return feedback; + } + } + private static class YesNoValidator implements FieldValidator { + static Pattern yesNoPattern = Pattern.compile("^(yes|no)$", Pattern.CASE_INSENSITIVE); + public ValidationFeedback validate(ConfigurationField field) { + ValidationFeedback feedback = new 
ValidationFeedback(); + if (StringUtils.isNotEmpty(field.getValue())) { + if (!yesNoPattern.matcher(field.getValue()).matches()) { + feedback.addError(String.format(ONLY_YESNO_ALLOWED_FORMAT_STRING, field.label, field.name), field); + } else { + field.setValue(field.getValue().toLowerCase()); + } + } + + return feedback; + } + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationFields.java b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationFields.java new file mode 100644 index 00000000..d5658a3e --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationFields.java @@ -0,0 +1,63 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases.configuration; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +public class ConfigurationFields { + List fields; + List validators = new ArrayList<>(); + + public ConfigurationFields(ConfigurationField... 
fields) { + this.fields = new ArrayList<>(Arrays.asList(fields)); + } + + public void addValidator(ConfigurationValidator validator) { + this.validators.add(validator); + } + + public List getFields() { + return this.fields; + } + + public ConfigurationField get(String fieldName) { + Optional field = fields.stream().filter(f -> fieldName.equalsIgnoreCase(f.name)).findFirst(); + if (field.isPresent()) { + return field.get(); + } + + throw new DBConfigurationException(String.format("No ConfigurationField object found for field name '%s'", fieldName)); + } + + public String getValue(String fieldName) { + Optional value = this.fields.stream().filter(f -> fieldName.equalsIgnoreCase(f.name)).findFirst().map(ConfigurationField::getValue); /* map after findFirst(): Stream.findFirst() throws a NullPointerException when the selected element is null, Optional.map() turns a null value into an empty Optional */ + return (value.orElse("")); + } + + public ValidationFeedback validate() { + ValidationFeedback allFeedback = new ValidationFeedback(); + for (ConfigurationValidator validator : this.validators) { + allFeedback.add(validator.validate(this)); + } + + return allFeedback; + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationValidator.java b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationValidator.java new file mode 100644 index 00000000..ec37f3d9 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/ConfigurationValidator.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases.configuration; + +@FunctionalInterface +public interface ConfigurationValidator { + ValidationFeedback validate(ConfigurationFields fields); +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DBConfiguration.java b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DBConfiguration.java new file mode 100644 index 00000000..2336a0d6 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DBConfiguration.java @@ -0,0 +1,168 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.databases.configuration; + +import org.apache.commons.lang.StringUtils; +import org.ohdsi.utilities.files.IniFile; + +import java.io.PrintStream; +import java.util.*; + +public class DBConfiguration { + public static final String DATA_TYPE_FIELD = "DATA_TYPE"; + public static final String DELIMITER_FIELD = "DELIMITER"; + public static final String TABLES_TO_SCAN_FIELD = "TABLES_TO_SCAN"; + public static final String SCAN_FIELD_VALUES_FIELD = "SCAN_FIELD_VALUES"; + public static final String MIN_CELL_COUNT_FIELD = "MIN_CELL_COUNT"; + public static final String MAX_DISTINCT_VALUES_FIELD = "MAX_DISTINCT_VALUES"; + public static final String ROWS_PER_TABLE_FIELD = "ROWS_PER_TABLE"; + public static final String CALCULATE_NUMERIC_STATS_FIELD = "CALCULATE_NUMERIC_STATS"; + public static final String NUMERIC_STATS_SAMPLER_SIZE_FIELD = "NUMERIC_STATS_SAMPLER_SIZE"; + public static final String ERROR_DUPLICATE_DEFINITIONS_FOR_FIELD = "Multiple definitions for field "; + protected ConfigurationFields configurationFields; + + private DBConfiguration() { + } + + + public DBConfiguration(ConfigurationField... 
fields) { + this.checkForDuplicates(fields); + this.configurationFields = new ConfigurationFields(fields); + } + + public static ConfigurationField[] createScanConfigurationFields() { + return new ConfigurationField[]{ + ConfigurationField.create(DELIMITER_FIELD, + "", + "") + .defaultValue(",") + .required(), + ConfigurationField.create(TABLES_TO_SCAN_FIELD, + "", + "") + .defaultValue("*") + .required(), + ConfigurationField.create(SCAN_FIELD_VALUES_FIELD, + "", + "") + .defaultValue("yes") + .required(), + ConfigurationField.create(MIN_CELL_COUNT_FIELD, + "", + "") + .defaultValue("5") + .integerValue() + .required(), + ConfigurationField.create(MAX_DISTINCT_VALUES_FIELD, + "", + "") + .defaultValue("1000") + .integerValue() + .required(), + ConfigurationField.create(ROWS_PER_TABLE_FIELD, + "", + "") + .defaultValue("100000") + .integerValue() + .required(), + ConfigurationField.create(CALCULATE_NUMERIC_STATS_FIELD, + "", + "") + .defaultValue("no") + .yesNoValue() + .required(), + ConfigurationField.create(NUMERIC_STATS_SAMPLER_SIZE_FIELD, + "", + "") + .defaultValue("500") + .integerValue() + .required() + }; + } + + public IniFile toIniFile() { + IniFile iniFile = new IniFile(); + this.configurationFields.getFields().forEach(f -> { + iniFile.set(f.name, f.getValue()); + }); + + return iniFile; + } + + public DbSettings toDbSettings(ValidationFeedback feedback) { + throw new DBConfigurationException("Should be implemented by inheriting classes"); + } + + private void checkForDuplicates(ConfigurationField... 
fields) { + Set names = new HashSet<>(); + for (ConfigurationField field : fields) { + if (names.contains(field.name)) { + throw new DBConfigurationException(ERROR_DUPLICATE_DEFINITIONS_FOR_FIELD + field.name); + } + names.add(field.name); + } + } + + public ValidationFeedback loadAndValidateConfiguration(IniFile iniFile) throws DBConfigurationException { + for (ConfigurationField field : this.getFields()) { + field.setValue(iniFile.get(field.name)); + } + + return this.validateAll(); + } + + public ValidationFeedback validateAll() { + ValidationFeedback configurationFeedback = new ValidationFeedback(); + for (ConfigurationField field : this.getFields()) { + for (FieldValidator validator : field.validators) { + ValidationFeedback feedback = validator.validate(field); + configurationFeedback.add(feedback); + } + } + + configurationFeedback.add(configurationFields.validate()); + + return configurationFeedback; + } + + public List getFields() { + return configurationFields.getFields(); + } + + public ConfigurationField getField(String fieldName) { + return this.getFields().stream().filter(f -> f.name.equalsIgnoreCase(fieldName)).findFirst().orElse(null); + } + + public String getValue(String fieldName) { + Optional field = getFields().stream().filter(f -> fieldName.equalsIgnoreCase(f.name)).findFirst(); + if (field.isPresent()) { + return field.get().getValue(); + } else { + return ""; + } + } + + public void printIniFileTemplate(PrintStream stream) { + for (ConfigurationField field : this.configurationFields.getFields()) { + stream.printf("%s: %s\t%s%n", + field.name, + StringUtils.isEmpty(field.getDefaultValue()) ? 
"_" : field.getDefaultValue(), + field.toolTip); + } + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DBConfigurationException.java b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DBConfigurationException.java new file mode 100644 index 00000000..a216f700 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DBConfigurationException.java @@ -0,0 +1,24 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Unchecked exception of the {@code org.ohdsi.databases.configuration} package.
 * It extends {@link RuntimeException}, so callers are not forced to declare or catch it.
 */
public class DBConfigurationException extends RuntimeException {
    /**
     * @param s the detail message, passed on unchanged to {@link RuntimeException}
     */
    public DBConfigurationException(String s) {
        super(s);
    }
}
"null" : dbType.name(), user, database, tables); + } } diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DbType.java b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DbType.java new file mode 100644 index 00000000..75e09f1e --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/DbType.java @@ -0,0 +1,119 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases.configuration; + +import org.apache.commons.lang.StringUtils; +import org.ohdsi.databases.StorageHandler; +import org.ohdsi.databases.SnowflakeHandler; + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public enum DbType { + /* + * Please note: the names and strings and the Type enum below must match when String.toUpperCase().replace(" ", "_") + * is applied (see constructor and the normalizedName() method). This is enforced when the enum values are constructed, + * and a violation of this rule will result in a DBConfigurationException being thrown. 
+ */ + DELIMITED_TEXT_FILES("Delimited text files", null), + MYSQL("MySQL", "com.mysql.cj.jdbc.Driver"), + ORACLE("Oracle", "oracle.jdbc.driver.OracleDriver"), + SQL_SERVER("SQL Server", "com.microsoft.sqlserver.jdbc.SQLServerDriver"), + POSTGRESQL("PostgreSQL", "org.postgresql.Driver"), + MS_ACCESS("MS Access", "net.ucanaccess.jdbc.UcanaccessDriver"), + PDW("PDW", "com.microsoft.sqlserver.jdbc.SQLServerDriver"), + REDSHIFT("Redshift", "com.amazon.redshift.jdbc42.Driver"), + TERADATA("Teradata", "com.teradata.jdbc.TeraDriver"), + BIGQUERY("BigQuery", "com.simba.googlebigquery.jdbc42.Driver"), + AZURE("Azure", "com.microsoft.sqlserver.jdbc.SQLServerDriver"), + SNOWFLAKE("Snowflake", "net.snowflake.client.jdbc.SnowflakeDriver", SnowflakeHandler.INSTANCE), + SAS7BDAT("Sas7bdat", null); + + private final String label; + private final String driverName; + private final StorageHandler implementingClass; + + DbType(String type, String driverName) { + this(type, driverName, null); + } + + DbType(String label, String driverName, StorageHandler implementingClass) { + this.label = label; + this.driverName = driverName; + this.implementingClass = implementingClass; + if (!this.name().equals(normalizedName(label))) { + throw new DBConfigurationException(String.format( + "%s: the normalized value of label '%s' (%s) must match the name of the enum constant (%s)", + DbType.class.getName(), + label, + normalizedName(label), + this.name() + )); + } + } + + public boolean equalsDbType(DbType other) { + return (other != null && other.equals(this)); + } + + public boolean supportsStorageHandler() { + return this.implementingClass != null; + } + + public StorageHandler getStorageHandler() throws DBConfigurationException { + if (this.supportsStorageHandler()) { + return this.implementingClass; + } else { + throw new DBConfigurationException(String.format("Class %s does not implement interface %s", + this.implementingClass.getClass().getName(), + StorageHandler.class.getName())); + } + } 
+ + public static DbType getDbType(String name) { + return Enum.valueOf(DbType.class, normalizedName(name)); + } + + /** + * Returns the list of supported database in the order that they should appear in the GUI. + * + * @return Array of labels for the supported database, intended for use in a selector (like a Swing JComboBox) + */ + public static String[] pickList() { + return Stream.of(DELIMITED_TEXT_FILES, SAS7BDAT, MYSQL, ORACLE, SQL_SERVER, POSTGRESQL, MS_ACCESS, PDW, REDSHIFT, TERADATA, BIGQUERY, AZURE, SNOWFLAKE) + .map(DbType::label).toArray(String[]::new); + } + + public static List driverNames() { + // return a list of unique names, without null values + return Stream.of(values()).filter(v -> StringUtils.isNotEmpty(v.driverName)).map(d -> d.driverName).distinct().collect(Collectors.toList()); + } + + public String label() { + return this.label; + } + + public String driverName() { + return this.driverName; + } + + private static String normalizedName(String name) { + return name.toUpperCase().replace(" ", "_"); + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/databases/configuration/FieldValidator.java b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/FieldValidator.java new file mode 100644 index 00000000..aa1ab734 --- /dev/null +++ b/rabbit-core/src/main/java/org/ohdsi/databases/configuration/FieldValidator.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/**
 * Validation callback for a single {@link ConfigurationField}.
 * Declared as a functional interface, so an implementation can be supplied as a lambda
 * or a method reference.
 */
@FunctionalInterface
public interface FieldValidator {
    /**
     * Validates the given field.
     *
     * @param field the configuration field to validate
     * @return the feedback produced by this validation
     */
    ValidationFeedback validate(ConfigurationField field);
}
+ ******************************************************************************/ +package org.ohdsi.databases.configuration; + +import one.util.streamex.EntryStream; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class ValidationFeedback { + private Map> warnings = new HashMap<>(); + private Map> errors = new HashMap<>(); + + public boolean isFullyValid() { + return warnings.isEmpty() && errors.isEmpty(); + } + + public boolean hasWarnings() { + return !warnings.isEmpty(); + } + + public boolean hasErrors() { + return !errors.isEmpty(); + } + + public Map> getWarnings() { + return this.warnings; + } + + public Map> getErrors() { + return this.errors; + } + + public void addWarning(String warning, ConfigurationField field) { + if (this.warnings.containsKey(warning)) { + this.warnings.get(warning).add(field); + } else { + this.warnings.put(warning, Collections.singletonList(field)); + } + } + + public void addError(String error, ConfigurationField field) { + if (this.errors.containsKey(error)) { + this.errors.get(error).add(field); + } else { + this.errors.put(error, Stream.of(field).collect(Collectors.toList())); + } + } + + public void add(ValidationFeedback feedback) { + this.warnings = EntryStream.of(this.warnings) + .append(EntryStream.of(feedback.getWarnings())) + .toMap((e1, e2) -> e1); + this.errors = EntryStream.of(this.errors) + .append(EntryStream.of(feedback.getErrors())) + .toMap((e1, e2) -> e1); + } +} diff --git a/rabbit-core/src/main/java/org/ohdsi/ooxml/ReadXlsxFileWithHeader.java b/rabbit-core/src/main/java/org/ohdsi/ooxml/ReadXlsxFileWithHeader.java index ed26bdfa..867f5267 100644 --- a/rabbit-core/src/main/java/org/ohdsi/ooxml/ReadXlsxFileWithHeader.java +++ b/rabbit-core/src/main/java/org/ohdsi/ooxml/ReadXlsxFileWithHeader.java @@ -83,7 +83,7 @@ public Row next() { List cells = new 
ArrayList(fieldName2ColumnIndex.size()); for (Cell cell : iterator.next()) { String text; - if (cell.getCellTypeEnum() == CellType.NUMERIC) + if (cell.getCellType() == CellType.NUMERIC) text = myFormatter.format(cell.getNumericCellValue()); else text = cell.toString(); diff --git a/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/TableCellLongTextRenderer.java b/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/TableCellLongTextRenderer.java index 9e49e383..4d5a40dd 100644 --- a/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/TableCellLongTextRenderer.java +++ b/rabbit-core/src/main/java/org/ohdsi/rabbitInAHat/dataModel/TableCellLongTextRenderer.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ package org.ohdsi.rabbitInAHat.dataModel; import java.awt.Component; diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/ScanFieldName.java b/rabbit-core/src/main/java/org/ohdsi/utilities/ScanFieldName.java index 0110e696..721cb9e1 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/ScanFieldName.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/ScanFieldName.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ package org.ohdsi.utilities; public interface ScanFieldName { diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/ScanSheetName.java b/rabbit-core/src/main/java/org/ohdsi/utilities/ScanSheetName.java index 56e65c7a..216c2db4 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/ScanSheetName.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/ScanSheetName.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ package org.ohdsi.utilities; public interface ScanSheetName { diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/SimpleCounter.java b/rabbit-core/src/main/java/org/ohdsi/utilities/SimpleCounter.java index 8a066408..fa7939ce 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/SimpleCounter.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/SimpleCounter.java @@ -17,7 +17,13 @@ ******************************************************************************/ package org.ohdsi.utilities; +import org.ohdsi.databases.UniformSamplingReservoir; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + public class SimpleCounter { + static Logger logger = LoggerFactory.getLogger(SimpleCounter.class); + private int reportN; private long count; private long lastTime; @@ -43,14 +49,17 @@ public void count() { } private void report() { - if (reportRate){ - long interval = System.currentTimeMillis() - lastTime; - long processed = count - lastCount; - System.out.println(count + " (time per unit = " + interval/(double)processed + "ms)"); - lastTime = System.currentTimeMillis(); - lastCount = count; - } else - System.out.println(count); + if (logger.isInfoEnabled()) { + if (reportRate) { + long interval = System.currentTimeMillis() - lastTime; + long processed = count - lastCount; + logger.info("{} (time per unit = {} ms", count, interval / (double) processed); + lastTime = System.currentTimeMillis(); + lastCount = count; + } else { + logger.info(String.valueOf(count)); + } + } } public void finish() { diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/StringUtilities.java b/rabbit-core/src/main/java/org/ohdsi/utilities/StringUtilities.java index 70aeb954..e5b9a969 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/StringUtilities.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/StringUtilities.java @@ -17,6 +17,10 @@ 
/**
 * Writes the string returned by now(), a tab character and the given message
 * as one line to standard output.
 *
 * @param message the text to print after the timestamp
 */
@SuppressWarnings("java:S106") // System.out is intended here
public static void outputWithTime(String message) {
    System.out.println(now() + "\t" + message);
}
b/rabbit-core/src/main/java/org/ohdsi/utilities/Version.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ package org.ohdsi.utilities; import java.io.IOException; diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/collections/CountingSet.java b/rabbit-core/src/main/java/org/ohdsi/utilities/collections/CountingSet.java index a70241c5..12d19144 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/collections/CountingSet.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/collections/CountingSet.java @@ -17,6 +17,10 @@ ******************************************************************************/ package org.ohdsi.utilities.collections; +import org.ohdsi.utilities.files.QuickAndDirtyXlsxReader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.AbstractSet; import java.util.Comparator; import java.util.HashMap; @@ -32,7 +36,8 @@ * @param */ public class CountingSet extends AbstractSet { - + static Logger logger = LoggerFactory.getLogger(CountingSet.class); + public Map key2count; public CountingSet() { @@ -188,7 +193,7 @@ public int compareTo(Count o) { public void printCounts() { decliningCountStream() - 
.forEach(entry -> System.out.println(entry.getKey() + "\t" + entry.getValue().count)); + .forEach(entry -> logger.info("{}\t{}", entry.getKey(), entry.getValue().count)); } private Stream> decliningCountStream() { diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/files/IniFile.java b/rabbit-core/src/main/java/org/ohdsi/utilities/files/IniFile.java index e2605d76..9bf348fd 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/files/IniFile.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/files/IniFile.java @@ -17,12 +17,18 @@ ******************************************************************************/ package org.ohdsi.utilities.files; +import org.apache.commons.lang.StringUtils; +import org.ohdsi.databases.configuration.DBConfiguration; + import java.util.HashMap; import java.util.Map; public class IniFile { private Map settings = new HashMap(); + public IniFile() { + + } public IniFile(String filename){ for (String line : new ReadTextFile(filename)){ int indexOfHash = line.lastIndexOf('#'); @@ -43,4 +49,20 @@ public String get(String fieldName){ else return value; } + + public void set(String fieldName, String value) { + settings.put(fieldName.trim().toLowerCase(), value); + } + + public String getOrFail(String fieldName){ + String value = this.get(fieldName); + if (StringUtils.isEmpty(value)) { + throw new RuntimeException("Ini file should contain a value for '" + fieldName + "'"); + } + return value; + } + + public String getDataType() { + return getOrFail(DBConfiguration.DATA_TYPE_FIELD); + } } diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/files/QuickAndDirtyXlsxReader.java b/rabbit-core/src/main/java/org/ohdsi/utilities/files/QuickAndDirtyXlsxReader.java index f62f74eb..dbb43037 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/files/QuickAndDirtyXlsxReader.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/files/QuickAndDirtyXlsxReader.java @@ -32,12 +32,16 @@ import java.util.zip.ZipEntry; import 
java.util.zip.ZipInputStream; +import org.ohdsi.utilities.SimpleCounter; import org.ohdsi.utilities.StringUtilities; import org.ohdsi.utilities.collections.IntegerComparator; import org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Sheet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class QuickAndDirtyXlsxReader extends ArrayList { + static Logger logger = LoggerFactory.getLogger(QuickAndDirtyXlsxReader.class); private static final long serialVersionUID = 25124428448185386L; private static final Pattern DOUBLE_IGNORE_PATTERN = Pattern.compile("[<>= ]+"); @@ -142,7 +146,6 @@ private void processSharedStrings(ZipInputStream inputStream) throws IOException private void processSheet(String filename, ZipInputStream inputStream) throws IOException { Sheet sheet = filenameToSheet.get(filename); - //System.out.println(filename + "\t" + sheet.name); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)); String line; StringBuilder fullSheet = new StringBuilder(); @@ -152,8 +155,9 @@ private void processSheet(String filename, ZipInputStream inputStream) throws IO for (String rowLine : StringUtilities.multiFindBetween(fullSheet.toString(), "")) { Row row = new Row(sheet); row.addAll(findCellValues(rowLine)); - if (row.size() != 0) + if (!row.isEmpty()) { sheet.add(row); + } } } diff --git a/rabbit-core/src/main/java/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java b/rabbit-core/src/main/java/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java index f6329154..0c2604f9 100644 --- a/rabbit-core/src/main/java/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java +++ b/rabbit-core/src/main/java/org/ohdsi/utilities/files/WriteCSVFileWithHeader.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ package org.ohdsi.utilities.files; import java.io.FileWriter; diff --git a/rabbit-core/src/test/java/org/ohdsi/databases/DBConfigurationTest.java b/rabbit-core/src/test/java/org/ohdsi/databases/DBConfigurationTest.java new file mode 100644 index 00000000..ff7019a1 --- /dev/null +++ b/rabbit-core/src/test/java/org/ohdsi/databases/DBConfigurationTest.java @@ -0,0 +1,59 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.databases; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.ohdsi.databases.configuration.ConfigurationField; +import org.ohdsi.databases.configuration.DBConfiguration; +import org.ohdsi.databases.configuration.DBConfigurationException; + +import static org.junit.jupiter.api.Assertions.*; +import static org.ohdsi.databases.configuration.DBConfiguration.ERROR_DUPLICATE_DEFINITIONS_FOR_FIELD; + +class DBConfigurationTest { + + private final String NAME_FIELD1 = "FIELD_1"; + private final String LABEL_FIELD1 = "Field one"; + private final String TOOLTIP_FIELD1 = "Tooltip for field one"; + private final String NAME_FIELD2 = "FIELD_2"; + private final String LABEL_FIELD2 = "Field two"; + private final String TOOLTIP_FIELD2 = "Tooltip for field two"; + + @BeforeEach + void setUp() { + } + + @Test + void doNotAcceptDuplicateDefinitionsForField() { + Exception exception = assertThrows(DBConfigurationException.class, () -> { + DBConfiguration testConfiguration = new DBConfiguration( + ConfigurationField.create(NAME_FIELD1, LABEL_FIELD1, TOOLTIP_FIELD1).required(), + ConfigurationField.create(NAME_FIELD1, LABEL_FIELD2, TOOLTIP_FIELD2)); + }); + assertTrue(exception.getMessage().startsWith(ERROR_DUPLICATE_DEFINITIONS_FOR_FIELD)); + } + + @Test + void getFields() { + } + + @Test + void printIniFileTemplate() { + } +} \ No newline at end of file diff --git a/rabbit-core/src/test/java/org/ohdsi/databases/DBConnectorTest.java b/rabbit-core/src/test/java/org/ohdsi/databases/DBConnectorTest.java new file mode 100644 index 00000000..d6883e1a --- /dev/null +++ b/rabbit-core/src/test/java/org/ohdsi/databases/DBConnectorTest.java @@ -0,0 +1,46 @@ +package org.ohdsi.databases; + +import org.junit.jupiter.api.Test; +import org.ohdsi.databases.configuration.DbType; + +import java.sql.Driver; +import java.sql.DriverManager; +import 
java.util.Enumeration; + +import static org.junit.jupiter.api.Assertions.*; + +class DBConnectorTest { + + public static void main(String[] args) { + DBConnectorTest dbConnectorTest = new DBConnectorTest(); + dbConnectorTest.verifyDrivers(); + } + + @Test + void verifyDrivers() { + // verify that a JDBC driver that is not included/supported cannot be loaded + String notSupportedDriver = "org.sqlite.JDBC"; // change this if WhiteRabbit starts supporting SQLite + assertFalse(DbType.driverNames().contains(notSupportedDriver), "Cannot test this for a supported driver."); + assertThrows(ClassNotFoundException.class, () -> + testJDBCDriverAndVersion(notSupportedDriver)); + DbType.driverNames().forEach(driver -> { + try { + testJDBCDriverAndVersion(driver); + } catch (ClassNotFoundException e) { + fail(String.format("JDBC driver class could not be loaded for %s", driver)); + } + }); + System.out.println("All configured JDBC drivers could be loaded."); + } + + void testJDBCDriverAndVersion(String driverName) throws ClassNotFoundException { + Enumeration drivers = DriverManager.getDrivers(); + while (drivers.hasMoreElements()) { + Driver driver = drivers.nextElement(); + Class driverClass = Class.forName(driverName); + if (driver.getClass().isAssignableFrom(driverClass)) { + int ignoredMajorVersion = driver.getMajorVersion(); + } + } + } +} \ No newline at end of file diff --git a/rabbit-core/src/test/java/org/ohdsi/databases/SnowflakeTestUtils.java b/rabbit-core/src/test/java/org/ohdsi/databases/SnowflakeTestUtils.java new file mode 100644 index 00000000..1f40631f --- /dev/null +++ b/rabbit-core/src/test/java/org/ohdsi/databases/SnowflakeTestUtils.java @@ -0,0 +1,91 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file 
except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.databases; + +import org.apache.commons.lang.StringUtils; +import org.ohdsi.databases.configuration.DbSettings; +import org.ohdsi.databases.configuration.DbType; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.function.BooleanSupplier; + +public class SnowflakeTestUtils { + + public static String getEnvOrFail(String name) { + String value = System.getenv(name); + if (StringUtils.isEmpty(value)) { + throw new RuntimeException(String.format("Environment variable '%s' is not set.", name)); + } + + return value; + } + + public static String getPropertyOrFail(String name) { + String value = System.getProperty(name); + if (StringUtils.isEmpty(value)) { + throw new RuntimeException(String.format("System property '%s' is not set.", name)); + } + + return value; + } + + @FunctionalInterface + public interface ReaderInterface { + String getOrFail(String name); + } + + public static class EnvironmentReader implements ReaderInterface { + public String getOrFail(String name) { + return getEnvOrFail(name); + } + } + public static class PropertyReader implements ReaderInterface { + public String getOrFail(String name) { + return getPropertyOrFail(name); + } + } + + public static class SnowflakeSystemPropertiesFileChecker implements BooleanSupplier { + @Override + public boolean getAsBoolean() { + String 
buildDirectory = System.getProperty("projectBuildDirectory"); + Path snowflakeEnvVarPath = Paths.get(buildDirectory,"../..", "snowflake.env"); + if (StringUtils.isNotEmpty(buildDirectory) && Files.exists(snowflakeEnvVarPath)) { + try { + loadSystemProperties(snowflakeEnvVarPath); + } catch (IOException e) { + throw new RuntimeException(e); + } + return true; + } + // check the endpoint here and return either true or false + return false; + } + + private void loadSystemProperties(Path envVarFile) throws IOException { + Files.lines(envVarFile) + .map(line -> line.replaceAll("^export ", "")) + .map(line2 -> line2.split("=", 2)) + .forEach(v -> System.setProperty(v[0], v[1])); + } + } +} diff --git a/rabbit-core/src/test/java/org/ohdsi/databases/TestConfigurationField.java b/rabbit-core/src/test/java/org/ohdsi/databases/TestConfigurationField.java new file mode 100644 index 00000000..f5462857 --- /dev/null +++ b/rabbit-core/src/test/java/org/ohdsi/databases/TestConfigurationField.java @@ -0,0 +1,142 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.databases; + +import org.junit.jupiter.api.Test; +import org.ohdsi.databases.configuration.ConfigurationField; +import org.ohdsi.databases.configuration.DBConfiguration; +import org.ohdsi.databases.configuration.FieldValidator; +import org.ohdsi.databases.configuration.ValidationFeedback; + +import static org.junit.jupiter.api.Assertions.*; +import static org.ohdsi.databases.configuration.ConfigurationField.*; + +class TestConfigurationField { + + @Test + void testStandardValidators() { + final String REQUIRED_FIELD = "REQUIRED_FIELD"; + final String OPTIONAL_INTEGER_FIELD = "OPTIONAL_INTEGER_FIELD"; + final String REQUIRED_INTEGER_FIELD = "REQUIRED_INTEGER_FIELD"; + final String OPTIONAL_YESNO_FIELD = "OPTIONAL_YESNO_FIELD"; + final String REQUIRED_YESNO_FIELD = "REQUIRED_YESNO_FIELD"; + DBConfiguration configuration = new DBConfiguration( + ConfigurationField + .create(REQUIRED_FIELD, REQUIRED_FIELD, "") + .required(), + ConfigurationField + .create(OPTIONAL_INTEGER_FIELD, OPTIONAL_INTEGER_FIELD, "") + .integerValue(), + ConfigurationField + .create(REQUIRED_INTEGER_FIELD, REQUIRED_INTEGER_FIELD, "") + .integerValue() + .required(), + ConfigurationField + .create(OPTIONAL_YESNO_FIELD, OPTIONAL_YESNO_FIELD, "") + .yesNoValue(), + ConfigurationField + .create(REQUIRED_YESNO_FIELD, REQUIRED_YESNO_FIELD, "") + .yesNoValue() + .required() + ); + + // test for values in required fields + ValidationFeedback feedback = configuration.validateAll(); + assertEquals(0, feedback.getWarnings().size()); + assertEquals(3, feedback.getErrors().size()); + String expectedErrorKey = String.format(VALUE_REQUIRED_FORMAT_STRING, REQUIRED_FIELD, REQUIRED_FIELD); + assertTrue(feedback.getErrors().containsKey(expectedErrorKey)); + assertTrue(feedback.getErrors().get(expectedErrorKey).get(0).name.equalsIgnoreCase(REQUIRED_FIELD)); + expectedErrorKey = 
String.format(VALUE_REQUIRED_FORMAT_STRING, REQUIRED_INTEGER_FIELD, REQUIRED_INTEGER_FIELD); + assertTrue(feedback.getErrors().containsKey(expectedErrorKey)); + assertTrue(feedback.getErrors().get(expectedErrorKey).get(0).name.equalsIgnoreCase(REQUIRED_INTEGER_FIELD)); + expectedErrorKey = String.format(VALUE_REQUIRED_FORMAT_STRING, REQUIRED_YESNO_FIELD, REQUIRED_YESNO_FIELD); + assertTrue(feedback.getErrors().containsKey(expectedErrorKey)); + assertTrue(feedback.getErrors().get(expectedErrorKey).get(0).name.equalsIgnoreCase(REQUIRED_YESNO_FIELD)); + + // set (valid) values where required + configuration.getField(REQUIRED_FIELD).setValue("some value"); + configuration.getField(REQUIRED_INTEGER_FIELD).setValue("123"); + configuration.getField(REQUIRED_YESNO_FIELD).setValue("yes"); + feedback = configuration.validateAll(); + assertEquals(0, feedback.getWarnings().size()); + assertEquals(0, feedback.getErrors().size()); + + // set some bogus values + configuration.getField(REQUIRED_INTEGER_FIELD).setValue("abc"); + configuration.getField(REQUIRED_YESNO_FIELD).setValue("of course!"); + configuration.getField(OPTIONAL_YESNO_FIELD).setValue("maybe not?"); + feedback = configuration.validateAll(); + assertEquals(0, feedback.getWarnings().size()); + assertEquals(3, feedback.getErrors().size()); + expectedErrorKey = String.format(INTEGER_VALUE_REQUIRED_FORMAT_STRING, REQUIRED_INTEGER_FIELD, REQUIRED_INTEGER_FIELD); + assertTrue(feedback.getErrors().containsKey(expectedErrorKey)); + assertTrue(feedback.getErrors().get(expectedErrorKey).get(0).name.equalsIgnoreCase(REQUIRED_INTEGER_FIELD)); + expectedErrorKey = String.format(ONLY_YESNO_ALLOWED_FORMAT_STRING, REQUIRED_YESNO_FIELD, REQUIRED_YESNO_FIELD); + assertTrue(feedback.getErrors().containsKey(expectedErrorKey)); + assertTrue(feedback.getErrors().get(expectedErrorKey).get(0).name.equalsIgnoreCase(REQUIRED_YESNO_FIELD)); + expectedErrorKey = String.format(ONLY_YESNO_ALLOWED_FORMAT_STRING, OPTIONAL_YESNO_FIELD, 
OPTIONAL_YESNO_FIELD); + assertTrue(feedback.getErrors().containsKey(expectedErrorKey)); + assertTrue(feedback.getErrors().get(expectedErrorKey).get(0).name.equalsIgnoreCase(OPTIONAL_YESNO_FIELD)); + + // and test the normalization of a yes/no field + configuration.getField(REQUIRED_INTEGER_FIELD).setValue("0"); // no error wanted here either + configuration.getField(REQUIRED_YESNO_FIELD).setValue("YeS"); + configuration.getField(OPTIONAL_YESNO_FIELD).setValue("NO"); + feedback = configuration.validateAll(); + assertEquals(0, feedback.getWarnings().size()); + assertEquals(0, feedback.getErrors().size()); + assertEquals("yes", configuration.getField(REQUIRED_YESNO_FIELD).getValue()); + assertEquals("no", configuration.getField(OPTIONAL_YESNO_FIELD).getValue()); + } + + static class WarningValidator implements FieldValidator { + final static String expectedValue = "Expected value"; + final static String warning = "Field does not contain the expected value!"; + @Override + public ValidationFeedback validate(ConfigurationField field) { + ValidationFeedback feedback = new ValidationFeedback(); + + if (!field.getValue().equalsIgnoreCase(expectedValue)) { + feedback.addWarning(warning, field); + } + + return feedback; + } + } + + @Test + void testBespokeWarningValidator() { + final String FIELD_NAME = "FieldName"; + DBConfiguration configuration = new DBConfiguration( + ConfigurationField + .create(FIELD_NAME, FIELD_NAME, "") + .addValidator(new WarningValidator()) + .setValue("")); + + ValidationFeedback feedback = configuration.validateAll(); + assertEquals(1, feedback.getWarnings().size()); + assertEquals(0, feedback.getErrors().size()); + assertTrue(feedback.getWarnings().get(WarningValidator.warning).get(0).name.equalsIgnoreCase(FIELD_NAME)); + + configuration.getFields().get(0).setValue(WarningValidator.expectedValue); + feedback = configuration.validateAll(); + assertEquals(0, feedback.getWarnings().size()); + assertEquals(0, feedback.getErrors().size()); + } +} \ 
No newline at end of file diff --git a/rabbit-core/src/test/java/org/ohdsi/databases/TestSnowflakeHandler.java b/rabbit-core/src/test/java/org/ohdsi/databases/TestSnowflakeHandler.java new file mode 100644 index 00000000..bb942336 --- /dev/null +++ b/rabbit-core/src/test/java/org/ohdsi/databases/TestSnowflakeHandler.java @@ -0,0 +1,100 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.databases; + +import org.apache.commons.lang.StringUtils; +import org.junit.jupiter.api.Test; +import org.ohdsi.databases.configuration.*; +import org.ohdsi.utilities.files.IniFile; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; + +import static org.junit.jupiter.api.Assertions.*; +import static org.ohdsi.databases.SnowflakeHandler.*; + +class TestSnowflakeHandler { + + Logger logger = LoggerFactory.getLogger(TestSnowflakeHandler.class); + + @Test + void testPrintIniFileTemplate() throws IOException { + String output; + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); PrintStream printStream = new PrintStream(outputStream)) { + DBConfiguration configuration = new SnowflakeConfiguration(); + configuration.printIniFileTemplate(printStream); + output = outputStream.toString(); + for (ConfigurationField field: configuration.getFields()) { + assertTrue(output.contains(field.name), String.format("ini file template should contain field name (%s)", field.name)); + assertTrue(output.contains(field.toolTip), String.format("ini file template should contain tool tip (%s)", field.toolTip)); + if (!StringUtils.isEmpty(field.getDefaultValue())) { + assertTrue(output.contains(field.getDefaultValue()), String.format("ini file template should contain default value (%s)", field.getDefaultValue())); + } + } + } + } + + @Test + void testLoadAndValidateConfiguration() { + DBConfiguration snowflakeConfiguration = new SnowflakeConfiguration(); + IniFile iniFile = new IniFile(); + + iniFile.set(DBConfiguration.DATA_TYPE_FIELD, DbType.SNOWFLAKE.name()); + + // start with no values set, should generate an error for each required field + ValidationFeedback feedback = snowflakeConfiguration.loadAndValidateConfiguration(iniFile); + 
assertFalse(feedback.hasWarnings()); + assertTrue(feedback.hasErrors()); + assertEquals(6,feedback.getErrors().size()); + + // fill in all required fields, verify no errors + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_ACCOUNT, "some-account"); + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_USER, "some-user"); + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_PASSWORD, "some-password"); + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_WAREHOUSE, "some-warehouse"); + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_DATABASE, "some-database"); + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_SCHEMA, "some-schema"); + + feedback = snowflakeConfiguration.loadAndValidateConfiguration(iniFile); + assertFalse(feedback.hasWarnings()); + assertFalse(feedback.hasErrors()); + + // add (invalid) value for authenticator field, should generate two errors + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_AUTHENTICATOR, "some-value"); + + feedback = snowflakeConfiguration.loadAndValidateConfiguration(iniFile); + assertFalse(feedback.hasWarnings()); + assertTrue(feedback.hasErrors()); + assertEquals(2,feedback.getErrors().size()); + assertTrue(feedback.getErrors().containsKey(SnowflakeConfiguration.ERROR_MUST_NOT_SET_PASSWORD_AND_AUTHENTICATOR), + "there should be an error indicating that both password and authenticator were set"); + assertEquals(1, + (int) new ArrayList<>(feedback.getErrors().keySet()).stream().filter(k -> k.startsWith(SnowflakeConfiguration.ERROR_VALUE_CAN_ONLY_BE_ONE_OF)).count(), + "there should be an error indicating that a wrong value was set for the authenticator field"); + + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_PASSWORD, null); + iniFile.set(SnowflakeConfiguration.SNOWFLAKE_AUTHENTICATOR, "externalbrowser"); + feedback = snowflakeConfiguration.loadAndValidateConfiguration(iniFile); + assertFalse(feedback.hasWarnings()); + assertFalse(feedback.hasErrors()); + } +} \ No newline at end of file diff --git 
a/rabbit-core/src/test/java/org/ohdsi/databases/configuration/DbTypeTest.java b/rabbit-core/src/test/java/org/ohdsi/databases/configuration/DbTypeTest.java new file mode 100644 index 00000000..c3fbd5b7 --- /dev/null +++ b/rabbit-core/src/test/java/org/ohdsi/databases/configuration/DbTypeTest.java @@ -0,0 +1,37 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.databases.configuration; + +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.*; + +class DbTypeTest { + + @Test + void testPickList() { + List labelsFromAllDbTypeValues = Stream.of(DbType.values()).map(DbType::label).sorted().collect(Collectors.toList()); + List labelsFromPickList = Stream.of(DbType.pickList()).sorted().collect(Collectors.toList()); + + assertEquals(labelsFromAllDbTypeValues, labelsFromPickList, "The picklist should contain all the labels defined in the DbType enum"); + } +} \ No newline at end of file diff --git a/rabbit-core/src/test/resources/snowflake.ini b/rabbit-core/src/test/resources/snowflake.ini new file mode 100644 index 00000000..6299dcea --- /dev/null +++ b/rabbit-core/src/test/resources/snowflake.ini @@ -0,0 +1,16 @@ +# Usage: dist/bin/whiteRabbit -ini +WORKING_FOLDER = . +DATA_TYPE = Snowflake +SNOWFLAKE_ACCOUNT = some-account +SNOWFLAKE_USER = some-user +SNOWFLAKE_PASSWORD = some-password +SNOWFLAKE_WAREHOUSE = some-warehouse +SNOWFLAKE_DATABASE = some-database +SNOWFLAKE_SCHEMA = some-schema +TABLES_TO_SCAN = * # Comma-delimited list of table names to scan. Use "*" (asterix) to include all tables in the database +SCAN_FIELD_VALUES = yes # Include the frequency of field values in the scan report? "yes" or "no" +MIN_CELL_COUNT = 5 # Minimum frequency for a field value to be included in the report +MAX_DISTINCT_VALUES = 1000 # Maximum number of distinct values per field to be reported +ROWS_PER_TABLE = 100000 # Maximum number of rows per table to be scanned for field values +CALCULATE_NUMERIC_STATS = no # Include average, standard deviation and quartiles in the scan report? 
"yes" or "no" +NUMERIC_STATS_SAMPLER_SIZE = 500 # Maximum number of rows used to calculate numeric statistics diff --git a/rabbitinahat/pom.xml b/rabbitinahat/pom.xml index 533b81d4..8876ed55 100644 --- a/rabbitinahat/pom.xml +++ b/rabbitinahat/pom.xml @@ -36,7 +36,28 @@ org.apache.maven.plugins maven-surefire-plugin - 2.22.2 + + + false + com.github.caciocavallosilano.cacio.ctc.CTCToolkit + com.github.caciocavallosilano.cacio.ctc.CTCGraphicsEnvironment + + + --add-exports=java.desktop/java.awt=ALL-UNNAMED + --add-exports=java.desktop/java.awt.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.image=ALL-UNNAMED + --add-exports=java.desktop/sun.java2d=ALL-UNNAMED + --add-exports=java.desktop/java.awt.dnd.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.event=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.datatransfer=ALL-UNNAMED + --add-exports=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.desktop/java.awt=ALL-UNNAMED + --add-opens=java.desktop/sun.java2d=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + + @@ -56,6 +77,35 @@ + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.4.1 + + + enforce-max-java-version + verify + enforce + + + + ${org.ohdsi.whiterabbit.maxjdkversion} + test + + + true + + + + + + org.codehaus.mojo + extra-enforcer-rules + 1.7.0 + + + @@ -64,47 +114,12 @@ - org.ohdsi rabbit-core ${project.version} - - - org.apache.maven.plugins - maven-surefire-plugin - 2.22.2 - test - - junit junit @@ -127,7 +142,7 @@ com.github.caciocavallosilano cacio-tta - 1.10 + 1.17.1 test @@ -137,6 +152,11 @@ 3.1.0 test + + org.junit.jupiter + junit-jupiter-api + 5.9.2 + test + - diff --git a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/DescriptionTextArea.java b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/DescriptionTextArea.java index 4d2db70e..9cfdeafb 100644 --- 
a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/DescriptionTextArea.java +++ b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/DescriptionTextArea.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ package org.ohdsi.rabbitInAHat; import javax.swing.JTextArea; diff --git a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FetchCDMModelFromServer.java b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FetchCDMModelFromServer.java index 7dc47aee..cad7a708 100644 --- a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FetchCDMModelFromServer.java +++ b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FetchCDMModelFromServer.java @@ -17,7 +17,8 @@ ******************************************************************************/ package org.ohdsi.rabbitInAHat; -import org.ohdsi.databases.DbType; +import org.ohdsi.databases.configuration.DbSettings; +import org.ohdsi.databases.configuration.DbType; import org.ohdsi.databases.RichConnection; import org.ohdsi.utilities.files.Row; import org.ohdsi.utilities.files.WriteCSVFileWithHeader; @@ -31,7 +32,12 @@ public class FetchCDMModelFromServer { public static void main(String[] args) { - RichConnection connection = new 
RichConnection("127.0.0.1/ohdsi", null, "postgres", "F1r3starter", DbType.POSTGRESQL); + DbSettings dbSettings = new DbSettings(); + dbSettings.server = "127.0.0.1/ohdsi"; + dbSettings.user = "postgres"; + dbSettings.password = "F1r3starter"; + dbSettings.dbType = DbType.POSTGRESQL; + RichConnection connection = new RichConnection(dbSettings); connection.use("cdm5"); WriteCSVFileWithHeader out = new WriteCSVFileWithHeader("c:/temp/CDMV5Model.csv"); diff --git a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FilterDialog.java b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FilterDialog.java index a39f6b14..33fe2e6d 100644 --- a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FilterDialog.java +++ b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/FilterDialog.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ package org.ohdsi.rabbitInAHat; import java.awt.Container; diff --git a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/MappingPanel.java b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/MappingPanel.java index 0cfd1e0b..2a935797 100644 --- a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/MappingPanel.java +++ b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/MappingPanel.java @@ -62,6 +62,8 @@ public class MappingPanel extends JPanel implements MouseListener, MouseMotionLi public static final int MIN_SPACE_BETWEEN_COLUMNS = 200; public static final int ARROW_START_WIDTH = 50; public static final int BORDER_HEIGHT = 25; + // Extra margin between header and first item when using stem table + public static final int STEM_HEIGHT_MARGIN = ITEM_HEIGHT / 2; private int sourceX; private int cdmX; @@ -240,7 +242,7 @@ private void setLabeledRectanglesLocation(List components, int int y = HEADER_HEIGHT + HEADER_TOP_MARGIN; if (ObjectExchange.etl.hasStemTable()) { // Move all non-stem items - y = HEADER_TOP_MARGIN + ITEM_HEIGHT; + y += STEM_HEIGHT_MARGIN; } for (LabeledRectangle component : components) { // Exception for laying out the stem table @@ -264,8 +266,18 @@ private void setLabeledRectanglesLocation(List components, int public Dimension getMinimumSize() { Dimension dimension = new Dimension(); dimension.width = 2 * (ITEM_WIDTH + MARGIN) + MIN_SPACE_BETWEEN_COLUMNS; - dimension.height = Math.min(HEADER_HEIGHT + HEADER_TOP_MARGIN + Math.max(sourceComponents.size(), cdmComponents.size()) * (ITEM_HEIGHT + MARGIN), - maxHeight); + int maxComponentsSize = Math.max(sourceComponents.size(), cdmComponents.size()); + int componentsHeight = maxComponentsSize * (ITEM_HEIGHT + MARGIN); + dimension.height = Math.min(HEADER_HEIGHT + HEADER_TOP_MARGIN + componentsHeight, maxHeight); + + if (ObjectExchange.etl.hasStemTable()) { + dimension.height += STEM_HEIGHT_MARGIN; + // For the table 
mapping panel, deduct the stem table from the items (as it's not shown as a normal item in the list) + boolean isTablesPanel = cdmComponents.stream().allMatch(n -> (n.getItem() instanceof Table)); + if (!cdmComponents.isEmpty() && isTablesPanel) { + dimension.height -= (ITEM_HEIGHT + MARGIN); + } + } return dimension; } diff --git a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java index 81fbdd9f..6d6ecfd6 100644 --- a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java +++ b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/RabbitInAHatMain.java @@ -739,7 +739,6 @@ private Dimension getPreferredDimension() { if (matcher.groupCount() == 2) { preferredHeight = Integer.parseInt(matcher.group(1)); preferredWidth = Integer.parseInt(matcher.group(2)); - //System.out.println("Using cacio screen size: " + cacioScreenSize); } } } diff --git a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/SQLGenerator.java b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/SQLGenerator.java index 442a3bb9..fdf79454 100644 --- a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/SQLGenerator.java +++ b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/SQLGenerator.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ package org.ohdsi.rabbitInAHat; import org.ohdsi.rabbitInAHat.dataModel.*; diff --git a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/dataModel/StemTableFactory.java b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/dataModel/StemTableFactory.java index ca8602aa..250265b3 100644 --- a/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/dataModel/StemTableFactory.java +++ b/rabbitinahat/src/main/java/org/ohdsi/rabbitInAHat/dataModel/StemTableFactory.java @@ -1,3 +1,20 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ package org.ohdsi.rabbitInAHat.dataModel; import java.io.IOException; diff --git a/rabbitinahat/src/test/java/org/ohdsi/rabbitInAHat/TestRabbitInAHatMain.java b/rabbitinahat/src/test/java/org/ohdsi/rabbitInAHat/TestRabbitInAHatMain.java index bf0fea59..b6cc22b1 100644 --- a/rabbitinahat/src/test/java/org/ohdsi/rabbitInAHat/TestRabbitInAHatMain.java +++ b/rabbitinahat/src/test/java/org/ohdsi/rabbitInAHat/TestRabbitInAHatMain.java @@ -1,18 +1,33 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ package org.ohdsi.rabbitInAHat; -import com.github.caciocavallosilano.cacio.ctc.junit.CacioAssertJRunner; +import com.github.caciocavallosilano.cacio.ctc.junit.CacioTest; import org.assertj.swing.annotation.GUITest; import org.assertj.swing.core.ComponentDragAndDrop; import org.assertj.swing.edt.GuiActionRunner; import org.assertj.swing.finder.JFileChooserFinder; import org.assertj.swing.fixture.FrameFixture; import org.assertj.swing.fixture.JFileChooserFixture; -import org.assertj.swing.image.ScreenshotTaker; import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import org.junit.runner.RunWith; import java.awt.*; import java.awt.event.KeyEvent; @@ -27,13 +42,13 @@ import static org.ohdsi.rabbitInAHat.RabbitInAHatMain.*; /* - * CacioTestRunner enables running the Swing GUI tests in a virtual screen. This allows the integration tests to run + * The @CacioTest annotation below enables running the Swing GUI tests in a virtual screen. This allows the integration tests to run * anywhere without being blocked by the absence of a real screen (e.g. github actions), and without being * disrupted by unrelated user activity on workstations/laptops (any keyboard or mouse action). * For debugging purposes, you can disable the annotation below to have the tests run on your screen. Be aware that * any interaction with mouse or keyboard can (will) disrupt the tests if they run on your screen. 
*/ -@RunWith(CacioAssertJRunner.class) +@CacioTest public class TestRabbitInAHatMain { private static FrameFixture window; @@ -81,7 +96,6 @@ public void openReport() throws URISyntaxException { fileChooser.selectFile(new File(Objects.requireNonNull(scanReportUrl).toURI())).approve(); } - @GUITest @Test public void openAndVerifySavedETLSpecs() throws URISyntaxException { // open the test ETL specification @@ -115,8 +129,8 @@ private void openETLSpecs(String specName) throws URISyntaxException { URL etlSpecsUrl = this.getClass().getClassLoader().getResource(specName); fileChooser.selectFile(new File(Objects.requireNonNull(etlSpecsUrl).toURI())).approve(); MappingPanel tablesPanel = window.panel(PANEL_TABLE_MAPPING).targetCastedTo(MappingPanel.class); - assertTrue("There should be source items", tablesPanel.getVisibleSourceComponents().size() > 0); - assertTrue("There should be target items", tablesPanel.getVisibleTargetComponents().size() > 0); + assertFalse("There should be source items", tablesPanel.getVisibleSourceComponents().isEmpty()); + assertFalse("There should be target items", tablesPanel.getVisibleTargetComponents().isEmpty()); } private void verifyTableMapping(MappingPanel tablesPanel, String sourceName, String targetName) { LabeledRectangle sourceTable = findMappableItem(tablesPanel.getVisibleSourceComponents(), sourceName); @@ -150,11 +164,6 @@ private void clickAndVerifyLabeledRectangles(MappingPanel tablesPanel, LabeledRe if (rectangles.length > 1) { window.robot().releaseKey(KeyEvent.VK_SHIFT); } -// if (!r.isSelected()) { -// ScreenshotTaker screenshotTaker = new ScreenshotTaker(); -// screenshotTaker.saveDesktopAsPng("problem.png"); -// System.out.println("Problem!"); -// } assertTrue(r.isSelected()); }); } diff --git a/whiterabbit/pom.xml b/whiterabbit/pom.xml index 0ea7943b..23234e3a 100644 --- a/whiterabbit/pom.xml +++ b/whiterabbit/pom.xml @@ -27,28 +27,126 @@ -Xmx1200m - org.ohdsi.whiteRabbit.WhiteRabbitMain + 
org.ohdsi.whiterabbit.WhiteRabbitMain whiteRabbit - org.apache.maven.plugins - maven-surefire-plugin - - 3.0.0-M8 + org.apache.maven.plugins + maven-surefire-plugin + - -Doracle.jdbc.timezoneAsRegion=false + 1 + false + ${skipUnitTests} + + false + com.github.caciocavallosilano.cacio.ctc.CTCToolkit + com.github.caciocavallosilano.cacio.ctc.CTCGraphicsEnvironment + + + -Doracle.jdbc.timezoneAsRegion=false + + --add-exports=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + + --add-opens java.base/java.lang.reflect=ALL-UNNAMED + --add-exports java.base/java.lang.reflect=ALL-UNNAMED + --add-exports=java.desktop/java.awt=ALL-UNNAMED + --add-exports=java.desktop/java.awt.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.image=ALL-UNNAMED + --add-exports=java.desktop/sun.java2d=ALL-UNNAMED + --add-exports=java.desktop/java.awt.dnd.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.event=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.datatransfer=ALL-UNNAMED + --add-exports=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.desktop/java.awt=ALL-UNNAMED + --add-opens=java.desktop/sun.java2d=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens java.base/java.lang=ALL-UNNAMED + - - - org.junit.jupiter - junit-jupiter - 5.8.2 - - - + + + org.apache.maven.plugins + maven-failsafe-plugin + + + propertyValue + ${project.build.directory} + + 1 + false + + **/*IT.java + + ${skipIntegrationTests} + + + --add-opens java.base/java.nio=ALL-UNNAMED + + --add-opens java.base/java.lang.reflect=ALL-UNNAMED + --add-exports java.base/java.lang.reflect=ALL-UNNAMED + --add-exports=java.desktop/java.awt=ALL-UNNAMED + --add-exports=java.desktop/java.awt.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.image=ALL-UNNAMED + --add-exports=java.desktop/sun.java2d=ALL-UNNAMED + 
--add-exports=java.desktop/java.awt.dnd.peer=ALL-UNNAMED + --add-exports=java.desktop/sun.awt=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.event=ALL-UNNAMED + --add-exports=java.desktop/sun.awt.datatransfer=ALL-UNNAMED + --add-exports=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.desktop/java.awt=ALL-UNNAMED + --add-opens=java.desktop/sun.java2d=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens java.base/java.lang=ALL-UNNAMED + + + + + + integration-test + verify + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.4.1 + + + enforce-max-java-version + verify + enforce + + + + ${org.ohdsi.whiterabbit.maxjdkversion} + test + + + true + + + + + + org.codehaus.mojo + extra-enforcer-rules + 1.7.0 + + + @@ -99,6 +197,12 @@ org.testcontainers testcontainers test + + + com.fasterxml.jackson.core + jackson-annotations + + @@ -130,6 +234,55 @@ oracle-xe test + + org.ohdsi + rabbit-core + ${project.version} + test-jar + test + + + + + + + + one.util + streamex + 0.8.2 + + + org.apache.poi + poi-ooxml-lite + 5.2.4 + compile + + + com.github.caciocavallosilano + cacio-tta + 1.17.3 + test + + + + org.assertj + assertj-swing-junit + 3.17.1 + test + + + + org.assertj + assertj-swing + 3.17.1 + test + + + + org.apache.logging.log4j + log4j-core + 2.21.1 + diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/DataType.java b/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/DataType.java deleted file mode 100644 index ee6eaf18..00000000 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/DataType.java +++ /dev/null @@ -1,5 +0,0 @@ -package org.ohdsi.whiteRabbit.scan; - -public enum DataType { - EMPTY, TEXT, DATE, INT, REAL, VARCHAR; -} diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/Console.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/Console.java similarity index 85% rename from 
whiterabbit/src/main/java/org/ohdsi/whiteRabbit/Console.java rename to whiterabbit/src/main/java/org/ohdsi/whiterabbit/Console.java index 374be963..1b0b389d 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/Console.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/Console.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ -package org.ohdsi.whiteRabbit; +package org.ohdsi.whiterabbit; import java.io.IOException; import java.io.OutputStream; @@ -27,16 +27,10 @@ public class Console extends OutputStream { - private StringBuffer buffer = new StringBuffer(); + private StringBuilder buffer = new StringBuilder(); private WriteTextFile debug = null; private JTextArea textArea; - - public void println(String string) { - textArea.append(string + "\n"); - textArea.repaint(); - System.out.println(string); - } - + public void setTextArea(JTextArea textArea) { this.textArea = textArea; } @@ -66,8 +60,7 @@ public void write(int b) throws IOException { debug.writeln(buffer.toString()); debug.flush(); } - buffer = new StringBuffer(); + buffer = new StringBuilder(); } } - } diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/ErrorReport.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/ErrorReport.java similarity index 96% rename from whiterabbit/src/main/java/org/ohdsi/whiteRabbit/ErrorReport.java rename to whiterabbit/src/main/java/org/ohdsi/whiterabbit/ErrorReport.java index 28fe0eea..b7cd1059 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/ErrorReport.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/ErrorReport.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
******************************************************************************/ -package org.ohdsi.whiteRabbit; +package org.ohdsi.whiterabbit; import java.io.File; import java.text.DecimalFormat; diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/ObjectExchange.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/ObjectExchange.java similarity index 94% rename from whiterabbit/src/main/java/org/ohdsi/whiteRabbit/ObjectExchange.java rename to whiterabbit/src/main/java/org/ohdsi/whiterabbit/ObjectExchange.java index 643e87a0..a03891ee 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/ObjectExchange.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/ObjectExchange.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ -package org.ohdsi.whiteRabbit; +package org.ohdsi.whiterabbit; import javax.swing.JFrame; diff --git a/whiterabbit/src/main/java/org/ohdsi/whiterabbit/PanelsManager.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/PanelsManager.java new file mode 100644 index 00000000..a7e7e8c3 --- /dev/null +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/PanelsManager.java @@ -0,0 +1,32 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.whiterabbit; + +import javax.swing.*; +import java.util.List; + +/** + * Defines the interface between the application's main class and its (Swing) components (panels). + */ +public interface PanelsManager { + void runConnectionTest(); + + JButton getAddAllButton(); + + List getComponentsToDisableWhenRunning(); +} diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/WhiteRabbitMain.java similarity index 60% rename from whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java rename to whiterabbit/src/main/java/org/ohdsi/whiterabbit/WhiteRabbitMain.java index 13b34149..f42ceeeb 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/WhiteRabbitMain.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/WhiteRabbitMain.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
******************************************************************************/ -package org.ohdsi.whiteRabbit; +package org.ohdsi.whiterabbit; import java.awt.BorderLayout; import java.awt.Color; @@ -29,63 +29,57 @@ import java.awt.Toolkit; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; -import java.awt.event.WindowAdapter; -import java.awt.event.WindowEvent; -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; +import java.io.*; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.Vector; -import javax.swing.BorderFactory; -import javax.swing.Box; -import javax.swing.BoxLayout; -import javax.swing.JButton; -import javax.swing.JCheckBox; -import javax.swing.JComboBox; -import javax.swing.JComponent; -import javax.swing.JDialog; -import javax.swing.JFileChooser; -import javax.swing.JFrame; -import javax.swing.JLabel; -import javax.swing.JList; -import javax.swing.JMenu; -import javax.swing.JMenuBar; -import javax.swing.JMenuItem; -import javax.swing.JOptionPane; -import javax.swing.JPanel; -import javax.swing.JPasswordField; -import javax.swing.JScrollPane; -import javax.swing.JSpinner; -import javax.swing.JTabbedPane; -import javax.swing.JTextArea; -import javax.swing.JTextField; +import javax.swing.*; import javax.swing.border.TitledBorder; import javax.swing.filechooser.FileNameExtensionFilter; import org.apache.commons.csv.CSVFormat; -import org.ohdsi.databases.DbType; -import org.ohdsi.databases.RichConnection; +import org.apache.commons.io.output.TeeOutputStream; +import org.ohdsi.databases.*; +import org.ohdsi.databases.configuration.*; import org.ohdsi.utilities.DirectoryUtilities; import org.ohdsi.utilities.StringUtilities; import org.ohdsi.utilities.Version; import org.ohdsi.utilities.files.IniFile; -import org.ohdsi.whiteRabbit.fakeDataGenerator.FakeDataGenerator; -import 
org.ohdsi.whiteRabbit.scan.SourceDataScan; +import org.ohdsi.whiterabbit.fakeDataGenerator.FakeDataGenerator; +import org.ohdsi.whiterabbit.gui.LocationsPanel; +import org.ohdsi.whiterabbit.scan.SourceDataScan; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * This is the WhiteRabbit main class */ -public class WhiteRabbitMain implements ActionListener { +public class WhiteRabbitMain implements ActionListener, PanelsManager { - public final static String DOCUMENTATION_URL = "http://ohdsi.github.io/WhiteRabbit"; - public final static String ACTION_CMD_HELP = "Open documentation"; + Logger logger = LoggerFactory.getLogger(WhiteRabbitMain.class); + + public static final String DOCUMENTATION_URL = "http://ohdsi.github.io/WhiteRabbit"; + public static final String ACTION_CMD_HELP = "Open documentation"; + + public static final String DELIMITED_TEXT_FILES = DbType.DELIMITED_TEXT_FILES.label(); + + public static final String LABEL_TEST_CONNECTION = "Test connection"; + public static final String LABEL_CONNECTION_SUCCESSFUL = "Connection successful"; + public static final String NAME_TABBED_PANE = "TabbedPane"; + public static final String LABEL_LOCATIONS = "Locations"; + public static final String LABEL_SCAN = "Scan"; + public static final String LABEL_SCAN_TABLES = "Scan tables"; + + public static final String LABEL_ADD_ALL_IN_DB = "Add all in DB"; + + public static final String TITLE_ERRORS_IN_DATABASE_CONFIGURATION = "There are errors in the database configuration"; + public static final String TITLE_WARNINGS_ABOUT_DATABASE_CONFIGURATION = "There are warnings about the database configuration"; private JFrame frame; - private JTextField folderField; private JTextField scanReportFileField; private JComboBox scanRowCount; @@ -95,43 +89,40 @@ public class WhiteRabbitMain implements ActionListener { private JComboBox numericStatsSampleSize; private JSpinner scanMinCellCount; private JSpinner generateRowCount; - private JComboBox sourceType; private JComboBox 
targetType; private JTextField targetUserField; private JTextField targetPasswordField; private JTextField targetServerField; private JTextField targetDatabaseField; - private JTextField sourceDelimiterField; private JComboBox targetCSVFormat; private JCheckBox doUniformSampling; - private JTextField sourceServerField; - private JTextField sourceUserField; - private JTextField sourcePasswordField; - private JTextField sourceDatabaseField; private JButton addAllButton; private JList tableList; private Vector tables = new Vector(); - private boolean sourceIsFiles = true; - private boolean sourceIsSas = false; private boolean targetIsFiles = false; + private LocationsPanel locationsPanel; + + private Console console; + + private boolean teeOutputStreams; // for testing/debugging purposes private List componentsToDisableWhenRunning = new ArrayList(); - public static void main(String[] args) { - new WhiteRabbitMain(args); + public String reportFilePath = ""; + + public static void main(String[] args) throws IOException { + new WhiteRabbitMain(false, args); } - public WhiteRabbitMain(String[] args) { - if (args.length == 2 && args[0].equalsIgnoreCase("-ini")) + public WhiteRabbitMain(boolean teeOutputStreams, String[] args) throws IOException { + this.teeOutputStreams = teeOutputStreams; + if (args.length == 2 && (args[0].equalsIgnoreCase("-ini") || args[0].equalsIgnoreCase("--ini"))) launchCommandLine(args[1]); else { frame = new JFrame("White Rabbit"); - frame.addWindowListener(new WindowAdapter() { - public void windowClosing(WindowEvent e) { - System.exit(0); - } - }); + frame.setDefaultCloseOperation(WindowConstants.DISPOSE_ON_CLOSE); + frame.setLayout(new BorderLayout()); frame.setJMenuBar(createMenuBar()); @@ -148,72 +139,82 @@ public void windowClosing(WindowEvent e) { } } - private void launchCommandLine(String iniFileName) { + public JButton getAddAllButton() { + return this.addAllButton; + } + + public List getComponentsToDisableWhenRunning() { + return 
this.componentsToDisableWhenRunning; + } + + private void launchCommandLine(String iniFileName) throws IOException { IniFile iniFile = new IniFile(iniFileName); - DbSettings dbSettings = new DbSettings(); - if (iniFile.get("DATA_TYPE").equalsIgnoreCase("Delimited text files")) { - dbSettings.sourceType = DbSettings.SourceType.CSV_FILES; - if (iniFile.get("DELIMITER").equalsIgnoreCase("tab")) - dbSettings.delimiter = '\t'; - else - dbSettings.delimiter = iniFile.get("DELIMITER").charAt(0); - } else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("SAS7bdat")) { - dbSettings.sourceType = DbSettings.SourceType.SAS_FILES; + DbSettings dbSettings = getDbSettings(iniFile); + findTablesToScan(iniFile, dbSettings); + performSourceDataScan(iniFile, dbSettings); + } + + private DbSettings getDbSettings(IniFile iniFile) { + DbSettings dbSettings; + + DbType dbType = DbType.getDbType(iniFile.getDataType()); + if (dbType.supportsStorageHandler()) { + dbSettings = dbType.getStorageHandler().getDbSettings(iniFile, null, System.out); } else { - dbSettings.sourceType = DbSettings.SourceType.DATABASE; - dbSettings.user = iniFile.get("USER_NAME"); - dbSettings.password = iniFile.get("PASSWORD"); - dbSettings.server = iniFile.get("SERVER_LOCATION"); - dbSettings.database = iniFile.get("DATABASE_NAME"); - if (iniFile.get("DATA_TYPE").equalsIgnoreCase("MySQL")) - dbSettings.dbType = DbType.MYSQL; - else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("Oracle")) - dbSettings.dbType = DbType.ORACLE; - else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("PostgreSQL")) - dbSettings.dbType = DbType.POSTGRESQL; - else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("Redshift")) - dbSettings.dbType = DbType.REDSHIFT; - else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("SQL Server")) { - dbSettings.dbType = DbType.MSSQL; - if (iniFile.get("USER_NAME").length() != 0) { // Not using windows authentication - String[] parts = iniFile.get("USER_NAME").split("/"); - if (parts.length == 2) { - 
dbSettings.user = parts[1]; - dbSettings.domain = parts[0]; + dbSettings = new DbSettings(); + if (iniFile.get(DBConfiguration.DATA_TYPE_FIELD).equalsIgnoreCase(DELIMITED_TEXT_FILES)) { + dbSettings.sourceType = DbSettings.SourceType.CSV_FILES; + if (iniFile.get("DELIMITER").equalsIgnoreCase("tab")) + dbSettings.delimiter = '\t'; + else + dbSettings.delimiter = iniFile.get("DELIMITER").charAt(0); + } else if (iniFile.get(DBConfiguration.DATA_TYPE_FIELD).equalsIgnoreCase(DbType.SAS7BDAT.label())) { + dbSettings.sourceType = DbSettings.SourceType.SAS_FILES; + } else { + dbSettings.sourceType = DbSettings.SourceType.DATABASE; + dbSettings.user = iniFile.get("USER_NAME"); + dbSettings.password = iniFile.get("PASSWORD"); + dbSettings.server = iniFile.get("SERVER_LOCATION"); + dbSettings.database = iniFile.get("DATABASE_NAME"); + dbSettings.dbType = dbType; + if (dbType == DbType.SQL_SERVER) { + if (!iniFile.get("USER_NAME").isEmpty()) { // Not using windows authentication + String[] parts = iniFile.get("USER_NAME").split("/"); + if (parts.length == 2) { + dbSettings.user = parts[1]; + dbSettings.domain = parts[0]; + } } - } - } else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("Azure")) { - dbSettings.dbType = DbType.AZURE; - if (iniFile.get("USER_NAME").length() != 0) { // Not using windows authentication - String[] parts = iniFile.get("USER_NAME").split("/"); - if (parts.length == 2) { - dbSettings.user = parts[1]; - dbSettings.domain = parts[0]; + } else if (dbType == DbType.AZURE) { + if (!iniFile.get("USER_NAME").isEmpty()) { // Not using windows authentication + String[] parts = iniFile.get("USER_NAME").split("/"); + if (parts.length == 2) { + dbSettings.user = parts[1]; + dbSettings.domain = parts[0]; + } } - } - } else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("PDW")) { - dbSettings.dbType = DbType.PDW; - if (iniFile.get("USER_NAME").length() != 0) { // Not using windows authentication - String[] parts = iniFile.get("USER_NAME").split("/"); - if 
(parts.length == 2) { - dbSettings.user = parts[1]; - dbSettings.domain = parts[0]; + } else if (dbType == DbType.PDW) { + if (!iniFile.get("USER_NAME").isEmpty()) { // Not using windows authentication + String[] parts = iniFile.get("USER_NAME").split("/"); + if (parts.length == 2) { + dbSettings.user = parts[1]; + dbSettings.domain = parts[0]; + } } + } else if (dbType == DbType.BIGQUERY) { + /* GBQ requires database. Putting database into domain var for connect() */ + dbSettings.domain = dbSettings.database; } - } else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("MS Access")) - dbSettings.dbType = DbType.MSACCESS; - else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("Teradata")) - dbSettings.dbType = DbType.TERADATA; - else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("BigQuery")) { - dbSettings.dbType = DbType.BIGQUERY; - /* GBQ requires database. Putting database into domain var for connect() */ - dbSettings.domain = dbSettings.database; } } + return dbSettings; + } + + private void findTablesToScan(IniFile iniFile, DbSettings dbSettings) { if (iniFile.get("TABLES_TO_SCAN").equalsIgnoreCase("*")) { if (dbSettings.sourceType == DbSettings.SourceType.DATABASE) { - try (RichConnection connection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { + try (RichConnection connection = new RichConnection(dbSettings)) { dbSettings.tables.addAll(connection.getTableNames(dbSettings.database)); } } else { @@ -242,7 +243,9 @@ else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("BigQuery")) { dbSettings.tables.add(table); } } + } + private void performSourceDataScan(IniFile iniFile, DbSettings dbSettings) throws IOException { SourceDataScan sourceDataScan = new SourceDataScan(); int maxRows = Integer.parseInt(iniFile.get("ROWS_PER_TABLE")); boolean scanValues = iniFile.get("SCAN_FIELD_VALUES").equalsIgnoreCase("yes"); @@ -265,17 +268,19 @@ else if (iniFile.get("DATA_TYPE").equalsIgnoreCase("BigQuery")) { 
sourceDataScan.setMaxValues(maxValues); sourceDataScan.setCalculateNumericStats(calculateNumericStats); sourceDataScan.setNumStatsSamplerSize(numericStatsSamplerSize); - sourceDataScan.process(dbSettings, iniFile.get("WORKING_FOLDER") + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME); + reportFilePath = iniFile.get("WORKING_FOLDER") + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME; + sourceDataScan.process(dbSettings, reportFilePath); } private JComponent createTabsPanel() { JTabbedPane tabbedPane = new JTabbedPane(); + tabbedPane.setName(NAME_TABBED_PANE); - JPanel locationPanel = createLocationsPanel(); - tabbedPane.addTab("Locations", null, locationPanel, "Specify the location of the source data and the working folder"); + this.locationsPanel = createLocationsPanel(componentsToDisableWhenRunning); + tabbedPane.addTab(LABEL_LOCATIONS, null, locationsPanel, "Specify the location of the source data and the working folder"); JPanel scanPanel = createScanPanel(); - tabbedPane.addTab("Scan", null, scanPanel, "Create a scan of the source data"); + tabbedPane.addTab(LABEL_SCAN, null, scanPanel, "Create a scan of the source data"); JPanel fakeDataPanel = createFakeDataPanel(); tabbedPane.addTab("Fake data generation", null, fakeDataPanel, "Create fake data based on a scan report for development purposes"); @@ -283,136 +288,10 @@ private JComponent createTabsPanel() { return tabbedPane; } - private JPanel createLocationsPanel() { - JPanel panel = new JPanel(); - - panel.setLayout(new GridBagLayout()); - GridBagConstraints c = new GridBagConstraints(); - c.fill = GridBagConstraints.BOTH; - c.weightx = 0.5; - - JPanel folderPanel = new JPanel(); - folderPanel.setLayout(new BoxLayout(folderPanel, BoxLayout.X_AXIS)); - folderPanel.setBorder(BorderFactory.createTitledBorder("Working folder")); - folderField = new JTextField(); - folderField.setText((new File("").getAbsolutePath())); - folderField.setToolTipText("The folder where all output will be written"); - 
folderPanel.add(folderField); - JButton pickButton = new JButton("Pick folder"); - pickButton.setToolTipText("Pick a different working folder"); - folderPanel.add(pickButton); - pickButton.addActionListener(e -> pickFolder()); - componentsToDisableWhenRunning.add(pickButton); - c.gridx = 0; - c.gridy = 0; - c.gridwidth = 1; - panel.add(folderPanel, c); - - JPanel sourcePanel = new JPanel(); - sourcePanel.setLayout(new GridLayout(0, 2)); - sourcePanel.setBorder(BorderFactory.createTitledBorder("Source data location")); - sourcePanel.add(new JLabel("Data type")); - sourceType = new JComboBox<>(new String[] { "Delimited text files", "SAS7bdat", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", "PDW", "Redshift", "Teradata", "BigQuery", "Azure"}); - sourceType.setToolTipText("Select the type of source data available"); - sourceType.addItemListener(itemEvent -> { - String selectedSourceType = itemEvent.getItem().toString(); - sourceIsFiles = selectedSourceType.equals("Delimited text files"); - sourceIsSas = selectedSourceType.equals("SAS7bdat"); - boolean sourceIsDatabase = !(sourceIsFiles || sourceIsSas); - - sourceServerField.setEnabled(sourceIsDatabase); - sourceUserField.setEnabled(sourceIsDatabase); - sourcePasswordField.setEnabled(sourceIsDatabase); - sourceDatabaseField.setEnabled(sourceIsDatabase && !selectedSourceType.equals("Azure")); - sourceDelimiterField.setEnabled(sourceIsFiles); - addAllButton.setEnabled(sourceIsDatabase); - - if (sourceIsDatabase && selectedSourceType.equals("Oracle")) { - sourceServerField.setToolTipText("For Oracle servers this field contains the SID, servicename, and optionally the port: '/', ':/', '/', or ':/'"); - sourceUserField.setToolTipText("For Oracle servers this field contains the name of the user used to log in"); - sourcePasswordField.setToolTipText("For Oracle servers this field contains the password corresponding to the user"); - sourceDatabaseField.setToolTipText("For Oracle servers this field contains the 
schema (i.e. 'user' in Oracle terms) containing the source tables"); - } else if (sourceIsDatabase && selectedSourceType.equals("PostgreSQL")) { - sourceServerField.setToolTipText("For PostgreSQL servers this field contains the host name and database name (/)"); - sourceUserField.setToolTipText("The user used to log in to the server"); - sourcePasswordField.setToolTipText("The password used to log in to the server"); - sourceDatabaseField.setToolTipText("For PostgreSQL servers this field contains the schema containing the source tables"); - } else if (sourceIsDatabase && selectedSourceType.equals("BigQuery")) { - sourceServerField.setToolTipText("GBQ SA & UA: ProjectID"); - sourceUserField.setToolTipText("GBQ SA only: OAuthServiceAccountEMAIL"); - sourcePasswordField.setToolTipText("GBQ SA only: OAuthPvtKeyPath"); - sourceDatabaseField.setToolTipText("GBQ SA & UA: Data Set within ProjectID"); - } else if (sourceIsDatabase) { - if (selectedSourceType.equals("Azure")) { - sourceServerField.setToolTipText("For Azure, this field contains the host name and database name (;database=)"); - } else { - sourceServerField.setToolTipText("This field contains the name or IP address of the database server"); - } - if (selectedSourceType.equals("SQL Server")) { - sourceUserField.setToolTipText("The user used to log in to the server. Optionally, the domain can be specified as / (e.g. 
'MyDomain/Joe')"); - } else { - sourceUserField.setToolTipText("The user used to log in to the server"); - } - sourcePasswordField.setToolTipText("The password used to log in to the server"); - if (selectedSourceType.equals("Azure")) { - sourceDatabaseField.setToolTipText("For Azure, leave this empty"); - } else { - sourceDatabaseField.setToolTipText("The name of the database containing the source tables"); - } - } - }); - sourcePanel.add(sourceType); - - sourcePanel.add(new JLabel("Server location")); - sourceServerField = new JTextField("127.0.0.1"); - sourceServerField.setEnabled(false); - sourcePanel.add(sourceServerField); - sourcePanel.add(new JLabel("User name")); - sourceUserField = new JTextField(""); - sourceUserField.setEnabled(false); - sourcePanel.add(sourceUserField); - sourcePanel.add(new JLabel("Password")); - sourcePasswordField = new JPasswordField(""); - sourcePasswordField.setEnabled(false); - sourcePanel.add(sourcePasswordField); - sourcePanel.add(new JLabel("Database name")); - sourceDatabaseField = new JTextField(""); - sourceDatabaseField.setEnabled(false); - sourcePanel.add(sourceDatabaseField); - - sourcePanel.add(new JLabel("Delimiter")); - sourceDelimiterField = new JTextField(","); - sourceDelimiterField.setToolTipText("The delimiter that separates values. 
Enter 'tab' for tab."); - sourcePanel.add(sourceDelimiterField); - - c.gridx = 0; - c.gridy = 1; - c.gridwidth = 1; - panel.add(sourcePanel, c); - - JPanel testConnectionButtonPanel = new JPanel(); - testConnectionButtonPanel.setLayout(new BoxLayout(testConnectionButtonPanel, BoxLayout.X_AXIS)); - testConnectionButtonPanel.add(Box.createHorizontalGlue()); - - JButton testConnectionButton = new JButton("Test connection"); - testConnectionButton.setBackground(new Color(151, 220, 141)); - testConnectionButton.setToolTipText("Test the connection"); - testConnectionButton.addActionListener(new ActionListener() { - public void actionPerformed(ActionEvent e) { - testConnection(getSourceDbSettings()); - } - }); - componentsToDisableWhenRunning.add(testConnectionButton); - testConnectionButtonPanel.add(testConnectionButton); - - c.gridx = 0; - c.gridy = 2; - c.gridwidth = 1; - panel.add(testConnectionButtonPanel, c); - - return panel; + private LocationsPanel createLocationsPanel(List componentsToDisableWhenRunning) { + return new LocationsPanel(frame, this); } - + private JPanel createScanPanel() { JPanel panel = new JPanel(); panel.setLayout(new BorderLayout()); @@ -421,12 +300,14 @@ private JPanel createScanPanel() { tablePanel.setLayout(new BorderLayout()); tablePanel.setBorder(new TitledBorder("Tables to scan")); tableList = new JList(); + tableList.setName("TableList"); tableList.setToolTipText("Specify the tables (or CSV files) to be scanned here"); tablePanel.add(new JScrollPane(tableList), BorderLayout.CENTER); JPanel tableButtonPanel = new JPanel(); tableButtonPanel.setLayout(new GridLayout(3, 1)); - addAllButton = new JButton("Add all in DB"); + addAllButton = new JButton(LABEL_ADD_ALL_IN_DB); + addAllButton.setName(LABEL_ADD_ALL_IN_DB); addAllButton.setToolTipText("Add all tables in the database"); addAllButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { @@ -436,6 +317,7 @@ public void actionPerformed(ActionEvent e) { 
addAllButton.setEnabled(false); tableButtonPanel.add(addAllButton); JButton addButton = new JButton("Add"); + addButton.setName("Add"); addButton.setToolTipText("Add tables to list"); addButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { @@ -515,7 +397,8 @@ public void actionPerformed(ActionEvent e) { southPanel.add(Box.createVerticalStrut(3)); - JButton scanButton = new JButton("Scan tables"); + JButton scanButton = new JButton(LABEL_SCAN_TABLES); + scanButton.setName(LABEL_SCAN_TABLES); scanButton.setBackground(new Color(151, 220, 141)); scanButton.setToolTipText("Scan the selected tables"); scanButton.addActionListener(new ActionListener() { @@ -564,10 +447,10 @@ public void actionPerformed(ActionEvent e) { targetPanel.setLayout(new GridLayout(0, 2)); targetPanel.setBorder(BorderFactory.createTitledBorder("Target data location")); targetPanel.add(new JLabel("Data type")); - targetType = new JComboBox<>(new String[] {"Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "PDW"}); + targetType = new JComboBox<>(new String[] {DELIMITED_TEXT_FILES, "MySQL", "Oracle", "SQL Server", "PostgreSQL", "PDW"}); targetType.setToolTipText("Select the type of source data available"); targetType.addItemListener(event -> { - targetIsFiles = event.getItem().toString().equals("Delimited text files"); + targetIsFiles = event.getItem().toString().equals(DELIMITED_TEXT_FILES); targetServerField.setEnabled(!targetIsFiles); targetUserField.setEnabled(!targetIsFiles); targetPasswordField.setEnabled(!targetIsFiles); @@ -649,12 +532,13 @@ public void actionPerformed(ActionEvent e) { fakeDataButtonPanel.add(generateRowCount); fakeDataButtonPanel.add(Box.createHorizontalGlue()); - JButton testConnectionButton = new JButton("Test connection"); + JButton testConnectionButton = new JButton(LABEL_TEST_CONNECTION); + testConnectionButton.setName(LABEL_TEST_CONNECTION); testConnectionButton.setBackground(new Color(151, 220, 141)); 
testConnectionButton.setToolTipText("Test the connection"); testConnectionButton.addActionListener(new ActionListener() { public void actionPerformed(ActionEvent e) { - testConnection(getTargetDbSettings()); + testConnection(getTargetDbSettings(), null); } }); componentsToDisableWhenRunning.add(testConnectionButton); @@ -681,20 +565,37 @@ public void actionPerformed(ActionEvent e) { private JComponent createConsolePanel() { JTextArea consoleArea = new JTextArea(); + consoleArea.setName("Console"); consoleArea.setToolTipText("General progress information"); consoleArea.setEditable(false); - Console console = new Console(); + this.console = new Console(); console.setTextArea(consoleArea); - System.setOut(new PrintStream(console)); - System.setErr(new PrintStream(console)); + setOutputStreamsToConsole(console); JScrollPane consoleScrollPane = new JScrollPane(consoleArea); consoleScrollPane.setBorder(BorderFactory.createTitledBorder("Console")); - consoleScrollPane.setPreferredSize(new Dimension(800, 200)); + consoleScrollPane.setPreferredSize(new Dimension(800, 180)); consoleScrollPane.setAutoscrolls(true); ObjectExchange.console = console; return consoleScrollPane; } + private void setOutputStreamsToConsole(Console console) { + if (teeOutputStreams) { + System.setOut(new PrintStream(new TeeOutputStream(System.out, new PrintStream(console)))); + System.setErr(new PrintStream(new TeeOutputStream(System.err, new PrintStream(console)))); + } else { + System.setOut(new PrintStream(console)); + System.setErr(new PrintStream(console)); + } + + Thread resetOutputStreams = new Thread(() -> { + System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out))); + System.setErr(new PrintStream(new FileOutputStream(FileDescriptor.err))); + logger = LoggerFactory.getLogger(WhiteRabbitMain.class); + }); + Runtime.getRuntime().addShutdownHook(resetOutputStreams); + } + private void loadIcons(JFrame f) { List icons = new ArrayList(); icons.add(loadIcon("WhiteRabbit16.png", 
f)); @@ -719,23 +620,9 @@ private Image loadIcon(String name, JFrame f) { return null; } - private void pickFolder() { - JFileChooser fileChooser = new JFileChooser(new File(folderField.getText())); - fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); - int returnVal = fileChooser.showDialog(frame, "Select folder"); - if (returnVal == JFileChooser.APPROVE_OPTION) { - File selectedDirectory = fileChooser.getSelectedFile(); - if (!selectedDirectory.exists()) { - // When no directory is selected when approving, FileChooser incorrectly appends the current directory to the path. - // Take the opened directory instead. - selectedDirectory = fileChooser.getCurrentDirectory(); - } - folderField.setText(selectedDirectory.getAbsolutePath()); - } - } private void pickScanReportFile() { - JFileChooser fileChooser = new JFileChooser(new File(folderField.getText())); + JFileChooser fileChooser = new JFileChooser(new File(locationsPanel.getFolderField().getText())); fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY); int returnVal = fileChooser.showDialog(frame, "Select scan report file"); if (returnVal == JFileChooser.APPROVE_OPTION) @@ -750,13 +637,12 @@ private void removeTables() { } private void addAllTables() { - DbSettings sourceDbSettings = getSourceDbSettings(); - if (sourceDbSettings != null) { - RichConnection connection = new RichConnection(sourceDbSettings.server, sourceDbSettings.domain, sourceDbSettings.user, sourceDbSettings.password, - sourceDbSettings.dbType); - for (String table : connection.getTableNames(sourceDbSettings.database)) { + DbSettings dbSettings = getSourceDbSettings(null); + if (dbSettings != null) { + RichConnection connection = new RichConnection(dbSettings); + for (String table : connection.getTableNames(dbSettings.database)) { if (!tables.contains(table)) - tables.add((String) table); + tables.add(table); tableList.setListData(tables); } connection.close(); @@ -764,23 +650,15 @@ private void addAllTables() { } private void 
pickTables() { - DbSettings sourceDbSettings = getSourceDbSettings(); + DbSettings sourceDbSettings = getSourceDbSettings(null); if (sourceDbSettings != null) { if (sourceDbSettings.sourceType == DbSettings.SourceType.CSV_FILES || sourceDbSettings.sourceType == DbSettings.SourceType.SAS_FILES) { - JFileChooser fileChooser = new JFileChooser(new File(folderField.getText())); - fileChooser.setMultiSelectionEnabled(true); - fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY); - - if (sourceDbSettings.sourceType == DbSettings.SourceType.CSV_FILES) { - fileChooser.setFileFilter(new FileNameExtensionFilter("Delimited text files", "csv", "txt")); - } else if (sourceDbSettings.sourceType == DbSettings.SourceType.SAS_FILES) { - fileChooser.setFileFilter(new FileNameExtensionFilter("SAS Data Files", "sas7bdat")); - } + JFileChooser fileChooser = getjFileChooser(sourceDbSettings, locationsPanel); int returnVal = fileChooser.showDialog(frame, "Select tables"); if (returnVal == JFileChooser.APPROVE_OPTION) { for (File table : fileChooser.getSelectedFiles()) { - String tableName = DirectoryUtilities.getRelativePath(new File(folderField.getText()), table); + String tableName = DirectoryUtilities.getRelativePath(new File(locationsPanel.getFolderField().getText()), table); if (!tables.contains(tableName)) tables.add(tableName); tableList.setListData(tables); @@ -788,10 +666,9 @@ private void pickTables() { } } else if (sourceDbSettings.sourceType == DbSettings.SourceType.DATABASE) { - RichConnection connection = new RichConnection(sourceDbSettings.server, sourceDbSettings.domain, sourceDbSettings.user, - sourceDbSettings.password, sourceDbSettings.dbType); + RichConnection connection = new RichConnection(sourceDbSettings); String tableNames = StringUtilities.join(connection.getTableNames(sourceDbSettings.database), "\t"); - if (tableNames.length() == 0) { + if (tableNames.isEmpty()) { JOptionPane.showMessageDialog(frame, "No tables found in database " + 
sourceDbSettings.database, "Error fetching table names", JOptionPane.ERROR_MESSAGE); } else { @@ -809,78 +686,102 @@ private void pickTables() { } } - private DbSettings getSourceDbSettings() { - DbSettings dbSettings = new DbSettings(); - if (sourceType.getSelectedItem().equals("Delimited text files")) { - dbSettings.sourceType = DbSettings.SourceType.CSV_FILES; - if (sourceDelimiterField.getText().length() == 0) { - JOptionPane.showMessageDialog(frame, "Delimiter field cannot be empty for source database", "Error connecting to server", - JOptionPane.ERROR_MESSAGE); - return null; - } - if (sourceDelimiterField.getText().equalsIgnoreCase("tab")) - dbSettings.delimiter = '\t'; - else - dbSettings.delimiter = sourceDelimiterField.getText().charAt(0); - } else if (sourceType.getSelectedItem().equals("SAS7bdat")) { - dbSettings.sourceType = DbSettings.SourceType.SAS_FILES; - } else { - dbSettings.sourceType = DbSettings.SourceType.DATABASE; - dbSettings.user = sourceUserField.getText(); - dbSettings.password = sourcePasswordField.getText(); - dbSettings.server = sourceServerField.getText(); - dbSettings.database = sourceDatabaseField.getText().trim().length() == 0 ? 
null : sourceDatabaseField.getText(); - if (sourceType.getSelectedItem().toString().equals("MySQL")) - dbSettings.dbType = DbType.MYSQL; - else if (sourceType.getSelectedItem().toString().equals("Oracle")) - dbSettings.dbType = DbType.ORACLE; - else if (sourceType.getSelectedItem().toString().equals("PostgreSQL")) - dbSettings.dbType = DbType.POSTGRESQL; - else if (sourceType.getSelectedItem().toString().equals("BigQuery")) - dbSettings.dbType = DbType.BIGQUERY; - else if (sourceType.getSelectedItem().toString().equals("Redshift")) - dbSettings.dbType = DbType.REDSHIFT; - else if (sourceType.getSelectedItem().toString().equals("SQL Server")) { - dbSettings.dbType = DbType.MSSQL; - if (sourceUserField.getText().length() != 0) { // Not using windows authentication - String[] parts = sourceUserField.getText().split("/"); - if (parts.length == 2) { - dbSettings.user = parts[1]; - dbSettings.domain = parts[0]; - } - } - } else if (sourceType.getSelectedItem().toString().equals("PDW")) { - dbSettings.dbType = DbType.PDW; - if (sourceUserField.getText().length() != 0) { // Not using windows authentication - String[] parts = sourceUserField.getText().split("/"); - if (parts.length == 2) { - dbSettings.user = parts[1]; - dbSettings.domain = parts[0]; + private static JFileChooser getjFileChooser(DbSettings sourceDbSettings, LocationsPanel locationsPanel) { + JFileChooser fileChooser = new JFileChooser(new File(locationsPanel.getFolderField().getText())); + fileChooser.setName("FileChooser"); + fileChooser.setMultiSelectionEnabled(true); + fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY); + + if (sourceDbSettings.sourceType == DbSettings.SourceType.CSV_FILES) { + fileChooser.setFileFilter(new FileNameExtensionFilter(DELIMITED_TEXT_FILES, "csv", "txt")); + } else if (sourceDbSettings.sourceType == DbSettings.SourceType.SAS_FILES) { + fileChooser.setFileFilter(new FileNameExtensionFilter("SAS Data Files", DbType.SAS7BDAT.name().toLowerCase())); + } + return 
fileChooser; + } + + private DbSettings getSourceDbSettings(ValidationFeedback feedback) { + DbType dbChoice = locationsPanel.getCurrentDbChoice(); + DbSettings dbSettings; + if (dbChoice != null && dbChoice.supportsStorageHandler()) { + dbSettings = locationsPanel.getCurrentDbChoice().getStorageHandler().getDbSettings(feedback); + return dbSettings; + } else { + String sourceDelimiterField = locationsPanel.getSourceDelimiterField().getText(); + String sourceType = locationsPanel.getSelectedSourceType(); + dbSettings = new DbSettings(); + if (sourceType.equals(DbType.DELIMITED_TEXT_FILES.label())) { + dbSettings.dbType = DbType.DELIMITED_TEXT_FILES; + dbSettings.sourceType = DbSettings.SourceType.CSV_FILES; + if (sourceDelimiterField.isEmpty()) { + JOptionPane.showMessageDialog(frame, "Delimiter field cannot be empty for source database", "Error connecting to server", + JOptionPane.ERROR_MESSAGE); + return null; + } + if (sourceDelimiterField.equalsIgnoreCase("tab")) + dbSettings.delimiter = '\t'; + else + dbSettings.delimiter = locationsPanel.getSourceDelimiterField().getText().charAt(0); + } else if (sourceType.equalsIgnoreCase(DbType.SAS7BDAT.label())) { + dbSettings.sourceType = DbSettings.SourceType.SAS_FILES; + dbSettings.dbType = DbType.SAS7BDAT; + } else { + dbSettings.sourceType = DbSettings.SourceType.DATABASE; + dbSettings.user = locationsPanel.getSourceUserField(); + dbSettings.password = locationsPanel.getSourcePasswordField(); + dbSettings.server = locationsPanel.getSourceServerField(); + String sourceDatabaseField = locationsPanel.getSourceDatabaseField(); + dbSettings.database = sourceDatabaseField.trim().isEmpty() ? 
null : sourceDatabaseField; + dbSettings.dbType = dbChoice; + if (dbChoice == DbType.SQL_SERVER) { + if (!dbSettings.user.isEmpty()) { // Not using windows authentication + String[] parts = dbSettings.user.split("/"); + if (parts.length == 2) { + dbSettings.user = parts[1]; + dbSettings.domain = parts[0]; + } + } + } else if (dbChoice == DbType.PDW) { + if (!dbSettings.user.isEmpty()) { // Not using windows authentication + String[] parts = dbSettings.user.split("/"); + if (parts.length == 2) { + dbSettings.user = parts[1]; + dbSettings.domain = parts[0]; + } } - } - } else if (sourceType.getSelectedItem().toString().equals("MS Access")) - dbSettings.dbType = DbType.MSACCESS; - else if (sourceType.getSelectedItem().toString().equals("Teradata")) - dbSettings.dbType = DbType.TERADATA; - else if (sourceType.getSelectedItem().toString().equals("Azure")) { - dbSettings.dbType = DbType.AZURE; - dbSettings.database = ""; - } + } else if (dbChoice == DbType.AZURE) { + dbSettings.database = ""; + } + } + return dbSettings; + } + } + + public void runConnectionTest() { + ValidationFeedback feedback = new ValidationFeedback(); + DbSettings dbSettings = getSourceDbSettings(feedback); + if (dbSettings != null) { + testConnection(dbSettings, feedback); + } else { + throw new DBConfigurationException("Source database settings were not initialized"); } - return dbSettings; } - private void testConnection(DbSettings dbSettings) { + private void testConnection(DbSettings dbSettings, ValidationFeedback feedback) { + if (feedbackBlocksContinuation(feedback)) { + return; + } + String folder = locationsPanel.getFolderField().getText(); if (dbSettings.sourceType == DbSettings.SourceType.CSV_FILES || dbSettings.sourceType == DbSettings.SourceType.SAS_FILES) { - if (new File(folderField.getText()).exists()) { - String message = "Folder " + folderField.getText() + " found"; + if (new File(folder).exists()) { + String message = "Folder " + folder + " found"; 
JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), "Working folder found", JOptionPane.INFORMATION_MESSAGE); } else { - String message = "Folder " + folderField.getText() + " not found"; + String message = "Folder " + folder + " not found"; JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), "Working folder not found", JOptionPane.ERROR_MESSAGE); } } else { - if (sourceDatabaseField.isEnabled() && (dbSettings.database == null || dbSettings.database.equals(""))) { + if (locationsPanel.isSourceDatabaseFieldEnabled() && (dbSettings.database == null || dbSettings.database.equals(""))) { JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap("Please specify database name", 80), "Error connecting to server", JOptionPane.ERROR_MESSAGE); return; @@ -893,7 +794,7 @@ private void testConnection(DbSettings dbSettings) { RichConnection connection; try { - connection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType); + connection = new RichConnection(dbSettings); } catch (Exception e) { String message = "Could not connect: " + e.getMessage(); JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), "Error connecting to server", JOptionPane.ERROR_MESSAGE); @@ -901,7 +802,7 @@ private void testConnection(DbSettings dbSettings) { } try { List tableNames = connection.getTableNames(dbSettings.database); - if (tableNames.size() == 0) + if (tableNames.isEmpty()) throw new RuntimeException("Unable to retrieve table names for database " + dbSettings.database); } catch (Exception e) { String message = "Could not connect to database: " + e.getMessage(); @@ -910,15 +811,57 @@ private void testConnection(DbSettings dbSettings) { } connection.close(); - String message = "Succesfully connected to " + dbSettings.database + " on server " + dbSettings.server; - JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), "Connection 
succesful", JOptionPane.INFORMATION_MESSAGE); + String message = "Successfully connected to " + dbSettings.database + " on server " + dbSettings.server; + JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), LABEL_CONNECTION_SUCCESSFUL, JOptionPane.INFORMATION_MESSAGE); + } + } + + private boolean feedbackBlocksContinuation(ValidationFeedback feedback) { + if (feedback == null || (!feedback.hasWarnings() && !feedback.hasErrors())) { + return false; + } else { + if (feedback.hasErrors()) { + showFeedback(feedback); + return true; + } + if (feedback.hasWarnings()) { + showFeedback(feedback); + return false; + } + } + return false; + } + private void showFeedback(ValidationFeedback feedback) { + if (feedback == null) { + return; } + String message = ""; + String title = ""; + int messageType = JOptionPane.INFORMATION_MESSAGE; + if (feedback.hasErrors()) { + title = TITLE_ERRORS_IN_DATABASE_CONFIGURATION; + message = createMessage(feedback.getErrors().keySet()); + messageType = JOptionPane.ERROR_MESSAGE; + } else if (feedback.hasWarnings()) { + title = TITLE_WARNINGS_ABOUT_DATABASE_CONFIGURATION; + message = createMessage(feedback.getWarnings().keySet()); + messageType = JOptionPane.WARNING_MESSAGE; + } + JOptionPane.showMessageDialog(ObjectExchange.frame, message, title, messageType); + } + + private static String createMessage(Set messages) { + StringBuilder messageBuilder = new StringBuilder(); + for (String message : messages) { + messageBuilder.append(String.format("%s%n", message)); + } + return messageBuilder.toString(); } private DbSettings getTargetDbSettings() { DbSettings dbSettings = new DbSettings(); - if (targetType.getSelectedItem().equals("Delimited text files")) { + if (targetType.getSelectedItem().equals(DELIMITED_TEXT_FILES)) { dbSettings.sourceType = DbSettings.SourceType.CSV_FILES; switch(targetCSVFormat.getSelectedItem().toString()) { @@ -954,7 +897,7 @@ private DbSettings getTargetDbSettings() { dbSettings.dbType = 
DbType.POSTGRESQL; break; case "SQL Server": - dbSettings.dbType = DbType.MSSQL; + dbSettings.dbType = DbType.SQL_SERVER; if (targetUserField.getText().length() != 0) { // Not using windows authentication String[] parts = targetUserField.getText().split("/"); if (parts.length == 2) { @@ -986,7 +929,7 @@ private DbSettings getTargetDbSettings() { private void scanRun() { if (tables.size() == 0) { - if (sourceIsFiles || sourceIsSas) { + if (locationsPanel.sourceIsFiles() || locationsPanel.sourceIsSas()) { String message = "No files selected for scanning"; JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), "No files selected", JOptionPane.ERROR_MESSAGE); return; @@ -1005,7 +948,7 @@ private void scanRun() { int valuesCount = StringUtilities.numericOptionToInt(scanValuesCount.getSelectedItem().toString()); int numStatsSamplerSize = StringUtilities.numericOptionToInt(numericStatsSampleSize.getSelectedItem().toString()); - ScanThread scanThread = new ScanThread( + ScanRunner scanscanRunner = new ScanRunner( rowCount, valuesCount, scanValueScan.isSelected(), @@ -1013,7 +956,7 @@ private void scanRun() { calculateNumericStats.isSelected(), numStatsSamplerSize ); - scanThread.start(); + scanscanRunner.run(); } private void fakeDataRun() { @@ -1022,16 +965,16 @@ private void fakeDataRun() { String message = "File " + filename + " not found"; JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), "File not found", JOptionPane.ERROR_MESSAGE); } else { - FakeDataThread thread = new FakeDataThread(); - thread.start(); + FakeDataRunner runner = new FakeDataRunner(); + runner.run(); } } - private class ScanThread extends Thread { + private class ScanRunner implements Runnable { SourceDataScan sourceDataScan = new SourceDataScan(); - public ScanThread(int maxRows, int maxValues, boolean scanValues, int minCellCount, boolean calculateNumericStats, int numericStatsSampleSize) { + public ScanRunner(int maxRows, int maxValues, boolean 
scanValues, int minCellCount, boolean calculateNumericStats, int numericStatsSampleSize) { sourceDataScan.setSampleSize(maxRows); sourceDataScan.setScanValues(scanValues); sourceDataScan.setMinCellCount(minCellCount); @@ -1044,14 +987,14 @@ public void run() { for (JComponent component : componentsToDisableWhenRunning) component.setEnabled(false); try { - DbSettings dbSettings = getSourceDbSettings(); + DbSettings dbSettings = getSourceDbSettings(null); if (dbSettings != null) { for (String table : tables) { if (dbSettings.sourceType == DbSettings.SourceType.CSV_FILES || dbSettings.sourceType == DbSettings.SourceType.SAS_FILES) - table = folderField.getText() + "/" + table; + table = locationsPanel.getFolderField().getText() + "/" + table; dbSettings.tables.add(table); } - sourceDataScan.process(dbSettings, folderField.getText() + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME); + sourceDataScan.process(dbSettings, locationsPanel.getFolderField().getText() + "/" + SourceDataScan.SCAN_REPORT_FILE_NAME); } } catch (Exception e) { handleError(e); @@ -1060,10 +1003,9 @@ public void run() { component.setEnabled(true); } } - } - private class FakeDataThread extends Thread { + private class FakeDataRunner implements Runnable { public void run() { for (JComponent component : componentsToDisableWhenRunning) { @@ -1077,7 +1019,7 @@ public void run() { dbSettings, Integer.parseInt(generateRowCount.getValue().toString()), scanReportFileField.getText(), - folderField.getText(), + locationsPanel.getFolderField().getText(), doUniformSampling.isSelected() ); } @@ -1164,11 +1106,11 @@ private void doOpenDocumentation() { } private void handleError(Exception e) { - System.err.println("Error: " + e.getMessage()); - String errorReportFilename = ErrorReport.generate(folderField.getText(), e); + logger.error(e.getMessage(), e); + String errorReportFilename = ErrorReport.generate(locationsPanel.getFolderField().getText(), e); String message = "Error: " + e.getLocalizedMessage(); message += 
"\nAn error report has been generated:\n" + errorReportFilename; - System.out.println(message); + logger.error(message); JOptionPane.showMessageDialog(frame, StringUtilities.wordWrap(message, 80), "Error", JOptionPane.ERROR_MESSAGE); } @@ -1189,4 +1131,11 @@ private JMenuBar createMenuBar() { return menuBar; } + public JFrame getFrame() { + return frame; + } + + public Console getConsole() { + return console; + } } diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/fakeDataGenerator/FakeDataGenerator.java similarity index 91% rename from whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java rename to whiterabbit/src/main/java/org/ohdsi/whiterabbit/fakeDataGenerator/FakeDataGenerator.java index c35f41f2..3b86b797 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/fakeDataGenerator/FakeDataGenerator.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/fakeDataGenerator/FakeDataGenerator.java @@ -15,10 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
******************************************************************************/ -package org.ohdsi.whiteRabbit.fakeDataGenerator; +package org.ohdsi.whiterabbit.fakeDataGenerator; import java.util.*; +import org.ohdsi.databases.configuration.DbSettings; import org.ohdsi.databases.RichConnection; import org.ohdsi.rabbitInAHat.dataModel.Database; import org.ohdsi.rabbitInAHat.dataModel.Field; @@ -27,9 +28,11 @@ import org.ohdsi.utilities.StringUtilities; import org.ohdsi.utilities.files.Row; import org.ohdsi.utilities.files.WriteCSVFileWithHeader; -import org.ohdsi.whiteRabbit.DbSettings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class FakeDataGenerator { + static Logger logger = LoggerFactory.getLogger(FakeDataGenerator.class); private RichConnection connection; private int maxRowsPerTable = 1000; @@ -49,16 +52,16 @@ public void generateData(DbSettings dbSettings, int maxRowsPerTable, String file this.doUniformSampling = doUniformSampling; StringUtilities.outputWithTime("Starting creation of fake data"); - System.out.println("Loading scan report from " + filename); + logger.info("Loading scan report from {}", filename); Database database = Database.generateModelFromScanReport(filename); if (targetType == DbSettings.SourceType.DATABASE) { - connection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType); + connection = new RichConnection(dbSettings); connection.use(dbSettings.database); for (Table table : database.getTables()) { if (table.getName().toLowerCase().endsWith(".csv")) table.setName(table.getName().substring(0, table.getName().length() - 4)); - System.out.println("Generating table " + table.getName()); + logger.info("Generating table {}", table.getName()); createTable(table); connection.insertIntoTable(generateRows(table).iterator(), table.getName(), false); } @@ -68,7 +71,7 @@ public void generateData(DbSettings dbSettings, int maxRowsPerTable, String file String name = 
folder + "/" + table.getName(); if (!name.toLowerCase().endsWith(".csv")) name = name + ".csv"; - System.out.println("Generating table " + name); + logger.info("Generating table {}", name); WriteCSVFileWithHeader out = new WriteCSVFileWithHeader(name, dbSettings.csvFormat); for (Row row : generateRows(table)) out.write(row); diff --git a/whiterabbit/src/main/java/org/ohdsi/whiterabbit/gui/LocationsPanel.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/gui/LocationsPanel.java new file mode 100644 index 00000000..ad5b3cbf --- /dev/null +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/gui/LocationsPanel.java @@ -0,0 +1,354 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.whiterabbit.gui; + +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.databases.configuration.DBConfiguration; +import org.ohdsi.whiterabbit.PanelsManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.swing.*; +import javax.swing.event.DocumentEvent; +import javax.swing.event.DocumentListener; +import java.awt.*; +import java.awt.event.ItemEvent; +import java.io.File; +import java.util.Objects; + +import static org.ohdsi.whiterabbit.WhiteRabbitMain.LABEL_TEST_CONNECTION; + +public class LocationsPanel extends JPanel { + + static Logger logger = LoggerFactory.getLogger(LocationsPanel.class); + + public static final String LABEL_LOCATIONS = "Locations"; + public static final String LABEL_SERVER_LOCATION = "Server location"; + public static final String NAME_SERVER_LOCATION = "ServerLocation"; + public static final String LABEL_USER_NAME = "User name"; + public static final String NAME_USER_NAME = "UserName"; + public static final String LABEL_PASSWORD = "Password"; + public static final String NAME_PASSWORD = "PasswordName"; + public static final String LABEL_DATABASE_NAME = "Database name"; + public static final String NAME_DATABASE_NAME = "DatabaseName"; + public static final String LABEL_DELIMITER = "Delimiter"; + public static final String NAME_DELIMITER = "DelimiterName"; + + public static final String TOOLTIP_POSTGRESQL_SERVER = "For PostgreSQL servers this field contains the host name and database name (/)"; + + private final JFrame parentFrame; + private JTextField folderField; + private JComboBox sourceType; + private JTextField sourceDelimiterField; + private JTextField sourceServerField; + private JTextField sourceUserField; + private JTextField sourcePasswordField; + private JTextField sourceDatabaseField; + private DbType currentDbType = null; + + + private SourcePanel sourcePanel; + private boolean 
sourceIsFiles = true; + private boolean sourceIsSas = false; + + private final transient PanelsManager panelsManager; + + public LocationsPanel(JFrame parentFrame, PanelsManager panelsManager) { + super(); + this.parentFrame = parentFrame; + this.panelsManager = panelsManager; + this.createLocationsPanel(); + } + + private void createLocationsPanel() { + JPanel panel = this; + panel.setName(LABEL_LOCATIONS); + + panel.setLayout(new GridBagLayout()); + GridBagConstraints c = new GridBagConstraints(); + c.fill = GridBagConstraints.BOTH; + c.weightx = 0.5; + c.weighty = 0.8; + + JPanel folderPanel = new JPanel(); + folderPanel.setLayout(new BoxLayout(folderPanel, BoxLayout.X_AXIS)); + folderPanel.setBorder(BorderFactory.createTitledBorder("Working folder")); + folderField = new JTextField(); + folderField.setName("FolderField"); + folderField.setText((new File("").getAbsolutePath())); + folderField.setToolTipText("The folder where all output will be written"); + folderPanel.add(folderField); + JButton pickButton = new JButton("Pick folder"); + pickButton.setToolTipText("Pick a different working folder"); + folderPanel.add(pickButton); + pickButton.addActionListener(e -> pickFolder()); + panelsManager.getComponentsToDisableWhenRunning().add(pickButton); + c.gridx = 0; + c.gridy = 0; + c.gridwidth = 1; + panel.add(folderPanel, c); + + + c.gridx = 0; + c.gridy = 1; + c.gridwidth = 1; + this.sourcePanel = createSourcePanel(); + + // make sure the sourcePanel has usable content by default + createDatabaseFields(DbType.DELIMITED_TEXT_FILES.label()); + sourceType.setSelectedItem(DbType.DELIMITED_TEXT_FILES.label()); + + panel.add(this.sourcePanel, c); + + JPanel testConnectionButtonPanel = new JPanel(); + testConnectionButtonPanel.setLayout(new BoxLayout(testConnectionButtonPanel, BoxLayout.X_AXIS)); + testConnectionButtonPanel.add(Box.createHorizontalGlue()); + + JButton testConnectionButton = new JButton(LABEL_TEST_CONNECTION); + 
testConnectionButton.setName(LABEL_TEST_CONNECTION); + testConnectionButton.setBackground(new Color(151, 220, 141)); + testConnectionButton.setToolTipText("Test the connection"); + testConnectionButton.addActionListener(e -> this.runConnectionTest()); + panelsManager.getComponentsToDisableWhenRunning().add(testConnectionButton); + testConnectionButtonPanel.add(testConnectionButton); + + c.gridx = 0; + c.gridy = 2; + c.gridwidth = 1; + panel.add(testConnectionButtonPanel, c); + } + + private void runConnectionTest() { + panelsManager.runConnectionTest(); + + } + + private void createDatabaseFields(ItemEvent itemEvent) { + String selectedSourceType = itemEvent.getItem().toString(); + + // remove existing DB related fields in sourcePanel + sourcePanel.clear(); + + currentDbType = DbType.getDbType(selectedSourceType); + if (currentDbType.supportsStorageHandler()) { + createDatabaseFields(); + } else { + createDatabaseFields(selectedSourceType); + } + if (panelsManager.getAddAllButton() != null) { + panelsManager.getAddAllButton().setEnabled(sourceIsDatabase(selectedSourceType)); + } + this.revalidate(); + } + + @FunctionalInterface + public interface SimpleDocumentListener extends DocumentListener { + void update(DocumentEvent e); + + @Override + default void insertUpdate(DocumentEvent e) { + update(e); + } + @Override + default void removeUpdate(DocumentEvent e) { + update(e); + } + @Override + default void changedUpdate(DocumentEvent e) { + update(e); + } + } + + private void createDatabaseFields() { + DBConfiguration currentConfiguration = this.currentDbType.getStorageHandler().getDBConfiguration(); + currentConfiguration.getFields().forEach(f -> { + sourcePanel.addReplacable(new JLabel(f.label)); + JTextField field = new JTextField(f.getValueOrDefault()); + field.setName(f.name); + field.setToolTipText(f.toolTip); + sourcePanel.addReplacable(field); + field.setEnabled(true); + field.getDocument().addDocumentListener((SimpleDocumentListener) e -> { + 
f.setValue(field.getText()); + }); + }); + } + + private boolean sourceIsFiles(String sourceType) { + return sourceType.equalsIgnoreCase(DbType.DELIMITED_TEXT_FILES.label()); + } + + private boolean sourceIsSas(String sourceType) { + return sourceType.equalsIgnoreCase(DbType.SAS7BDAT.label()); + } + + private boolean sourceIsDatabase(String sourceType) { + return (!sourceIsFiles(sourceType) && !sourceIsSas(sourceType)); + } + + private void createDatabaseFields(String selectedSourceType) { + sourceIsFiles = sourceIsFiles(selectedSourceType); + sourceIsSas = sourceIsSas(selectedSourceType); + boolean sourceIsDatabase = sourceIsDatabase(selectedSourceType); + + sourcePanel.addReplacable(new JLabel(LABEL_SERVER_LOCATION)); + sourceServerField = new JTextField("127.0.0.1"); + sourceServerField.setName(LABEL_SERVER_LOCATION); + sourceServerField.setEnabled(false); + sourcePanel.addReplacable(sourceServerField); + sourcePanel.addReplacable(new JLabel(LABEL_USER_NAME)); + sourceUserField = new JTextField(""); + sourceUserField.setName(LABEL_USER_NAME); + sourceUserField.setEnabled(false); + sourcePanel.addReplacable(sourceUserField); + sourcePanel.addReplacable(new JLabel(LABEL_PASSWORD)); + sourcePasswordField = new JPasswordField(""); + sourcePasswordField.setName(LABEL_PASSWORD); + sourcePasswordField.setEnabled(false); + sourcePanel.addReplacable(sourcePasswordField); + sourcePanel.addReplacable(new JLabel(LABEL_DATABASE_NAME)); + sourceDatabaseField = new JTextField(""); + sourceDatabaseField.setName(LABEL_DATABASE_NAME); + sourceDatabaseField.setEnabled(false); + sourcePanel.addReplacable(sourceDatabaseField); + + sourcePanel.addReplacable(new JLabel(LABEL_DELIMITER)); + JTextField delimiterField = new JTextField(","); + delimiterField.setName(NAME_DELIMITER); + sourceDelimiterField = delimiterField; + sourceDelimiterField.setToolTipText("The delimiter that separates values. 
Enter 'tab' for tab."); + sourcePanel.addReplacable(sourceDelimiterField); + sourceServerField.setEnabled(sourceIsDatabase); + sourceUserField.setEnabled(sourceIsDatabase); + sourcePasswordField.setEnabled(sourceIsDatabase); + sourceDatabaseField.setEnabled(sourceIsDatabase && !selectedSourceType.equals(DbType.AZURE.label())); + sourceDelimiterField.setEnabled(sourceIsFiles); + + if (sourceIsDatabase) { + if (selectedSourceType.equals(DbType.ORACLE.label())) { + sourceServerField.setToolTipText("For Oracle servers this field contains the SID, servicename, and optionally the port: '/', ':/', '/', or ':/'"); + sourceUserField.setToolTipText("For Oracle servers this field contains the name of the user used to log in"); + sourcePasswordField.setToolTipText("For Oracle servers this field contains the password corresponding to the user"); + sourceDatabaseField.setToolTipText("For Oracle servers this field contains the schema (i.e. 'user' in Oracle terms) containing the source tables"); + } else if (selectedSourceType.equals(DbType.POSTGRESQL.label())) { + sourceServerField.setToolTipText(TOOLTIP_POSTGRESQL_SERVER); + sourceUserField.setToolTipText("The user used to log in to the server"); + sourcePasswordField.setToolTipText("The password used to log in to the server"); + sourceDatabaseField.setToolTipText("For PostgreSQL servers this field contains the schema containing the source tables"); + } else if (selectedSourceType.equals(DbType.BIGQUERY.label())) { + sourceServerField.setToolTipText("GBQ SA & UA: ProjectID"); + sourceUserField.setToolTipText("GBQ SA only: OAuthServiceAccountEMAIL"); + sourcePasswordField.setToolTipText("GBQ SA only: OAuthPvtKeyPath"); + sourceDatabaseField.setToolTipText("GBQ SA & UA: Data Set within ProjectID"); + } else { + if (selectedSourceType.equals(DbType.AZURE.label())) { + sourceServerField.setToolTipText("For Azure, this field contains the host name and database name (;database=)"); + } else { + sourceServerField.setToolTipText("This 
field contains the name or IP address of the database server"); + } + if (selectedSourceType.equals(DbType.SQL_SERVER.label())) { + sourceUserField.setToolTipText("The user used to log in to the server. Optionally, the domain can be specified as / (e.g. 'MyDomain/Joe')"); + } else { + sourceUserField.setToolTipText("The user used to log in to the server"); + } + sourcePasswordField.setToolTipText("The password used to log in to the server"); + if (selectedSourceType.equals(DbType.AZURE.label())) { + sourceDatabaseField.setToolTipText("For Azure, leave this empty"); + } else { + sourceDatabaseField.setToolTipText("The name of the database containing the source tables"); + } + } + } + } + + private SourcePanel createSourcePanel() { + SourcePanel sourcePanel = new SourcePanel(); + sourcePanel.setLayout(new GridLayout(0, 2)); + sourcePanel.setBorder(BorderFactory.createTitledBorder("Source data location")); + sourcePanel.add(new JLabel("Data type")); + sourceType = new JComboBox<>(DbType.pickList()); + sourceType.setName("SourceType"); + sourceType.setToolTipText("Select the type of source data available"); + sourceType.addItemListener(event -> { + if (event.getStateChange() == ItemEvent.SELECTED) { + createDatabaseFields(event); + } + }); + sourcePanel.add(sourceType); + + return sourcePanel; + } + + public JTextField getFolderField() { + return folderField; + } + + public String getSelectedSourceType() { + return Objects.requireNonNull(sourceType.getSelectedItem()).toString(); + } + + public JTextField getSourceDelimiterField() { + return sourceDelimiterField; + } + + public boolean sourceIsFiles() { + return sourceIsFiles; + } + + public boolean sourceIsSas() { + return sourceIsSas; + } + + public String getSourceServerField() { + return sourceServerField.getText(); + } + + public String getSourceUserField() { + return sourceUserField.getText(); + } + + public String getSourcePasswordField() { + return sourcePasswordField.getText(); + } + public String 
getSourceDatabaseField() { + return sourceDatabaseField.getText(); + } + + public boolean isSourceDatabaseFieldEnabled() { + return sourceDatabaseField.isEnabled(); + } + + public DbType getCurrentDbChoice() { + return this.currentDbType; + } + + private void pickFolder() { + JFileChooser fileChooser = new JFileChooser(new File(folderField.getText())); + fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); + int returnVal = fileChooser.showDialog(parentFrame, "Select folder"); + if (returnVal == JFileChooser.APPROVE_OPTION) { + File selectedDirectory = fileChooser.getSelectedFile(); + if (!selectedDirectory.exists()) { + // When no directory is selected when approving, FileChooser incorrectly appends the current directory to the path. + // Take the opened directory instead. + selectedDirectory = fileChooser.getCurrentDirectory(); + } + folderField.setText(selectedDirectory.getAbsolutePath()); + } + } +} diff --git a/whiterabbit/src/main/java/org/ohdsi/whiterabbit/gui/SourcePanel.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/gui/SourcePanel.java new file mode 100644 index 00000000..4dd565b5 --- /dev/null +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/gui/SourcePanel.java @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.whiterabbit.gui; + +import javax.swing.*; +import java.util.*; + +public class SourcePanel extends JPanel { + private List clearableComponents = new ArrayList<>(); + + public void addReplacable(JComponent component) { + + this.add(component); + clearableComponents.add(component); + } + + public void clear() { + // remove the components in the reverse order of how they were added, keeps the layout of the JPanel intact + Collections.reverse(clearableComponents); + clearableComponents.forEach(this::remove); + clearableComponents.clear(); + } + +} diff --git a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/scan/SourceDataScan.java similarity index 59% rename from whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java rename to whiterabbit/src/main/java/org/ohdsi/whiterabbit/scan/SourceDataScan.java index 38e64ba3..11e88715 100644 --- a/whiterabbit/src/main/java/org/ohdsi/whiteRabbit/scan/SourceDataScan.java +++ b/whiterabbit/src/main/java/org/ohdsi/whiterabbit/scan/SourceDataScan.java @@ -1,30 +1,26 @@ /******************************************************************************* * Copyright 2019 Observational Health Data Sciences and Informatics - * + *

* This file is part of WhiteRabbit - * + *

* Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + *

* http://www.apache.org/licenses/LICENSE-2.0 - * + *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ -package org.ohdsi.whiteRabbit.scan; +package org.ohdsi.whiterabbit.scan; import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.rmi.RemoteException; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.time.LocalDate; import java.time.LocalDateTime; import java.util.*; import java.util.stream.Collectors; @@ -38,34 +34,30 @@ import org.apache.poi.ss.usermodel.CellStyle; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.xssf.streaming.SXSSFWorkbook; import org.apache.commons.io.FileUtils; -import org.ohdsi.databases.DbType; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.ohdsi.databases.configuration.DbSettings; +import org.ohdsi.databases.configuration.DbType; import org.ohdsi.databases.RichConnection; -import org.ohdsi.databases.RichConnection.QueryResult; +import org.ohdsi.databases.QueryResult; +import org.ohdsi.databases.*; import org.ohdsi.rabbitInAHat.dataModel.Table; import org.ohdsi.utilities.*; -import org.ohdsi.utilities.collections.CountingSet; -import org.ohdsi.utilities.collections.CountingSet.Count; import org.ohdsi.utilities.collections.Pair; import org.ohdsi.utilities.files.ReadTextFile; -import org.ohdsi.whiteRabbit.DbSettings; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import static java.lang.Long.max; -public class SourceDataScan { - - public static int MAX_VALUES_IN_MEMORY = 100000; - public static int MIN_CELL_COUNT_FOR_CSV = 1000000; - public static int N_FOR_FREE_TEXT_CHECK = 1000; - public 
static int MIN_AVERAGE_LENGTH_FOR_FREE_TEXT = 100; - +public class SourceDataScan implements ScanParameters { + static Logger logger = LoggerFactory.getLogger(SourceDataScan.class); public final static String SCAN_REPORT_FILE_NAME = "ScanReport.xlsx"; public static final String POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME = "ORG_OHDSI_WHITERABBIT_POI_TMPDIR"; public static final String POI_TMP_DIR_PROPERTY_NAME = "org.ohdsi.whiterabbit.poi.tmpdir"; - private SXSSFWorkbook workbook; + private XSSFWorkbook workbook; private char delimiter = ','; private int sampleSize; private boolean scanValues = false; @@ -75,7 +67,6 @@ public class SourceDataScan { private int maxValues; private DbSettings.SourceType sourceType; private DbType dbType; - private String database; private Map> tableToFieldInfos; private Map indexedTableNameLookup; @@ -96,6 +87,22 @@ public void setSampleSize(int sampleSize) { this.sampleSize = sampleSize; } + public boolean doCalculateNumericStats() { + return calculateNumericStats; + } + + public int getMaxValues() { + return maxValues; + } + + public boolean doScanValues() { + return scanValues; + } + + public int getNumStatsSamplerSize() { + return numStatsSamplerSize; + } + public void setScanValues(boolean scanValues) { this.scanValues = scanValues; } @@ -104,6 +111,14 @@ public void setMinCellCount(int minCellCount) { this.minCellCount = minCellCount; } + public int getMinCellCount() { + return minCellCount; + } + + public int getSampleSize() { + return sampleSize; + } + public void setMaxValues(int maxValues) { this.maxValues = maxValues; } @@ -116,11 +131,10 @@ public void setNumStatsSamplerSize(int numStatsSamplerSize) { this.numStatsSamplerSize = numStatsSamplerSize; } - public void process(DbSettings dbSettings, String outputFileName) { + public void process(DbSettings dbSettings, String outputFileName) throws IOException { startTimeStamp = LocalDateTime.now(); sourceType = dbSettings.sourceType; dbType = dbSettings.dbType; - database = 
dbSettings.database; tableToFieldInfos = new HashMap<>(); StringUtilities.outputWithTime("Started new scan of " + dbSettings.tables.size() + " tables..."); @@ -181,7 +195,7 @@ private static Path setupTmpDir(Path tmpDir) { private static void checkWritableTmpDir(String dir) { if (isNotWritable(Paths.get(dir))) { String message = String.format("Directory %s is not writable! (used for tmp files for Apache POI)", dir); - System.out.println(message); + logger.warn(message); throw new RuntimeException(message); } } @@ -214,15 +228,14 @@ private void processDatabase(DbSettings dbSettings) { if (dbSettings.dbType == DbType.BIGQUERY) { dbSettings.domain = dbSettings.database; } - - try (RichConnection connection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { + try (RichConnection connection = new RichConnection(dbSettings)) { connection.setVerbose(false); connection.use(dbSettings.database); tableToFieldInfos = dbSettings.tables.stream() .collect(Collectors.toMap( Table::new, - table -> processDatabaseTable(table, connection) + table -> processDatabaseTable(table, connection, dbSettings.database) )); } } @@ -257,11 +270,11 @@ private void processSasFiles(DbSettings dbSettings) { } } - private void generateReport(String filename) { + private void generateReport(String filename) throws IOException { StringUtilities.outputWithTime("Generating scan report"); removeEmptyTables(); - workbook = new SXSSFWorkbook(100); // keep 100 rows in memory, exceeding rows will be flushed to disk + workbook = new XSSFWorkbook(); int i = 0; indexedTableNameLookup = new HashMap<>(); @@ -465,8 +478,7 @@ private void createMetaSheet() { addRow(metaSheet, "N_FOR_FREE_TEXT_CHECK", SourceDataScan.N_FOR_FREE_TEXT_CHECK); addRow(metaSheet, "MIN_AVERAGE_LENGTH_FOR_FREE_TEXT", SourceDataScan.MIN_AVERAGE_LENGTH_FOR_FREE_TEXT); addRow(metaSheet, "sourceType", this.sourceType.toString()); - addRow(metaSheet, "dbType", this.dbType != 
null ? this.dbType.getTypeName() : ""); -// addRow(metaSheet, "database", this.database); + addRow(metaSheet, "dbType", this.dbType != null ? this.dbType.name() : ""); addRow(metaSheet, "delimiter", this.delimiter); addRow(metaSheet, "sampleSize", this.sampleSize); addRow(metaSheet, "scanValues", this.scanValues); @@ -479,33 +491,38 @@ private void createMetaSheet() { private void removeEmptyTables() { tableToFieldInfos.entrySet() - .removeIf(stringListEntry -> stringListEntry.getValue().size() == 0); + .removeIf(stringListEntry -> stringListEntry.getValue().isEmpty()); } - private List processDatabaseTable(String table, RichConnection connection) { + private List processDatabaseTable(String table, RichConnection connection, String database) { StringUtilities.outputWithTime("Scanning table " + table); - long rowCount = connection.getTableSize(table); - List fieldInfos = fetchTableStructure(connection, table); + long rowCount; + if (connection.getConnection().hasStorageHandler()) { + rowCount = connection.getConnection().getStorageHandler().getTableSize(table); + } else { + rowCount = connection.getTableSize(table); + } + List fieldInfos = connection.fetchTableStructure(connection, database, table, this); if (scanValues) { int actualCount = 0; QueryResult queryResult = null; try { - queryResult = fetchRowsFromTable(connection, table, rowCount); + queryResult = connection.fetchRowsFromTable(table, rowCount, this); for (org.ohdsi.utilities.files.Row row : queryResult) { for (FieldInfo fieldInfo : fieldInfos) { fieldInfo.processValue(row.get(fieldInfo.name)); } actualCount++; if (sampleSize != -1 && actualCount >= sampleSize) { - System.out.println("Stopped after " + actualCount + " rows"); + logger.info("Stopped after {} rows", actualCount); break; } } for (FieldInfo fieldInfo : fieldInfos) fieldInfo.trim(); } catch (Exception e) { - System.out.println("Error: " + e.getMessage()); + logger.error(e.getMessage(), e); } finally { if (queryResult != null) { 
queryResult.close(); @@ -516,105 +533,6 @@ private List processDatabaseTable(String table, RichConnection connec return fieldInfos; } - private QueryResult fetchRowsFromTable(RichConnection connection, String table, long rowCount) { - String query = null; - - if (sampleSize == -1) { - if (dbType == DbType.MSACCESS) - query = "SELECT * FROM [" + table + "]"; - else if (dbType == DbType.MSSQL || dbType == DbType.PDW || dbType == DbType.AZURE) - query = "SELECT * FROM [" + table.replaceAll("\\.", "].[") + "]"; - else - query = "SELECT * FROM " + table; - } else { - if (dbType == DbType.MSSQL || dbType == DbType.AZURE) - query = "SELECT * FROM [" + table.replaceAll("\\.", "].[") + "] TABLESAMPLE (" + sampleSize + " ROWS)"; - else if (dbType == DbType.MYSQL) - query = "SELECT * FROM " + table + " ORDER BY RAND() LIMIT " + sampleSize; - else if (dbType == DbType.PDW) - query = "SELECT TOP " + sampleSize + " * FROM [" + table.replaceAll("\\.", "].[") + "] ORDER BY RAND()"; - else if (dbType == DbType.ORACLE) { - if (sampleSize < rowCount) { - double percentage = 100 * sampleSize / (double) rowCount; - if (percentage < 100) - query = "SELECT * FROM " + table + " SAMPLE(" + percentage + ")"; - } else { - query = "SELECT * FROM " + table; - } - } else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) - query = "SELECT * FROM " + table + " ORDER BY RANDOM() LIMIT " + sampleSize; - else if (dbType == DbType.MSACCESS) - query = "SELECT " + "TOP " + sampleSize + " * FROM [" + table + "]"; - else if (dbType == DbType.BIGQUERY) - query = "SELECT * FROM " + table + " ORDER BY RAND() LIMIT " + sampleSize; - } - // System.out.println("SQL: " + query); - return connection.query(query); - - } - - private List fetchTableStructure(RichConnection connection, String table) { - List fieldInfos = new ArrayList<>(); - - if (dbType == DbType.MSACCESS) { - ResultSet rs = connection.getMsAccessFieldNames(table); - try { - while (rs.next()) { - FieldInfo fieldInfo = new 
FieldInfo(rs.getString("COLUMN_NAME")); - fieldInfo.type = rs.getString("TYPE_NAME"); - fieldInfo.rowCount = connection.getTableSize(table); - fieldInfos.add(fieldInfo); - } - } catch (SQLException e) { - throw new RuntimeException(e.getMessage()); - } - } else { - String query = null; - if (dbType == DbType.ORACLE) - query = "SELECT COLUMN_NAME,DATA_TYPE FROM ALL_TAB_COLUMNS WHERE table_name = '" + table + "' AND owner = '" + database.toUpperCase() + "'"; - else if (dbType == DbType.MSSQL || dbType == DbType.PDW) { - String trimmedDatabase = database; - if (database.startsWith("[") && database.endsWith("]")) - trimmedDatabase = database.substring(1, database.length() - 1); - String[] parts = table.split("\\."); - query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_CATALOG='" + trimmedDatabase + "' AND TABLE_SCHEMA='" + parts[0] + - "' AND TABLE_NAME='" + parts[1] + "';"; - } else if (dbType == DbType.AZURE) { - String[] parts = table.split("\\."); - query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA='" + parts[0] + - "' AND TABLE_NAME='" + parts[1] + "';"; - } else if (dbType == DbType.MYSQL) - query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database + "' AND TABLE_NAME = '" + table - + "';"; - else if (dbType == DbType.POSTGRESQL || dbType == DbType.REDSHIFT) - query = "SELECT COLUMN_NAME,DATA_TYPE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '" + database.toLowerCase() + "' AND TABLE_NAME = '" - + table.toLowerCase() + "' ORDER BY ordinal_position;"; - else if (dbType == DbType.TERADATA) { - query = "SELECT ColumnName, ColumnType FROM dbc.columns WHERE DatabaseName= '" + database.toLowerCase() + "' AND TableName = '" - + table.toLowerCase() + "';"; - } else if (dbType == DbType.BIGQUERY) { - query = "SELECT column_name AS COLUMN_NAME, data_type as DATA_TYPE FROM " + database + ".INFORMATION_SCHEMA.COLUMNS WHERE table_name = \"" + table + 
"\";"; - } - - for (org.ohdsi.utilities.files.Row row : connection.query(query)) { - row.upperCaseFieldNames(); - FieldInfo fieldInfo; - if (dbType == DbType.TERADATA) { - fieldInfo = new FieldInfo(row.get("COLUMNNAME")); - } else { - fieldInfo = new FieldInfo(row.get("COLUMN_NAME")); - } - if (dbType == DbType.TERADATA) { - fieldInfo.type = row.get("COLUMNTYPE"); - } else { - fieldInfo.type = row.get("DATA_TYPE"); - } - fieldInfo.rowCount = connection.getTableSize(table); - fieldInfos.add(fieldInfo); - } - } - return fieldInfos; - } private List processCsvFile(String filename) { StringUtilities.outputWithTime("Scanning table " + filename); @@ -633,7 +551,7 @@ private List processCsvFile(String filename) { if (lineNr == 1) { for (String cell : row) { - fieldInfos.add(new FieldInfo(cell)); + fieldInfos.add(new FieldInfo(this, cell)); } if (!scanValues) { @@ -660,7 +578,7 @@ private List processSasFile(SasFileReader sasFileReader) throws IOExc SasFileProperties sasFileProperties = sasFileReader.getSasFileProperties(); for (Column column : sasFileReader.getColumns()) { - FieldInfo fieldInfo = new FieldInfo(column.getName()); + FieldInfo fieldInfo = new FieldInfo(this, column.getName()); fieldInfo.label = column.getLabel(); fieldInfo.rowCount = sasFileProperties.getRowCount(); if (!scanValues) { @@ -698,229 +616,6 @@ private List processSasFile(SasFileReader sasFileReader) throws IOExc return fieldInfos; } - private class FieldInfo { - public String type; - public String name; - public String label; - public CountingSet valueCounts = new CountingSet<>(); - public long sumLength = 0; - public int maxLength = 0; - public long nProcessed = 0; - public long emptyCount = 0; - public long uniqueCount = 0; - public long rowCount = -1; - public boolean isInteger = true; - public boolean isReal = true; - public boolean isDate = true; - public boolean isFreeText = false; - public boolean tooManyValues = false; - public UniformSamplingReservoir samplingReservoir; - public Object 
average; - public Object stdev; - public Object minimum; - public Object maximum; - public Object q1; - public Object q2; - public Object q3; - - public FieldInfo(String name) { - this.name = name; - if (calculateNumericStats) { - this.samplingReservoir = new UniformSamplingReservoir(numStatsSamplerSize); - } - } - - public void trim() { - // Only keep values that are used in scan report - if (valueCounts.size() > maxValues) { - valueCounts.keepTopN(maxValues); - } - - // Calculate numeric stats and dereference sampling reservoir to save memory. - if (calculateNumericStats) { - average = getAverage(); - stdev = getStandardDeviation(); - minimum = getMinimum(); - maximum = getMaximum(); - q1 = getQ1(); - q2 = getQ2(); - q3 = getQ3(); - } - samplingReservoir = null; - } - - public boolean hasValuesTrimmed() { - return tooManyValues; - } - - public Double getFractionEmpty() { - if (nProcessed == 0) - return 1d; - else - return emptyCount / (double) nProcessed; - } - - public String getTypeDescription() { - if (type != null) - return type; - else if (!scanValues) // If not type assigned and not values scanned, do not derive - return ""; - else if (nProcessed == emptyCount) - return DataType.EMPTY.name(); - else if (isFreeText) - return DataType.TEXT.name(); - else if (isDate) - return DataType.DATE.name(); - else if (isInteger) - return DataType.INT.name(); - else if (isReal) - return DataType.REAL.name(); - else - return DataType.VARCHAR.name(); - } - - public Double getFractionUnique() { - if (nProcessed == 0 || uniqueCount == 1) { - return 0d; - } else { - return uniqueCount / (double) nProcessed; - } - - } - - public void processValue(String value) { - nProcessed++; - sumLength += value.length(); - if (value.length() > maxLength) - maxLength = value.length(); - - String trimValue = value.trim(); - if (trimValue.length() == 0) - emptyCount++; - - if (!isFreeText) { - boolean newlyAdded = valueCounts.add(value); - if (newlyAdded) uniqueCount++; - - if 
(trimValue.length() != 0) { - evaluateDataType(trimValue); - } - - if (nProcessed == N_FOR_FREE_TEXT_CHECK && !isInteger && !isReal && !isDate) { - doFreeTextCheck(); - } - } else { - valueCounts.addAll(StringUtilities.mapToWords(trimValue.toLowerCase())); - } - - // if over this large constant number, then trimmed back to size used in report (maxValues). - if (!tooManyValues && valueCounts.size() > MAX_VALUES_IN_MEMORY) { - tooManyValues = true; - this.trim(); - } - - if (calculateNumericStats && !trimValue.isEmpty()) { - if (isInteger || isReal) { - samplingReservoir.add(Double.parseDouble(trimValue)); - } else if (isDate) { - samplingReservoir.add(DateUtilities.parseDate(trimValue)); - } - } - } - - public List> getSortedValuesWithoutSmallValues() { - List> result = valueCounts.key2count.entrySet().stream() - .filter(e -> e.getValue().count >= minCellCount) - .sorted(Comparator.>comparingInt(e -> e.getValue().count).reversed()) - .limit(maxValues) - .map(e -> new Pair<>(e.getKey(), e.getValue().count)) - .collect(Collectors.toCollection(ArrayList::new)); - - if (result.size() < valueCounts.key2count.size()) { - result.add(new Pair<>("List truncated...", -1)); - } - return result; - } - - private void evaluateDataType(String value) { - if (isReal && !StringUtilities.isNumber(value)) - isReal = false; - if (isInteger && !StringUtilities.isLong(value)) - isInteger = false; - if (isDate && !StringUtilities.isDate(value)) - isDate = false; - } - - private void doFreeTextCheck() { - double averageLength = sumLength / (double) (nProcessed - emptyCount); - if (averageLength >= MIN_AVERAGE_LENGTH_FOR_FREE_TEXT) { - isFreeText = true; - // Reset value count to word count - CountingSet wordCounts = new CountingSet<>(); - for (Map.Entry entry : valueCounts.key2count.entrySet()) - for (String word : StringUtilities.mapToWords(entry.getKey().toLowerCase())) - wordCounts.add(word, entry.getValue().count); - valueCounts = wordCounts; - } - } - - private Object 
formatNumericValue(double value) { - return formatNumericValue(value, false); - } - - private Object formatNumericValue(double value, boolean dateAsDays) { - if (nProcessed == 0) { - return Double.NaN; - } else if (getTypeDescription().equals(DataType.EMPTY.name())) { - return Double.NaN; - } else if (isInteger || isReal) { - return value; - } else if (isDate && dateAsDays) { - return value; - } else if (isDate) { - return LocalDate.ofEpochDay((long) value).toString(); - } else { - return Double.NaN; - } - } - - private Object getMinimum() { - double min = samplingReservoir.getPopulationMinimum(); - return formatNumericValue(min); - } - - private Object getMaximum() { - double max = samplingReservoir.getPopulationMaximum(); - return formatNumericValue(max); - } - - private Object getAverage() { - double average = samplingReservoir.getPopulationMean(); - return formatNumericValue(average); - } - - private Object getStandardDeviation() { - double stddev = samplingReservoir.getSampleStandardDeviation(); - return formatNumericValue(stddev, true); - } - - private Object getQ1() { - double q1 = samplingReservoir.getSampleQuartiles().get(0); - return formatNumericValue(q1); - } - - private Object getQ2() { - double q2 = samplingReservoir.getSampleQuartiles().get(1); - return formatNumericValue(q2); - } - - private Object getQ3() { - double q3 = samplingReservoir.getSampleQuartiles().get(2); - return formatNumericValue(q3); - } - - } - private Row addRow(Sheet sheet, Object... 
values) { Row row = sheet.createRow(sheet.getPhysicalNumberOfRows()); for (Object value : values) { diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit.ico b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit.ico similarity index 100% rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit.ico rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit.ico diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit128.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit128.png similarity index 100% rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit128.png rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit128.png diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit16.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit16.png similarity index 100% rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit16.png rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit16.png diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit256.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit256.png similarity index 100% rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit256.png rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit256.png diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit32.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit32.png similarity index 100% rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit32.png rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit32.png diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit48.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit48.png similarity index 100% 
rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit48.png rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit48.png diff --git a/whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit64.png b/whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit64.png similarity index 100% rename from whiterabbit/src/main/resources/org/ohdsi/whiteRabbit/WhiteRabbit64.png rename to whiterabbit/src/main/resources/org/ohdsi/whiterabbit/WhiteRabbit64.png diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/GUITestExtension.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/GUITestExtension.java new file mode 100644 index 00000000..12553f70 --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/GUITestExtension.java @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.whiterabbit.scan; + +import org.assertj.swing.junit.runner.FailureScreenshotTaker; +import org.assertj.swing.junit.runner.ImageFolderCreator; +import org.junit.jupiter.api.extension.Extension; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.jupiter.api.extension.InvocationInterceptor; +import org.junit.jupiter.api.extension.ReflectiveInvocationContext; + +import java.lang.reflect.Method; + +import static org.assertj.swing.annotation.GUITestFinder.isGUITest; +import static org.assertj.swing.junit.runner.Formatter.testNameFrom; + +/** + * Understands a JUnit 5 extension that takes a screenshot of a failed GUI test. + * The Junit 4 runner is available in {@link org.assertj.swing.junit.runner.GUITestRunner}. + * + * @see assertj-swing #259 + * @author William Bakker + */ +public class GUITestExtension implements Extension, InvocationInterceptor { + //private final FailureScreenshotTaker screenshotTaker; + + public GUITestExtension() { + //screenshotTaker = new FailureScreenshotTaker(new ImageFolderCreator().createImageFolder()); + } + + @Override + public void interceptTestMethod( + Invocation invocation, + ReflectiveInvocationContext invocationContext, + ExtensionContext extensionContext) + throws Throwable { + try { + invocation.proceed(); + } catch (Throwable t) { + //takeScreenshot(invocationContext.getExecutable()); + throw t; + } + } + + private void takeScreenshot(Method method) { + final Class testClass = method.getDeclaringClass(); + if (!(isGUITest(testClass, method))) + return; + //screenshotTaker.saveScreenshot(testNameFrom(testClass, method)); + } +} \ No newline at end of file diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java index 26776c48..19bf8b3b 100644 --- 
a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/ScanTestUtils.java @@ -1,127 +1,177 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ package org.ohdsi.whiterabbit.scan; -import org.ohdsi.databases.DbType; -import org.ohdsi.databases.RichConnection; -import org.ohdsi.ooxml.ReadXlsxFileWithHeader; -import org.ohdsi.utilities.files.Row; -import org.ohdsi.utilities.files.RowUtilities; -import org.ohdsi.whiteRabbit.DbSettings; -import org.testcontainers.containers.PostgreSQLContainer; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.assertj.swing.timing.Condition; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.whiterabbit.Console; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; +import java.util.*; +import java.util.stream.IntStream; +import static org.assertj.swing.timing.Pause.pause; +import 
static org.assertj.swing.timing.Timeout.timeout; import static org.junit.jupiter.api.Assertions.*; +import static org.ohdsi.databases.configuration.DbType.*; public class ScanTestUtils { - public static void verifyScanResultsFromXSLX(Path results, DbType dbType) { - assertTrue(Files.exists(results)); - FileInputStream file = null; - try { - file = new FileInputStream(new File(results.toUri())); - } catch (FileNotFoundException e) { - throw new RuntimeException(String.format("File %s was expected to be found, but does not exist.", results), e); - } - - ReadXlsxFileWithHeader sheet = new ReadXlsxFileWithHeader(file); + // Convenience for having the same scan parameters across tests + public static SourceDataScan createSourceDataScan() { + SourceDataScan sourceDataScan = new SourceDataScan(); + sourceDataScan.setMinCellCount(5); + sourceDataScan.setScanValues(true); + sourceDataScan.setMaxValues(1000); + sourceDataScan.setNumStatsSamplerSize(500); + sourceDataScan.setCalculateNumericStats(false); + sourceDataScan.setSampleSize(100000); + + return sourceDataScan; + } - List data = new ArrayList<>(); - int i = 0; - for (Row row : sheet) { - data.add(row); - i++; - } + public static boolean scanResultsSheetMatchesReference(Path scanResults, Path referenceResults, DbType dbType) throws IOException { + Map>> scanSheets = readXlsxAsStringValues(scanResults); + Map>> referenceSheets = readXlsxAsStringValues(referenceResults); - // apparently the order of rows in the generated xslx table is not fixed, - // so they need to be sorted to be able to verify their contents - RowUtilities.sort(data, "Table", "Field"); - assertEquals(42, i); - - // since the table is generated with empty lines between the different tables of the source database, - // a number of empty lines is expected. 
Verify this, and the first non-empty line - expectRowNIsLike(0, data, dbType, "", "", "", "", "", ""); - expectRowNIsLike(1, data, dbType, "", "", "", "", "", ""); - expectRowNIsLike(2, data, dbType, "cost", "amount_allowed", "", "numeric", "0", "34"); - - // sample some other rows in the available range - expectRowNIsLike(9, data,dbType, "cost", "drg_source_value", "", "character varying", "0", "34"); - expectRowNIsLike(23, data,dbType, "cost", "total_paid", "", "numeric", "0", "34"); - expectRowNIsLike(24, data,dbType, "person", "birth_datetime", "", "timestamp without time zone", "0", "30"); - expectRowNIsLike(41, data,dbType, "person", "year_of_birth", "", "integer", "0", "30"); + return scanValuesMatchReferenceValues(scanSheets, referenceSheets, dbType); } - private static void expectRowNIsLike(int n, List rows, DbType dbType, String... expectedValues) { - assert expectedValues.length == 6; - testColumnValue(n, rows.get(n), "Table", expectedValues[0]); - testColumnValue(n, rows.get(n), "Field", expectedValues[1]); - testColumnValue(n, rows.get(n), "Description", expectedValues[2]); - testColumnValue(n, rows.get(n), "Type", expectedTypeValue(expectedValues[3], dbType)); - testColumnValue(n, rows.get(n), "Max length", expectedValues[4]); - testColumnValue(n, rows.get(n), "N rows", expectedValues[5]); - } + public static boolean isScanReportGeneratedAndMatchesReference(Console console, Path expectedPath, Path referencePath, DbType dbType) throws IOException { + assertNotNull(console); + // wait for the "Scan report generated:" message in the Console text area + pause(new Condition("Label Timeout") { + public boolean test() { + return console.getText().contains("Scan report generated:"); + } - private static void testColumnValue(int i, Row row, String fieldName, String expected) { - if (!expected.equalsIgnoreCase(row.get(fieldName))) { - fail(String.format("In row %d, value '%s' was expected for column '%s', but '%s' was found", - i, expected, fieldName, 
row.get(fieldName))); - } + }, timeout(10000)); + assertTrue(console.getText().contains(expectedPath.toString())); + + return scanResultsSheetMatchesReference(expectedPath, referencePath, dbType); } - private static String expectedTypeValue(String columnName, DbType dbType) { - /* - * This is very pragmatical and may need to change when tests are added for more databases. - * For now, PostgreSQL is used as the reference, and the expected types need to be adapted to match - * for other database. - */ - if (dbType == DbType.POSTGRESQL || columnName.equals("")) { - return columnName; - } - else if (dbType == DbType.ORACLE){ - switch (columnName) { - case "integer": - return "NUMBER"; - case "numeric": - return "FLOAT"; - case "character varying": - return "VARCHAR2"; - case "timestamp without time zone": - // seems a mismatch in the OMOP CMD v5.2 (Oracle defaults to WITH time zone) - return "TIMESTAMP(6) WITH TIME ZONE"; - default: - throw new RuntimeException("Unsupported column type: " + columnName); + public static boolean scanValuesMatchReferenceValues(Map>> scanSheets, Map>> referenceSheets, DbType dbType) { + assertEquals(scanSheets.size(), referenceSheets.size(), "Number of sheets does not match."); + for (String tabName: new String[]{"Field Overview", "Table Overview", "cost.csv", "person.csv"}) { + if (scanSheets.containsKey(tabName)) { + List> scanSheet = scanSheets.get(tabName); + List> referenceSheet = referenceSheets.get(tabName); + assertEquals(scanSheet.size(), referenceSheet.size(), String.format("Number of rows in sheet %s does not match.", tabName)); + // in WhiteRabbit v0.10.7 and older, the order or tables is not defined, so this can result in differences due to the rows + // being in a different order. By sorting the rows in both sheets, these kind of differences should not play a role. 
+ scanSheet.sort(new RowsComparator()); + referenceSheet.sort(new RowsComparator()); + for (int i = 0; i < scanSheet.size(); ++i) { + final int fi = i; + IntStream.range(0, scanSheet.get(fi).size()) + .parallel() + .forEach(j -> { + final String scanValue = scanSheet.get(fi).get(j); + final String referenceValue = referenceSheet.get(fi).get(j); + if (tabName.equals("Field Overview") && j == 3 && !scanValue.equalsIgnoreCase(referenceValue)) { + assertTrue(matchTypeName(scanValue, referenceValue, dbType), + String.format("Field type '%s' cannot be matched with reference type '%s' for DbType %s", + scanValue, referenceValue, dbType.name())); + } else { + assertTrue(scanValue.equalsIgnoreCase(referenceValue), + String.format("In sheet %s, value '%s' in scan results does not match '%s' in reference", + tabName, scanValue, referenceValue)); + } + }); + } } } - else { - throw new RuntimeException("Unsupported DBType: " + dbType); - } + + return true; } - static DbSettings getTestPostgreSQLSettings(PostgreSQLContainer container) { - DbSettings dbSettings = new DbSettings(); - dbSettings.dbType = DbType.POSTGRESQL; - dbSettings.sourceType = DbSettings.SourceType.DATABASE; - dbSettings.server = container.getJdbcUrl(); - dbSettings.database = "public"; // yes, really - dbSettings.user = container.getUsername(); - dbSettings.password = container.getPassword(); - dbSettings.tables = getTableNamesPostgreSQL(dbSettings); - - return dbSettings; + private static boolean matchTypeName(String type, String reference, DbType dbType) { + if (dbType == ORACLE) { + switch (type) { + case "NUMBER": return reference.equals("integer"); + case "VARCHAR2": return reference.equals("character varying"); + case "FLOAT": return reference.equals("numeric"); + // seems a mismatch in the OMOP CMD v5.2 (Oracle defaults to WITH time zone): + case "TIMESTAMP(6) WITH TIME ZONE": return reference.equals("timestamp without time zone"); + default: throw new RuntimeException(String.format("Unsupported column 
type '%s' for DbType %s ", type, dbType.name())); + } + } else if (dbType == DbType.SNOWFLAKE) { + switch (type) { + case "NUMBER": return reference.equals("integer") || reference.equals("numeric"); + case "VARCHAR": return reference.equals("character varying"); + case "TIMESTAMPNTZ": return reference.equals("timestamp without time zone"); + default: throw new RuntimeException(String.format("Unsupported column type '%s' for DbType %s ", type, dbType.name())); + } + } else { + throw new RuntimeException("Unsupported DbType: " + dbType.name()); + } } - static List getTableNamesPostgreSQL(DbSettings dbSettings) { - try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { - return richConnection.getTableNames("public"); + static class RowsComparator implements Comparator> { + @Override + public int compare(List o1, List o2) { + String firstString_o1 = o1.get(0); + String firstString_o2 = o2.get(0); + return firstString_o1.compareToIgnoreCase(firstString_o2); } } + private static Map>> readXlsxAsStringValues(Path xlsx) throws IOException { + assertTrue(Files.exists(xlsx), String.format("File %s does not exist.", xlsx)); + + Map>> sheets = new HashMap<>(); + FileInputStream file = null; + try { + file = new FileInputStream(new File(xlsx.toUri())); + } catch (FileNotFoundException e) { + throw new RuntimeException(String.format("File %s was expected to be found, but does not exist.", xlsx), e); + } + XSSFWorkbook xssfWorkbook = new XSSFWorkbook(file); + + for (int i = 0; i < xssfWorkbook.getNumberOfSheets(); ++i) { + XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(i); + + List> sheet = new ArrayList<>(); + for (org.apache.poi.ss.usermodel.Row row : xssfSheet) { + List values = new ArrayList<>(); + for (Cell cell: row) { + switch (cell.getCellType()) { + case NUMERIC: values.add(String.valueOf(cell.getNumericCellValue())); break; + case STRING: 
values.add(cell.getStringCellValue()); break; + default: throw new RuntimeException("Unsupported cell type: " + cell.getCellType().name()); + }; + } + sheet.add(values); + } + sheets.put(xssfSheet.getSheetName(), sheet); + } + return sheets; + } } diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanOracle.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanOracleIT.java similarity index 72% rename from whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanOracle.java rename to whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanOracleIT.java index 3f31dfe5..d05fd37c 100644 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanOracle.java +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanOracleIT.java @@ -1,25 +1,42 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ package org.ohdsi.whiterabbit.scan; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; -import org.ohdsi.databases.DbType; +import org.ohdsi.databases.configuration.DbSettings; +import org.ohdsi.databases.configuration.DbType; import org.ohdsi.databases.RichConnection; -import org.ohdsi.whiteRabbit.DbSettings; -import org.ohdsi.whiteRabbit.scan.SourceDataScan; import org.testcontainers.containers.OracleContainer; import org.testcontainers.junit.jupiter.Container; -import org.testcontainers.junit.jupiter.Testcontainers; import java.io.*; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; import static org.junit.jupiter.api.Assertions.*; -@Testcontainers(disabledWithoutDocker = true) -class TestSourceDataScanOracle { +class SourceDataScanOracleIT { private final static String USER_NAME = "test_user"; private final static String SCHEMA_NAME = USER_NAME; @@ -44,11 +61,16 @@ class TestSourceDataScanOracle { .withDatabaseName("testDB") .withInitScript("scan_data/create_data_oracle.sql"); + @BeforeAll + public static void startContainer() { + oracleContainer.start(); + } + @Test public void connectToDatabase() { // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker DbSettings dbSettings = getTestDbSettings(); - try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { + try (RichConnection richConnection = new RichConnection(dbSettings)) { // do nothing, connection will be closed automatically because RichConnection implements interface Closeable } } @@ -61,14 +83,16 @@ public void testGetTableNames() { assertEquals(2, tableNames.size()); } @Test - void 
testSourceDataScan(@TempDir Path tempDir) throws IOException { + void testSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException { loadData(); Path outFile = tempDir.resolve("scanresult.xslx"); - SourceDataScan sourceDataScan = new SourceDataScan(); + URL referenceScanReport = SourceDataScanOracleIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx"); + + SourceDataScan sourceDataScan = ScanTestUtils.createSourceDataScan(); DbSettings dbSettings = getTestDbSettings(); sourceDataScan.process(dbSettings, outFile.toString()); - ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType); + assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(outFile, Paths.get(referenceScanReport.toURI()), DbType.ORACLE)); } private void loadData() { @@ -78,15 +102,16 @@ private void loadData() { private void insertDataFromCsv(String tableName) { DbSettings dbSettings = getTestDbSettings(); - try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { + try (RichConnection richConnection = new RichConnection(dbSettings)) { try (BufferedReader reader = new BufferedReader(getResourcePath(tableName))) { String line = null; while ((line = reader.readLine()) != null) { - String[] values = line.split("\t"); - if (line.endsWith("\t")) { + String[] values = line.split(","); + if (line.endsWith(",")) { values = Arrays.copyOf(values, values.length + 1); values[values.length - 1] = ""; } + // Oracle INSERT needs quotes around the values String insertSql = String.format("INSERT INTO %s.%s VALUES('%s');", dbSettings.database, tableName, String.join("','", values)); richConnection.execute(insertSql); } @@ -97,7 +122,7 @@ private void insertDataFromCsv(String tableName) { } private InputStreamReader getResourcePath(String tableName) throws URISyntaxException, IOException { - String resourceName = String.format("scan_data/%s.csv", 
tableName); + String resourceName = String.format("scan_data/%s-no-header.csv", tableName); ClassLoader classLoader = getClass().getClassLoader(); File file = new File(Objects.requireNonNull(classLoader.getResource(resourceName)).toURI()); @@ -105,7 +130,7 @@ private InputStreamReader getResourcePath(String tableName) throws URISyntaxExce } private List getTableNames(DbSettings dbSettings) { - try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { + try (RichConnection richConnection = new RichConnection(dbSettings)) { return richConnection.getTableNames(SCHEMA_NAME); } } diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLGuiIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLGuiIT.java new file mode 100644 index 00000000..b205f108 --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLGuiIT.java @@ -0,0 +1,121 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.whiterabbit.scan; + +import com.github.caciocavallosilano.cacio.ctc.junit.CacioTest; +import org.assertj.swing.annotation.GUITest; +import org.assertj.swing.core.GenericTypeMatcher; +import org.assertj.swing.edt.GuiActionRunner; +import org.assertj.swing.finder.WindowFinder; +import org.assertj.swing.fixture.DialogFixture; +import org.assertj.swing.fixture.FrameFixture; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.whiterabbit.Console; +import org.ohdsi.whiterabbit.WhiteRabbitMain; +import org.ohdsi.whiterabbit.gui.LocationsPanel; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.junit.jupiter.Container; + +import javax.swing.*; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.ohdsi.databases.configuration.DbType.POSTGRESQL; +import static org.ohdsi.whiterabbit.scan.SourceDataScanPostgreSQLIT.createPostgreSQLContainer; + +@ExtendWith(GUITestExtension.class) +@CacioTest +class SourceDataScanPostgreSQLGuiIT { + + private static FrameFixture window; + private static Console console; + + private final static int WIDTH = 1920; + private final static int HEIGHT = 1080; + @BeforeAll + public static void setupOnce() { + System.setProperty("cacio.managed.screensize", String.format("%sx%s", WIDTH, HEIGHT)); + } + + @BeforeEach + public void onSetUp() { + String[] args = {}; + WhiteRabbitMain whiteRabbitMain = GuiActionRunner.execute(() -> new WhiteRabbitMain(true, args)); + console = whiteRabbitMain.getConsole(); + window = new 
FrameFixture(whiteRabbitMain.getFrame()); + window.show(); // shows the frame to test + } + + @Container + public static PostgreSQLContainer postgreSQL = createPostgreSQLContainer(); + + @ExtendWith(GUITestExtension.class) + @Test + void testConnectionAndSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException { + URL referenceScanReport = TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx"); + Path personCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/person-no-header.csv").toURI()); + Path costCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/cost-no-header.csv").toURI()); + Files.copy(personCsv, tempDir.resolve("person.csv")); + Files.copy(costCsv, tempDir.resolve("cost.csv")); + window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_LOCATIONS); + window.comboBox("SourceType").selectItem(DbType.POSTGRESQL.label()); + window.textBox("FolderField").setText(tempDir.toAbsolutePath().toString()); + // verify one tooltip text, assume that all other tooltip texts will be fine too (fingers crossed) + assertEquals(LocationsPanel.TOOLTIP_POSTGRESQL_SERVER, window.textBox(LocationsPanel.LABEL_SERVER_LOCATION).target().getToolTipText()); + window.textBox(LocationsPanel.LABEL_SERVER_LOCATION).setText(String.format("%s:%s/%s", + postgreSQL.getHost(), + postgreSQL.getFirstMappedPort(), + postgreSQL.getDatabaseName())); + window.textBox(LocationsPanel.LABEL_USER_NAME).setText(postgreSQL.getUsername()); + window.textBox(LocationsPanel.LABEL_PASSWORD).setText(postgreSQL.getPassword()); + window.textBox(LocationsPanel.LABEL_DATABASE_NAME).setText("public"); + + // use the "Test connection" button + window.button(WhiteRabbitMain.LABEL_TEST_CONNECTION).click(); + GenericTypeMatcher matcher = new GenericTypeMatcher(JDialog.class, true) { + protected boolean isMatching(JDialog frame) { + return 
WhiteRabbitMain.LABEL_CONNECTION_SUCCESSFUL.equals(frame.getTitle()); + } + }; + DialogFixture frame = WindowFinder.findDialog(matcher).using(window.robot()); + frame.button().click(); + + // switch to the scan panel, add all tables found and run the scan + window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_SCAN).click(); + window.button(WhiteRabbitMain.LABEL_ADD_ALL_IN_DB).click(); + window.button(WhiteRabbitMain.LABEL_SCAN_TABLES).click(); + + // verify the generated scan report against the reference + assertTrue(ScanTestUtils.isScanReportGeneratedAndMatchesReference( + console, + tempDir.resolve("ScanReport.xlsx"), + Paths.get(referenceScanReport.toURI()), + POSTGRESQL)); + + //window.close(); + } +} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLIT.java new file mode 100644 index 00000000..2892e3a8 --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanPostgreSQLIT.java @@ -0,0 +1,107 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.whiterabbit.scan; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.ohdsi.databases.configuration.DbSettings; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.databases.RichConnection; +import org.testcontainers.containers.BindMode; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.junit.jupiter.Container; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + + +class SourceDataScanPostgreSQLIT { + + @Container + public static PostgreSQLContainer postgreSQL = createPostgreSQLContainer(); + + @Test + public void connectToDatabase() { + // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker + DbSettings dbSettings = getTestDbSettings(); + try (RichConnection richConnection = new RichConnection(dbSettings)) { + // do nothing, connection will be closed automatically because RichConnection implements interface Closeable + } + } + + @Test + public void testGetTableNames() { + // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker + DbSettings dbSettings = getTestDbSettings(); + List tableNames = getTableNames(dbSettings); + assertEquals(2, tableNames.size()); + } + + public static PostgreSQLContainer createPostgreSQLContainer() { + PostgreSQLContainer postgreSQLContainer = new PostgreSQLContainer<>("postgres:13.1") + .withUsername("test") + .withPassword("test") + .withDatabaseName("test") + .withClasspathResourceMapping( + "scan_data", + "/scan_data", + BindMode.READ_ONLY) + .withInitScript("scan_data/create_data_postgresql.sql"); + + postgreSQLContainer.start(); + + return 
postgreSQLContainer; + } + + @Test + void testSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException { + Path outFile = tempDir.resolve("scanresult.xslx"); + URL referenceScanReport = SourceDataScanPostgreSQLIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx"); + + SourceDataScan sourceDataScan = ScanTestUtils.createSourceDataScan(); + DbSettings dbSettings = getTestDbSettings(); + + sourceDataScan.process(dbSettings, outFile.toString()); + assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(outFile, Paths.get(referenceScanReport.toURI()), DbType.POSTGRESQL)); + } + + private List getTableNames(DbSettings dbSettings) { + try (RichConnection richConnection = new RichConnection(dbSettings)) { + return richConnection.getTableNames("public"); + } + } + + private DbSettings getTestDbSettings() { + DbSettings dbSettings = new DbSettings(); + dbSettings.dbType = DbType.POSTGRESQL; + dbSettings.sourceType = DbSettings.SourceType.DATABASE; + dbSettings.server = postgreSQL.getJdbcUrl(); + dbSettings.database = "public"; // always for PostgreSQL + dbSettings.user = postgreSQL.getUsername(); + dbSettings.password = postgreSQL.getPassword(); + dbSettings.tables = getTableNames(dbSettings); + + return dbSettings; + } +} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeGuiIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeGuiIT.java new file mode 100644 index 00000000..880c8c2f --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeGuiIT.java @@ -0,0 +1,142 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.whiterabbit.scan; + +import com.github.caciocavallosilano.cacio.ctc.junit.CacioTest; +import org.assertj.swing.annotation.GUITest; +import org.assertj.swing.core.GenericTypeMatcher; +import org.assertj.swing.edt.GuiActionRunner; +import org.assertj.swing.finder.WindowFinder; +import org.assertj.swing.fixture.DialogFixture; +import org.assertj.swing.fixture.FrameFixture; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.ohdsi.databases.SnowflakeHandler.SnowflakeConfiguration; +import org.ohdsi.databases.SnowflakeTestUtils; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.whiterabbit.Console; +import org.ohdsi.whiterabbit.WhiteRabbitMain; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.junit.jupiter.Container; + +import javax.swing.*; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.ohdsi.databases.configuration.DbType.SNOWFLAKE; +import static 
org.ohdsi.whiterabbit.scan.SourceDataScanSnowflakeIT.*; + +@ExtendWith(GUITestExtension.class) +@CacioTest +class SourceDataScanSnowflakeGuiIT { + + private static FrameFixture window; + private static Console console; + + private final static int WIDTH = 1920; + private final static int HEIGHT = 1080; + @BeforeAll + public static void setupOnce() { + System.setProperty("cacio.managed.screensize", String.format("%sx%s", WIDTH, HEIGHT)); + } + + @Container + public static GenericContainer testContainer; + + @BeforeEach + public void onSetUp() { + try { + testContainer = createPythonContainer(); + prepareTestData(testContainer); + } catch (IOException | InterruptedException e) { + throw new RuntimeException("Creating python container failed."); + } + String[] args = {}; + WhiteRabbitMain whiteRabbitMain = GuiActionRunner.execute(() -> new WhiteRabbitMain(true, args)); + console = whiteRabbitMain.getConsole(); + window = new FrameFixture(whiteRabbitMain.getFrame()); + window.show(); // shows the frame to test + } + + @ExtendWith(GUITestExtension.class) + @Test + void testConnectionAndSourceDataScan(@TempDir Path tempDir) throws IOException, URISyntaxException { + Assumptions.assumeTrue(new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker(), "Snowflake system properties file not available"); + URL referenceScanReport = TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx"); + Path personCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/person-no-header.csv").toURI()); + Path costCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/cost-no-header.csv").toURI()); + Files.copy(personCsv, tempDir.resolve("person.csv")); + Files.copy(costCsv, tempDir.resolve("cost.csv")); + window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_LOCATIONS); + window.comboBox("SourceType").selectItem(DbType.SNOWFLAKE.label()); + 
window.textBox("FolderField").setText(tempDir.toAbsolutePath().toString()); + + // first use the test connection button, and expect a popup that informs us that several required fields are empty + // use the "Test connection" button + window.button(WhiteRabbitMain.LABEL_TEST_CONNECTION).click(); + GenericTypeMatcher matcher = new GenericTypeMatcher(JDialog.class, true) { + protected boolean isMatching(JDialog frame) { + return WhiteRabbitMain.TITLE_ERRORS_IN_DATABASE_CONFIGURATION.equals(frame.getTitle()); + } + }; + DialogFixture frame = WindowFinder.findDialog(matcher).using(window.robot()); + frame.button().click(); // close the popup + + // fill in all the required values and try again + assertEquals(SnowflakeConfiguration.TOOLTIP_SNOWFLAKE_ACCOUNT, window.textBox(SnowflakeConfiguration.SNOWFLAKE_ACCOUNT).target().getToolTipText()); + window.textBox(SnowflakeConfiguration.SNOWFLAKE_ACCOUNT).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_ACCOUNT")); + window.textBox(SnowflakeConfiguration.SNOWFLAKE_USER).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_USER")); + window.textBox(SnowflakeConfiguration.SNOWFLAKE_PASSWORD).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_PASSWORD")); + window.textBox(SnowflakeConfiguration.SNOWFLAKE_WAREHOUSE).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_WAREHOUSE")); + window.textBox(SnowflakeConfiguration.SNOWFLAKE_DATABASE).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_DATABASE")); + window.textBox(SnowflakeConfiguration.SNOWFLAKE_SCHEMA).setText(SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_SCHEMA")); + + // use the "Test connection" button + window.button(WhiteRabbitMain.LABEL_TEST_CONNECTION).click(); + matcher = new GenericTypeMatcher(JDialog.class, true) { + protected boolean isMatching(JDialog frame) { + return WhiteRabbitMain.LABEL_CONNECTION_SUCCESSFUL.equals(frame.getTitle()); + } + }; + frame = 
WindowFinder.findDialog(matcher).using(window.robot()); + frame.button().click(); + + // switch to the scan panel, add all tables found and run the scan + window.tabbedPane(WhiteRabbitMain.NAME_TABBED_PANE).selectTab(WhiteRabbitMain.LABEL_SCAN).click(); + window.button(WhiteRabbitMain.LABEL_ADD_ALL_IN_DB).click(); + window.button(WhiteRabbitMain.LABEL_SCAN_TABLES).click(); + + // verify the generated scan report against the reference + assertTrue(ScanTestUtils.isScanReportGeneratedAndMatchesReference( + console, + tempDir.resolve("ScanReport.xlsx"), + Paths.get(referenceScanReport.toURI()), + SNOWFLAKE)); + } +} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeIT.java new file mode 100644 index 00000000..acfe3dec --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/SourceDataScanSnowflakeIT.java @@ -0,0 +1,159 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.whiterabbit.scan; + +import org.apache.commons.lang.StringUtils; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.junit.jupiter.api.io.TempDir; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.databases.SnowflakeTestUtils; +import org.ohdsi.whiterabbit.WhiteRabbitMain; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.BindMode; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.utility.DockerImageName; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.junit.jupiter.api.Assertions.*; + +public class SourceDataScanSnowflakeIT { + + public final static String SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE = "SNOWFLAKE_WR_TEST_ACCOUNT"; + static Logger logger = LoggerFactory.getLogger(SourceDataScanSnowflakeIT.class); + + final static String CONTAINER_DATA_PATH = "/scan_data"; + @Container + public static GenericContainer testContainer; + + @BeforeEach + public void setUp() { + try { + testContainer = createPythonContainer(); + prepareTestData(testContainer); + } catch (IOException | InterruptedException e) { + throw new RuntimeException("Creating python container failed.", e); // preserve the cause so the real failure is reported + } + } + + //@Test + void testWarnWhenRunningWithoutSnowflakeConfigured() { + String snowflakeWrTestAccount = System.getenv(SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE); + assertFalse(StringUtils.isEmpty(snowflakeWrTestAccount) &&
StringUtils.isEmpty(System.getProperty("ohdsi.org.whiterabbit.skip_snowflake_tests")), + String.format("\nTest class %s is being run without a Snowflake test instance configured.\n" + + "This is NOT a valid verification run.", SourceDataScanSnowflakeIT.class.getName())); + } + + @Test + //@EnabledIfEnvironmentVariable(named = SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE, matches = ".+") + void testProcessSnowflakeFromIni(@TempDir Path tempDir) throws URISyntaxException, IOException { + Assumptions.assumeTrue(new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker(), "Snowflake system properties file not available"); + Charset charset = StandardCharsets.UTF_8; + Path iniFile = tempDir.resolve("snowflake.ini"); + URL iniTemplate = SourceDataScanSnowflakeIT.class.getClassLoader().getResource("scan_data/snowflake.ini.template"); + URL referenceScanReport = SourceDataScanSnowflakeIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx"); + assertNotNull(iniTemplate); + String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset); + content = content.replace("%WORKING_FOLDER%", tempDir.toString()) // literal replace: values may contain '$' or backslashes + .replace("%SNOWFLAKE_ACCOUNT%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_ACCOUNT")) + .replace("%SNOWFLAKE_USER%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_USER")) + .replace("%SNOWFLAKE_PASSWORD%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_PASSWORD")) + .replace("%SNOWFLAKE_WAREHOUSE%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_WAREHOUSE")) + .replace("%SNOWFLAKE_DATABASE%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_DATABASE")) + .replace("%SNOWFLAKE_SCHEMA%", SnowflakeTestUtils.getPropertyOrFail("SNOWFLAKE_WR_TEST_SCHEMA")); + Files.write(iniFile, content.getBytes(charset)); + WhiteRabbitMain wrMain = new WhiteRabbitMain(true, new String[]{"-ini", iniFile.toAbsolutePath().toString()}); + assertNotNull(referenceScanReport); +
assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.SNOWFLAKE)); + } + + static void prepareTestData(GenericContainer container) throws IOException, InterruptedException { + SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker checker = new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker(); + if (checker.getAsBoolean()) { + prepareTestData(container, new SnowflakeTestUtils.PropertyReader()); + } + } + + static void prepareTestData(GenericContainer container, SnowflakeTestUtils.ReaderInterface reader) throws IOException, InterruptedException { + // snowsql is used for initializing the database + + // add some packages needed for the installation of snowsql + execAndVerifyCommand(container, "/bin/sh", "-c", "apt update; apt -y install wget unzip"); + // download snowsql + execAndVerifyCommand(container, "/bin/bash", "-c", + "wget -q https://sfc-repo.snowflakecomputing.com/snowsql/bootstrap/1.2/linux_x86_64/snowsql-1.2.29-linux_x86_64.bash;"); + // install snowsql + execAndVerifyCommand(container, "/bin/bash", "-c", + "echo -e \"/tmp\\nN\" | bash snowsql-1.2.29-linux_x86_64.bash "); + + // run the sql script needed to initialize the test data + execAndVerifyCommand(container, "/bin/bash", "-c", + String.format("(cd %s; SNOWSQL_PWD='%s' /tmp/snowsql -a %s -u %s -d %s -s %s -f %s/create_data_snowflake.sql)", + CONTAINER_DATA_PATH, + reader.getOrFail("SNOWFLAKE_WR_TEST_PASSWORD"), + reader.getOrFail(SNOWFLAKE_ACCOUNT_ENVIRONMENT_VARIABLE), + reader.getOrFail("SNOWFLAKE_WR_TEST_USER"), + reader.getOrFail("SNOWFLAKE_WR_TEST_DATABASE"), + reader.getOrFail("SNOWFLAKE_WR_TEST_SCHEMA"), + CONTAINER_DATA_PATH + )); + } + + public static GenericContainer createPythonContainer() throws IOException, InterruptedException { + GenericContainer testContainer = new GenericContainer<>(DockerImageName.parse("ubuntu:22.04")) + .withCommand("/bin/sh", "-c", "tail -f /dev/null") // keeps the 
container running until it is explicitly stopped + .withClasspathResourceMapping( + "scan_data", + CONTAINER_DATA_PATH, + BindMode.READ_ONLY); + + testContainer.start(); + + return testContainer; + } + + private static void execAndVerifyCommand(GenericContainer container, String... command) throws IOException, InterruptedException { + execAndVerifyCommand(container, 0, command); + } + private static void execAndVerifyCommand(GenericContainer container, int expectedExitValue, String... command) throws IOException, InterruptedException { + org.testcontainers.containers.Container.ExecResult result; + + result = container.execInContainer(command); + if (result.getExitCode() != expectedExitValue) { + logger.error("stdout: {}", result.getStdout()); + logger.error("stderr: {}", result.getStderr()); + // hide the password, if present, so it won't appear in logs (pragmatic) + String message = ("Command failed: " + String.join(" ", command)) + .replace(SnowflakeTestUtils.getEnvOrFail("SNOWFLAKE_WR_TEST_PASSWORD"), "xxxxx"); + assertEquals(expectedExitValue, result.getExitCode(), message); + } + } +} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java deleted file mode 100644 index c36e99ad..00000000 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScan.java +++ /dev/null @@ -1,200 +0,0 @@ -package org.ohdsi.whiterabbit.scan; - -import org.apache.commons.io.FileUtils; -import org.junit.jupiter.api.Tag; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; -import org.ohdsi.databases.DbType; -import org.ohdsi.databases.RichConnection; -import org.ohdsi.ooxml.ReadXlsxFileWithHeader; -import org.ohdsi.utilities.files.Row; -import org.ohdsi.utilities.files.RowUtilities; -import org.ohdsi.whiteRabbit.DbSettings; -import org.ohdsi.whiteRabbit.scan.SourceDataScan; -import org.testcontainers.containers.BindMode; -import 
org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.junit.jupiter.Container; -import org.testcontainers.junit.jupiter.Testcontainers; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.lang.reflect.Field; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.*; - -import static org.junit.jupiter.api.Assertions.*; - - -@Testcontainers -@Tag("DockerRequired") -class TestSourceDataScan { - - @Container - public static PostgreSQLContainer postgreSQL; - - static { - /* - * Since the database is only read, setting it up once suffices. - * - * Note that the init script is read locally, but accesses the CSV files from - * the resource mapped into the container. - * - * The data used in this test are actually OMOP data. One reason for this is convenience: the DDL - * for this data is know and could simply be copied instead of composed. - * Also, for the technical correctness of WhiteRabbit (does it open the database, get the table - * names and scan those tables), the actual nature of the source data does not matter. 
- */ - try { - postgreSQL = new PostgreSQLContainer<>("postgres:13.1") - .withUsername("test") - .withPassword("test") - .withDatabaseName("test") - .withClasspathResourceMapping( - "scan_data", - "/scan_data", - BindMode.READ_ONLY) - .withInitScript("scan_data/create_data_postgresql.sql"); - - postgreSQL.start(); - - } finally { - if (postgreSQL != null) { - postgreSQL.stop(); - } - } - } - - void testProcess(Path tempDir) throws IOException { - Path outFile = tempDir.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME); - SourceDataScan sourceDataScan = new SourceDataScan(); - DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); - - sourceDataScan.process(dbSettings, outFile.toString()); - ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType); - } - - @Test - void testApachePoiTmpFileProblemWithAutomaticResolution(@TempDir Path tempDir) throws IOException, ReflectiveOperationException { - // intends to verify solution of this bug: https://github.com/OHDSI/WhiteRabbit/issues/293 - - /* - * This tests a fix that assumes that the bug referenced here occurs in a multi-user situation where the - * first user running the scan, and causing /tmp/poifiles to created, does so by creating it read-only - * for everyone else. This directory is not automatically cleaned up, so every following user on the same - * system running the scan encounters the problem that /tmp/poifiles already exists and is read-only, - * causing a crash when the Apacho poi library attemps to create the xslx file. - * - * The class SourceDataScan has been extended with a static method, called implicitly once through a static{} - * block, to create a TempDir strategy that will create a unique directory for each instance/run of WhiteRabbit. - * This effectively solves the assumed error situation. 
- * - * This test does not execute a multi-user situation, but emulates it by leaving the tmp directory in a - * read-only state after the first scan, and then confirming that a second scan fails. After that, - * a new unique tmp dir is enforced by invoking SourceDataScan.setUniqueTempDirStrategyForApachePoi(), - * and a new scan now runs successfully. - */ - - // Make sure the scenarios are tested without a user configured tmp dir, so set environment variable and - // system property to an empty value - System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, ""); - updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, ""); - Path defaultTmpPath = SourceDataScan.getDefaultPoiTmpPath(tempDir); - - if (!Files.exists(defaultTmpPath)) { - Files.createDirectory(defaultTmpPath); - } else { - if (Files.exists(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME))) { - Files.delete(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME)); - } - } - - // process should pass without problem, and afterwards the default tmp dir should exist - testProcess(defaultTmpPath); - assertTrue(Files.exists(defaultTmpPath)); - - // provoke the problem situation. 
make the default tmp dir readonly, try to process again - assertTrue(Files.deleteIfExists(defaultTmpPath.resolve(SourceDataScan.SCAN_REPORT_FILE_NAME))); // or Apache Poi will happily reuse it - assertTrue(defaultTmpPath.toFile().setReadOnly()); - RuntimeException thrown = assertThrows(RuntimeException.class, () -> { - testProcess(defaultTmpPath); - }); - assertTrue(thrown.getMessage().contains("Permission denied")); - - // invoke the static method to set a new tmp dir, process again (should succeed) and verify that - // the new tmpdir is indeed different from the default - String myTmpDir = SourceDataScan.setUniqueTempDirStrategyForApachePoi(); - testProcess(Paths.get(myTmpDir)); - assertNotEquals(defaultTmpPath.toFile().getAbsolutePath(), myTmpDir); - - // we might have left behind an unworkable situation; attempt to solve that - if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) { - assertTrue(defaultTmpPath.toFile().setWritable(true)); - } - } - - @Test - void testApachePoiTmpFileProblemWithUserConfiguredResolution(@TempDir Path tempDir) throws IOException, ReflectiveOperationException { - // 1. Verify that the poi tmp dir property is used, if set - Path tmpDirFromProperty = tempDir.resolve("setByProperty"); - System.setProperty(SourceDataScan.POI_TMP_DIR_PROPERTY_NAME, tmpDirFromProperty.toFile().getAbsolutePath()); - Files.createDirectories(tmpDirFromProperty); - - SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the property - testProcess(tmpDirFromProperty); - assertTrue(Files.exists(tmpDirFromProperty)); - - cleanTmpDir(tmpDirFromProperty); - - // 2. 
Verify that the poi tmp dir environment variable is used, if set, and overrules the property set above - Path tmpDirFromEnvironmentVariable = tempDir.resolve("setByEnvVar"); - updateEnv(SourceDataScan.POI_TMP_DIR_ENVIRONMENT_VARIABLE_NAME, tmpDirFromEnvironmentVariable.toFile().getAbsolutePath()); - Files.createDirectories(tmpDirFromEnvironmentVariable); - - SourceDataScan.setUniqueTempDirStrategyForApachePoi(); // need to reset to pick up the env. var. - testProcess(tmpDirFromEnvironmentVariable); - assertFalse(Files.exists(tmpDirFromProperty)); - assertTrue(Files.exists(tmpDirFromEnvironmentVariable)); - cleanTmpDir(tmpDirFromEnvironmentVariable); - } - - @SuppressWarnings({ "unchecked" }) - private static void updateEnv(String name, String val) throws ReflectiveOperationException { - Map env = System.getenv(); - Field field = env.getClass().getDeclaredField("m"); - field.setAccessible(true); - ((Map) field.get(env)).put(name, val); - } - private List getTableNames(DbSettings dbSettings) { - try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { - return richConnection.getTableNames("public"); - } - } - - private static void cleanTmpDir(Path path) { - if (Files.exists(path)) { - if (!Files.isWritable(path)) { - assertTrue(path.toFile().setWritable(true), - String.format("This test cannot run properly if %s exists but is not writeable. 
Either remove it or make it writeable", - path.toFile().getAbsolutePath())); - } - assertTrue(deleteDir(path.toFile())); - } - } - private static boolean deleteDir(File file) { - if (Files.exists(file.toPath())) { - File[] contents = file.listFiles(); - if (contents != null) { - for (File f : contents) { - if (!Files.isSymbolicLink(f.toPath())) { - deleteDir(f); - } - } - } - return file.delete(); - } - return true; - } -} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvGui.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvGui.java new file mode 100644 index 00000000..20c5d188 --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvGui.java @@ -0,0 +1,86 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +package org.ohdsi.whiterabbit.scan; + +import com.github.caciocavallosilano.cacio.ctc.junit.CacioTest; +import org.assertj.swing.edt.GuiActionRunner; +import org.assertj.swing.fixture.FrameFixture; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.whiterabbit.Console; +import org.ohdsi.whiterabbit.WhiteRabbitMain; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import org.junit.jupiter.api.extension.ExtendWith; +import org.ohdsi.whiterabbit.gui.LocationsPanel; + +@ExtendWith(GUITestExtension.class) +@CacioTest +public class TestSourceDataScanCsvGui { + private static FrameFixture window; + private static Console console; + + private final static int WIDTH = 1920; + private final static int HEIGHT = 1080; + @BeforeAll + public static void setupOnce() { + System.setProperty("cacio.managed.screensize", String.format("%sx%s", WIDTH, HEIGHT)); + } + + @BeforeEach + public void onSetUp() { + String[] args = {}; + WhiteRabbitMain whiteRabbitMain = GuiActionRunner.execute(() -> new WhiteRabbitMain(true, args)); + console = whiteRabbitMain.getConsole(); + window = new FrameFixture(whiteRabbitMain.getFrame()); + window.show(); // shows the frame to test + } + + @Test + void testSourceDataScanFromGui(@TempDir Path tempDir) throws IOException, URISyntaxException { + URL referenceScanReport = TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-csv.xlsx"); + Path personCsv = Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/person-header.csv").toURI()); + Path costCsv = 
Paths.get(TestSourceDataScanCsvGui.class.getClassLoader().getResource("scan_data/cost-header.csv").toURI()); + Files.copy(personCsv, tempDir.resolve("person.csv")); + Files.copy(costCsv, tempDir.resolve("cost.csv")); + window.tabbedPane("TabbedPane").selectTab(WhiteRabbitMain.LABEL_LOCATIONS); + window.comboBox("SourceType").selectItem(DbType.DELIMITED_TEXT_FILES.label()); + window.textBox(LocationsPanel.NAME_DELIMITER).setText(","); + window.textBox("FolderField").setText(tempDir.toAbsolutePath().toString()); + window.tabbedPane("TabbedPane").selectTab("Scan"); + window.button("Add").click(); + window.fileChooser("FileChooser").fileNameTextBox().setText("\"cost.csv\" \"person.csv\""); + window.fileChooser("FileChooser").approveButton().click(); + window.button(WhiteRabbitMain.LABEL_SCAN_TABLES).click(); + + assertTrue(ScanTestUtils.isScanReportGeneratedAndMatchesReference( + console, + tempDir.resolve("ScanReport.xlsx"), + Paths.get(referenceScanReport.toURI()), + DbType.DELIMITED_TEXT_FILES)); + } +} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvIniFile.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvIniFile.java new file mode 100644 index 00000000..0e6f21c9 --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanCsvIniFile.java @@ -0,0 +1,74 @@ +/******************************************************************************* + * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve + * + * This file is part of WhiteRabbit + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ******************************************************************************/ +package org.ohdsi.whiterabbit.scan; + +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.io.TempDir; +import org.ohdsi.databases.configuration.DbType; +import org.ohdsi.whiterabbit.WhiteRabbitMain; +import org.opentest4j.AssertionFailedError; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestSourceDataScanCsvIniFile { + @Test + void testSourceDataScanFromIniFile(@TempDir Path tempDir) throws URISyntaxException, IOException { + Charset charset = StandardCharsets.UTF_8; + Path iniFile = tempDir.resolve("tsv.ini"); + URL iniTemplate = TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/tsv.ini.template"); + URL referenceScanReport = TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-csv.xlsx"); + Path personCsv = Paths.get(TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/person-header.csv").toURI()); + Path costCsv = 
Paths.get(TestSourceDataScanCsvIniFile.class.getClassLoader().getResource("scan_data/cost-header.csv").toURI()); + assertNotNull(iniTemplate); + String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset); + content = content.replace("%WORKING_FOLDER%", tempDir.toString()); // literal replace: the path may contain '$' or backslashes + Files.write(iniFile, content.getBytes(charset)); + Files.copy(personCsv, tempDir.resolve("person.csv")); + Files.copy(costCsv, tempDir.resolve("cost.csv")); + WhiteRabbitMain wrMain = new WhiteRabbitMain(false, new String[]{"-ini", iniFile.toAbsolutePath().toString()}); + assertNotNull(referenceScanReport); + assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.DELIMITED_TEXT_FILES)); + } + + @Test + // minimal test to verify comparing ScanReports: test the tester :-) (and no, this test strictly speaking does not belong here, it should be in its own class) + void testCompareSheets() { + // confirm that ScanTestUtils.scanValuesMatchReferenceValues does know how to compare scan results (same, different) + Map<String, List<List<String>>> sheets1 = Collections.singletonMap("Field Overview", Collections.singletonList(Arrays.asList("one", "two", "three"))); + Map<String, List<List<String>>> sheets2 = Collections.singletonMap("Field Overview", Collections.singletonList(Arrays.asList("one", "two", "three"))); + Map<String, List<List<String>>> sheets3 = Collections.singletonMap("Field Overview", Collections.singletonList(Arrays.asList("two", "three", "four"))); + AssertionFailedError thrown = Assertions.assertThrows(AssertionFailedError.class, () -> { + ScanTestUtils.scanValuesMatchReferenceValues(sheets1, sheets3, DbType.POSTGRESQL); + }, "AssertionFailedError was expected"); + ScanTestUtils.scanValuesMatchReferenceValues(sheets1, sheets2, DbType.POSTGRESQL); + } +} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java deleted file mode 100644
index 0678d308..00000000 --- a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/TestSourceDataScanPostgreSQL.java +++ /dev/null @@ -1,83 +0,0 @@ -package org.ohdsi.whiterabbit.scan; - -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; -import org.ohdsi.databases.RichConnection; -import org.ohdsi.whiteRabbit.DbSettings; -import org.ohdsi.whiteRabbit.scan.SourceDataScan; -import org.testcontainers.containers.BindMode; -import org.testcontainers.containers.PostgreSQLContainer; -import org.testcontainers.junit.jupiter.Container; -import org.testcontainers.junit.jupiter.Testcontainers; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.*; - - -@Testcontainers(disabledWithoutDocker = true) -class TestSourceDataScanPostgreSQL { - - @Container - public static PostgreSQLContainer postgreSQL; - - static { - /* - * Since the database is only read, setting it up once suffices. - * - * Note that the init script is read locally, but accesses the CSV files from - * the resource mapped into the container. - * - * The data used in this test are actually OMOP data. One reason for this is convenience: the DDL - * for this data is know and could simply be copied instead of composed. - * Also, for the technical correctness of WhiteRabbit (does it open the database, get the table - * names and scan those tables), the actual nature of the source data does not matter. 
- */ - try { - postgreSQL = new PostgreSQLContainer<>("postgres:13.1") - .withUsername("test") - .withPassword("test") - .withDatabaseName("test") - .withClasspathResourceMapping( - "scan_data", - "/scan_data", - BindMode.READ_ONLY) - .withInitScript("scan_data/create_data_postgresql.sql"); - - postgreSQL.start(); - - } finally { - if (postgreSQL != null) { - postgreSQL.stop(); - } - } - } - - @Test - public void connectToDatabase() { - // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker - DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); - try (RichConnection richConnection = new RichConnection(dbSettings.server, dbSettings.domain, dbSettings.user, dbSettings.password, dbSettings.dbType)) { - // do nothing, connection will be closed automatically because RichConnection implements interface Closeable - } - } - - @Test - public void testGetTableNames() { - // this is also implicitly tested by testSourceDataScan(), but having it fail separately helps identify problems quicker - DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); - List tableNames = ScanTestUtils.getTableNamesPostgreSQL(dbSettings); - assertEquals(2, tableNames.size()); - } - @Test - void testSourceDataScan(@TempDir Path tempDir) throws IOException { - Path outFile = tempDir.resolve("scanresult.xslx"); - SourceDataScan sourceDataScan = new SourceDataScan(); - DbSettings dbSettings = ScanTestUtils.getTestPostgreSQLSettings(postgreSQL); - - sourceDataScan.process(dbSettings, outFile.toString()); - ScanTestUtils.verifyScanResultsFromXSLX(outFile, dbSettings.dbType); - } -} diff --git a/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/VerifyDistributionIT.java b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/VerifyDistributionIT.java new file mode 100644 index 00000000..dbad9b2f --- /dev/null +++ b/whiterabbit/src/test/java/org/ohdsi/whiterabbit/scan/VerifyDistributionIT.java 
@@ -0,0 +1,255 @@
+/*******************************************************************************
+ * Copyright 2023 Observational Health Data Sciences and Informatics & The Hyve
+ *
+ * This file is part of WhiteRabbit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package org.ohdsi.whiterabbit.scan;
+
+import org.apache.commons.lang.StringUtils;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
+import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
+import org.junit.jupiter.api.io.TempDir;
+import org.junit.runners.Parameterized;
+import org.ohdsi.databases.DBConnector;
+import org.ohdsi.databases.SnowflakeTestUtils;
+import org.ohdsi.databases.configuration.DbType;
+import org.ohdsi.utilities.files.IniFile;
+import org.ohdsi.whiterabbit.WhiteRabbitMain;
+import org.testcontainers.containers.BindMode;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.Container.ExecResult;
+import org.testcontainers.utility.DockerImageName;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.*;
+import java.util.function.BooleanSupplier;
+import java.util.stream.Collectors;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.ohdsi.whiterabbit.scan.SourceDataScanSnowflakeIT.createPythonContainer;
+import static org.ohdsi.whiterabbit.scan.SourceDataScanSnowflakeIT.prepareTestData;
+
+/**
+ * Intent: "deploy" the distributed application in a docker container (TestContainer) containing a Java runtime
+ * of a specified version, and run a test of WhiteRabbit that aims to verify that the distribution is complete,
+ * i.e. no dependencies are missing. Data for a scan on csv files is used to run WhiteRabbit.
+ *
+ * Note that this does not test any of the JDBC driver dependencies, unless these databases are actually used.
+ */
+public class VerifyDistributionIT {
+
+    // host directory that createJavaContainer() binds into the containers as WORKDIR_IN_CONTAINER
+    @TempDir
+    static Path tempDir;
+
+    private static final String WORKDIR_IN_CONTAINER = "/whiterabbit";
+    private static final String APPDIR_IN_CONTAINER = "/app";
+
+    @Test
+    void testDistributionWithJava8() throws IOException, URISyntaxException, InterruptedException {
+        testWhiteRabbitInContainer("eclipse-temurin:8", "openjdk version \"1.8.");
+    }
+
+    @Test
+    void testDistributionWithJava11() throws IOException, URISyntaxException, InterruptedException {
+        testWhiteRabbitInContainer("eclipse-temurin:11", "openjdk version \"11.0.");
+    }
+
+    @Test
+    void testDistributionWithJava17() throws IOException, URISyntaxException, InterruptedException {
+        testWhiteRabbitInContainer("eclipse-temurin:17", "openjdk version \"17.0.");
+    }
+
+    /**
+     * Runs the DBConnector main class on the jars in the distribution's repo directory inside a Java 11 container and
+     * verifies that it reports all JDBC drivers as loadable. Then removes the files matching snowflake* from that
+     * directory and verifies that the same run no longer reports success.
+     */
+    @Test
+    void verifyAllJDBCDriversLoadable() throws IOException, InterruptedException {
+        String loadDriversCommand =
+                String.format("cd %s/repo; java -classpath '*' org.ohdsi.databases.DBConnector", APPDIR_IN_CONTAINER);
+        try (GenericContainer javaContainer = createJavaContainer("eclipse-temurin:11")) {
+            javaContainer.start();
+            ExecResult execResult = javaContainer.execInContainer("sh", "-c", loadDriversCommand);
+            if (execResult.getExitCode() != 0) {
+                System.out.println("stdout:" + execResult.getStdout());
+                System.out.println("stderr:" + execResult.getStderr());
+            }
+            assertTrue(execResult.getStdout().contains(DBConnector.ALL_JDBC_DRIVERS_LOADABLE), "Not all supported JDBC drivers could be loaded");
+
+            // sabotage: confirms that the check above really breaks when a driver is missing
+            javaContainer.execInContainer("sh", "-c", String.format("rm %s/repo/snowflake*", APPDIR_IN_CONTAINER));
+            execResult = javaContainer.execInContainer("sh", "-c", loadDriversCommand);
+            assertFalse(execResult.getStdout().contains(DBConnector.ALL_JDBC_DRIVERS_LOADABLE), "JDBC driver check still reports success after the Snowflake driver was removed");
+        }
+    }
+
+    //@Test // useful while developing/debugging, leaving in place to test again after Snowflake JDBC driver update
+    void verifySnowflakeFailureInJava17() throws IOException, URISyntaxException, InterruptedException {
+        /*
+         * There is an issue with Snowflake JDBC that causes a failure in Java 16 and later
+         * (see https://community.snowflake.com/s/article/JDBC-Driver-Compatibility-Issue-With-JDK-16-and-Later)
+         * A flag can be passed to the JVM to work around this: --add-opens=java.base/java.nio=ALL-UNNAMED
+         *
+         * The whiteRabbit script in the distribution passes this flag.
+         *
+         * The tests below verify that:
+         * - the flag does not cause problems when running with Java 8 (1.8) or 11
+         * - without the flag, a failure occurs when running with Java 17
+         * - passing the flag fixes the failure with Java 17
+         *
+         * As the flag is in the distributed script, it needs to be edited out of the script.
+         *
+         * Note that we only test with the LTS versions of Java. This leaves Java 16 untested and unfixed.
+         *
+         * Once a fix is available in a newer version of the Snowflake JDBC jar, and it is used in WhiteRabbit,
+         * the test that now confirms the issue by expecting an Assertion error should start to fail.
+         * Then it is time to remove the flag (it is in the pom.xml for the whiterabbit module), and remove these tests,
+         * or normalize them to simply verify that all works well.
+         */
+        String patchingFlag = "--add-opens=java.base/java.nio=ALL-UNNAMED";
+        String javaOpts = String.format("JAVA_OPTS='%s'", patchingFlag);
+
+        // verify that the flag as set in the whiteRabbit script does not have an adverse effect when running with Java 11
+        // note that this flag is not supported by Java 8 (1.8)
+        runDistributionWithSnowflake("eclipse-temurin:11", javaOpts);
+
+        // verify that the failure occurs when running with Java 17, without the flag
+        Assertions.assertThrows(org.opentest4j.AssertionFailedError.class,
+                () -> runDistributionWithSnowflake("eclipse-temurin:17", ""));
+
+        // finally, verify that passing the flag fixes the failure when running with Java 17
+        runDistributionWithSnowflake("eclipse-temurin:17", javaOpts);
+    }
+
+    /**
+     * Calls prepareTestData() with a python container, then runs the distributed whiteRabbit script with a
+     * Snowflake ini file inside a container started from the given image, and compares the generated scan report
+     * with the reference report. The run is skipped (JUnit assumption) when the Snowflake settings check fails.
+     *
+     * @param javaImageName name of the docker image that provides the Java runtime
+     * @param javaOpts      shell text placed in front of the whiteRabbit command (e.g. JAVA_OPTS='...'); may be empty
+     */
+    void runDistributionWithSnowflake(String javaImageName, String javaOpts) throws IOException, InterruptedException, URISyntaxException {
+        // test only run when there are settings available for Snowflake; otherwise it should be skipped
+        Assumptions.assumeTrue(new SnowflakeTestUtils.SnowflakeSystemPropertiesFileChecker(), "Snowflake system properties file not available");
+        SnowflakeTestUtils.PropertyReader reader = new SnowflakeTestUtils.PropertyReader();
+        try (GenericContainer testContainer = createPythonContainer()) {
+            prepareTestData(testContainer, reader);
+            testContainer.stop();
+
+            try (GenericContainer javaContainer = createJavaContainer(javaImageName)) {
+                javaContainer.start();
+                Charset charset = StandardCharsets.UTF_8;
+                Path iniFile = tempDir.resolve("snowflake.ini");
+                URL iniTemplate = VerifyDistributionIT.class.getClassLoader().getResource("scan_data/snowflake.ini.template");
+                URL referenceScanReport = SourceDataScanSnowflakeIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-sql.xlsx");
+                assertNotNull(iniTemplate, "scan_data/snowflake.ini.template not found on the classpath");
+                assertNotNull(referenceScanReport, "reference scan report not found on the classpath");
+                String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset);
+                // String.replace substitutes literally; replaceAll would treat '$' and '\' in the substituted
+                // values (e.g. a password) as regex group references and fail or corrupt the value
+                content = content.replace("%WORKING_FOLDER%", WORKDIR_IN_CONTAINER)
+                        .replace("%SNOWFLAKE_ACCOUNT%", reader.getOrFail("SNOWFLAKE_WR_TEST_ACCOUNT"))
+                        .replace("%SNOWFLAKE_USER%", reader.getOrFail("SNOWFLAKE_WR_TEST_USER"))
+                        .replace("%SNOWFLAKE_PASSWORD%", reader.getOrFail("SNOWFLAKE_WR_TEST_PASSWORD"))
+                        .replace("%SNOWFLAKE_WAREHOUSE%", reader.getOrFail("SNOWFLAKE_WR_TEST_WAREHOUSE"))
+                        .replace("%SNOWFLAKE_DATABASE%", reader.getOrFail("SNOWFLAKE_WR_TEST_DATABASE"))
+                        .replace("%SNOWFLAKE_SCHEMA%", reader.getOrFail("SNOWFLAKE_WR_TEST_SCHEMA"));
+                Files.write(iniFile, content.getBytes(charset));
+                // verify that the distribution of whiterabbit has been generated and is available inside the container
+                ExecResult execResult = javaContainer.execInContainer("sh", "-c", String.format("ls %s", APPDIR_IN_CONTAINER));
+                assertTrue(execResult.getStdout().contains("repo"), "WhiteRabbit distribution is not accessible inside container");
+
+                // run whiterabbit and verify the result
+                execResult = javaContainer.execInContainer("sh", "-c",
+                        String.format("%s %s/bin/whiteRabbit -ini %s/snowflake.ini", javaOpts, APPDIR_IN_CONTAINER, WORKDIR_IN_CONTAINER));
+                assertTrue(execResult.getStdout().contains("Started new scan of 2 tables..."));
+                assertTrue(execResult.getStdout().contains("Scanning table PERSON"));
+                assertTrue(execResult.getStdout().contains("Scanning table COST"));
+                assertTrue(execResult.getStdout().contains("Scan report generated: /whiterabbit/ScanReport.xlsx"));
+
+                assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.SNOWFLAKE));
+            }
+        }
+    }
+
+    /**
+     * Runs the distributed whiteRabbit script on the csv test files inside a container started from the given image,
+     * after checking the Java version reported by that container, and compares the generated scan report with the
+     * reference report.
+     *
+     * @param imageName       name of the docker image that provides the Java runtime
+     * @param expectedVersion prefix that the stderr output of "java -version" must start with
+     */
+    private void testWhiteRabbitInContainer(String imageName, String expectedVersion) throws IOException, InterruptedException, URISyntaxException {
+        try (GenericContainer javaContainer = createJavaContainer(imageName)) {
+            javaContainer.start();
+
+            Charset charset = StandardCharsets.UTF_8;
+            Path iniFile = tempDir.resolve("tsv.ini");
+            URL iniTemplate = VerifyDistributionIT.class.getClassLoader().getResource("scan_data/tsv.ini.template");
+            URL referenceScanReport = VerifyDistributionIT.class.getClassLoader().getResource("scan_data/ScanReport-reference-v0.10.7-csv.xlsx");
+            Path personCsv = Paths.get(VerifyDistributionIT.class.getClassLoader().getResource("scan_data/person-header.csv").toURI());
+            Path costCsv = Paths.get(VerifyDistributionIT.class.getClassLoader().getResource("scan_data/cost-header.csv").toURI());
+            assertNotNull(iniTemplate);
+            assertNotNull(referenceScanReport, "reference scan report not found on the classpath");
+            String content = new String(Files.readAllBytes(Paths.get(iniTemplate.toURI())), charset);
+            content = content.replace("%WORKING_FOLDER%", WORKDIR_IN_CONTAINER);
+            Files.write(iniFile, content.getBytes(charset));
+            Files.copy(personCsv, tempDir.resolve("person.csv"), StandardCopyOption.REPLACE_EXISTING);
+            Files.copy(costCsv, tempDir.resolve("cost.csv"), StandardCopyOption.REPLACE_EXISTING);
+
+            // verify that the default java version in the container matches the expected version
+            ExecResult execResult = javaContainer.execInContainer("sh", "-c", "java -version");
+            assertTrue(execResult.getStderr().startsWith(expectedVersion), "default java version in container should match version " + expectedVersion);
+
+            // verify that the distribution of whiterabbit has been generated and is available inside the container
+            execResult = javaContainer.execInContainer("sh", "-c", String.format("ls %s", APPDIR_IN_CONTAINER));
+            assertTrue(execResult.getStdout().contains("repo"), "WhiteRabbit distribution is not accessible inside container");
+
+            // run whiterabbit and verify the result
+            execResult = javaContainer.execInContainer("sh", "-c", String.format("%s/bin/whiteRabbit -ini %s/tsv.ini", APPDIR_IN_CONTAINER, WORKDIR_IN_CONTAINER));
+            assertTrue(execResult.getStdout().contains("Started new scan of 2 tables..."));
+            assertTrue(execResult.getStdout().contains("Scanning table /whiterabbit/person.csv"));
+            assertTrue(execResult.getStdout().contains("Scanning table /whiterabbit/cost.csv"));
+            assertTrue(execResult.getStdout().contains("Scan report generated: /whiterabbit/ScanReport.xlsx"));
+
+            assertTrue(ScanTestUtils.scanResultsSheetMatchesReference(tempDir.resolve("ScanReport.xlsx"), Paths.get(referenceScanReport.toURI()), DbType.DELIMITED_TEXT_FILES));
+
+            javaContainer.stop();
+        }
+    }
+
+    /**
+     * Creates (but does not start) a container from the given image that idles on "tail -f /dev/null", with the
+     * ../dist directory bound to APPDIR_IN_CONTAINER and tempDir bound read-write to WORKDIR_IN_CONTAINER.
+     */
+    private GenericContainer createJavaContainer(String imageName) {
+        return new GenericContainer<>(
+                DockerImageName.parse(imageName))
+                .withCommand("sh", "-c", "tail -f /dev/null")
+                .withFileSystemBind(Paths.get("../dist").toAbsolutePath().toString(), APPDIR_IN_CONTAINER)
+                .withFileSystemBind(tempDir.toString(), WORKDIR_IN_CONTAINER, BindMode.READ_WRITE);
+    }
+}
diff --git a/whiterabbit/src/test/resources/scan_data/README.md b/whiterabbit/src/test/resources/scan_data/README.md
new file mode 100644
index 00000000..19c7fe13
--- /dev/null
+++ b/whiterabbit/src/test/resources/scan_data/README.md
@@ -0,0 +1,6 @@
+The ScanReport-reference-v0.10.7-{csv,sql}.xlsx files in this directory were generated using the last version
+of WhiteRabbit that did not have any unit or integration tests, and serve as the reference for smoke/regression
+tests.
+
+Note that the order in which files/tables are generated into these xlsx files was (is) not entirely predictable,
+so some sorting is done in the tests to match the version under test.
\ No newline at end of file diff --git a/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-csv.xlsx b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-csv.xlsx new file mode 100644 index 00000000..b8d0d441 Binary files /dev/null and b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-csv.xlsx differ diff --git a/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-sql.xlsx b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-sql.xlsx new file mode 100644 index 00000000..ea091dfe Binary files /dev/null and b/whiterabbit/src/test/resources/scan_data/ScanReport-reference-v0.10.7-sql.xlsx differ diff --git a/whiterabbit/src/test/resources/scan_data/cost-header.csv b/whiterabbit/src/test/resources/scan_data/cost-header.csv new file mode 100644 index 00000000..f6825044 --- /dev/null +++ b/whiterabbit/src/test/resources/scan_data/cost-header.csv @@ -0,0 +1,35 @@ +cost_id,cost_event_id,cost_domain_id,cost_type_concept_id,currency_concept_id,total_charge,total_cost,total_paid,paid_by_payer,paid_by_patient,paid_patient_copay,paid_patient_coinsurance,paid_patient_deductible,paid_by_primary,paid_ingredient_cost,paid_dispensing_fee,payer_plan_period_id,amount_allowed,revenue_code_concept_id,reveue_code_source_value,drg_concept_id,drg_source_value +10791,1,Drug,0,44818668,,,180,,0,,0,,,,,,,0,,0, +10792,2,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0, +10793,3,Drug,0,44818668,,,60,,0,,0,,,,,,,0,,0, +10794,4,Drug,0,44818668,,,130,,40,,40,,,,,,,0,,0, +10795,6,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, +10796,8,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10797,10,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0, +10798,11,Drug,0,44818668,,,40,,10,,10,,,,,,,0,,0, +10799,12,Drug,0,44818668,,,110,,40,,40,,,,,,,0,,0, +10800,14,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, +10801,18,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10802,19,Drug,0,44818668,,,10,,0,,0,,,,,,,0,,0, +10803,21,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, 
+10804,25,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10805,27,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10806,28,Drug,0,44818668,,,0,,10,,10,,,,,,,0,,0, +10807,29,Drug,0,44818668,,,30,,10,,10,,,,,,,0,,0, +10808,31,Drug,0,44818668,,,350,,0,,0,,,,,,,0,,0, +10809,33,Drug,0,44818668,,,10,,10,,10,,,,,,,0,,0, +10810,35,Drug,0,44818668,,,570,,80,,80,,,,,,,0,,0, +10811,37,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10812,38,Drug,0,44818668,,,150,,0,,0,,,,,,,0,,0, +10813,41,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10814,42,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10815,45,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0, +10816,51,Drug,0,44818668,,,80,,0,,0,,,,,,,0,,0, +10817,52,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0, +10818,53,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0, +10819,55,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10820,56,Drug,0,44818668,,,70,,170,,170,,,,,,,0,,0, +10821,58,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0, +10822,61,Drug,0,44818668,,,160,,0,,0,,,,,,,0,,0, +10823,62,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, +10824,63,Drug,0,44818668,,,350,,10,,10,,,,,,,0,,0, diff --git a/whiterabbit/src/test/resources/scan_data/cost-no-header.csv b/whiterabbit/src/test/resources/scan_data/cost-no-header.csv new file mode 100644 index 00000000..fa8fa46a --- /dev/null +++ b/whiterabbit/src/test/resources/scan_data/cost-no-header.csv @@ -0,0 +1,34 @@ +10791,1,Drug,0,44818668,,,180,,0,,0,,,,,,,0,,0, +10792,2,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0, +10793,3,Drug,0,44818668,,,60,,0,,0,,,,,,,0,,0, +10794,4,Drug,0,44818668,,,130,,40,,40,,,,,,,0,,0, +10795,6,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, +10796,8,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10797,10,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0, +10798,11,Drug,0,44818668,,,40,,10,,10,,,,,,,0,,0, +10799,12,Drug,0,44818668,,,110,,40,,40,,,,,,,0,,0, +10800,14,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, +10801,18,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10802,19,Drug,0,44818668,,,10,,0,,0,,,,,,,0,,0, +10803,21,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, 
+10804,25,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10805,27,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10806,28,Drug,0,44818668,,,0,,10,,10,,,,,,,0,,0, +10807,29,Drug,0,44818668,,,30,,10,,10,,,,,,,0,,0, +10808,31,Drug,0,44818668,,,350,,0,,0,,,,,,,0,,0, +10809,33,Drug,0,44818668,,,10,,10,,10,,,,,,,0,,0, +10810,35,Drug,0,44818668,,,570,,80,,80,,,,,,,0,,0, +10811,37,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10812,38,Drug,0,44818668,,,150,,0,,0,,,,,,,0,,0, +10813,41,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10814,42,Drug,0,44818668,,,20,,0,,0,,,,,,,0,,0, +10815,45,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0, +10816,51,Drug,0,44818668,,,80,,0,,0,,,,,,,0,,0, +10817,52,Drug,0,44818668,,,120,,0,,0,,,,,,,0,,0, +10818,53,Drug,0,44818668,,,70,,70,,70,,,,,,,0,,0, +10819,55,Drug,0,44818668,,,0,,0,,0,,,,,,,0,,0, +10820,56,Drug,0,44818668,,,70,,170,,170,,,,,,,0,,0, +10821,58,Drug,0,44818668,,,70,,0,,0,,,,,,,0,,0, +10822,61,Drug,0,44818668,,,160,,0,,0,,,,,,,0,,0, +10823,62,Drug,0,44818668,,,30,,0,,0,,,,,,,0,,0, +10824,63,Drug,0,44818668,,,350,,10,,10,,,,,,,0,,0, diff --git a/whiterabbit/src/test/resources/scan_data/cost.csv b/whiterabbit/src/test/resources/scan_data/cost.csv deleted file mode 100644 index 7904c62e..00000000 --- a/whiterabbit/src/test/resources/scan_data/cost.csv +++ /dev/null @@ -1,34 +0,0 @@ -10791 1 Drug 0 44818668 180 0 0 0 0 -10792 2 Drug 0 44818668 70 70 70 0 0 -10793 3 Drug 0 44818668 60 0 0 0 0 -10794 4 Drug 0 44818668 130 40 40 0 0 -10795 6 Drug 0 44818668 30 0 0 0 0 -10796 8 Drug 0 44818668 20 0 0 0 0 -10797 10 Drug 0 44818668 120 0 0 0 0 -10798 11 Drug 0 44818668 40 10 10 0 0 -10799 12 Drug 0 44818668 110 40 40 0 0 -10800 14 Drug 0 44818668 30 0 0 0 0 -10801 18 Drug 0 44818668 0 0 0 0 0 -10802 19 Drug 0 44818668 10 0 0 0 0 -10803 21 Drug 0 44818668 30 0 0 0 0 -10804 25 Drug 0 44818668 20 0 0 0 0 -10805 27 Drug 0 44818668 20 0 0 0 0 -10806 28 Drug 0 44818668 0 10 10 0 0 -10807 29 Drug 0 44818668 30 10 10 0 0 -10808 31 Drug 0 44818668 350 0 0 0 0 -10809 33 Drug 0 44818668 
10 10 10 0 0 -10810 35 Drug 0 44818668 570 80 80 0 0 -10811 37 Drug 0 44818668 0 0 0 0 0 -10812 38 Drug 0 44818668 150 0 0 0 0 -10813 41 Drug 0 44818668 0 0 0 0 0 -10814 42 Drug 0 44818668 20 0 0 0 0 -10815 45 Drug 0 44818668 70 0 0 0 0 -10816 51 Drug 0 44818668 80 0 0 0 0 -10817 52 Drug 0 44818668 120 0 0 0 0 -10818 53 Drug 0 44818668 70 70 70 0 0 -10819 55 Drug 0 44818668 0 0 0 0 0 -10820 56 Drug 0 44818668 70 170 170 0 0 -10821 58 Drug 0 44818668 70 0 0 0 0 -10822 61 Drug 0 44818668 160 0 0 0 0 -10823 62 Drug 0 44818668 30 0 0 0 0 -10824 63 Drug 0 44818668 350 10 10 0 0 diff --git a/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql b/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql index 23cd38f3..f7c7d66e 100644 --- a/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql +++ b/whiterabbit/src/test/resources/scan_data/create_data_postgresql.sql @@ -51,5 +51,5 @@ CREATE TABLE cost ; -COPY COST FROM '/scan_data/cost.csv' DELIMITER E'\t' CSV ENCODING 'UTF8'; -COPY PERSON FROM '/scan_data/person.csv' DELIMITER E'\t' CSV ENCODING 'UTF8'; +COPY COST FROM '/scan_data/cost-no-header.csv' DELIMITER ',' CSV ENCODING 'UTF8'; +COPY PERSON FROM '/scan_data/person-no-header.csv' DELIMITER ',' CSV ENCODING 'UTF8'; diff --git a/whiterabbit/src/test/resources/scan_data/create_data_snowflake.sql b/whiterabbit/src/test/resources/scan_data/create_data_snowflake.sql new file mode 100644 index 00000000..3b53cf1e --- /dev/null +++ b/whiterabbit/src/test/resources/scan_data/create_data_snowflake.sql @@ -0,0 +1,32 @@ +// +// To be able to use the configured snowflake test environment, make sure that the role and grant +// statements below have been executed, using the correct snowflake username for <> +// +//create role if not exists testrole; +//grant usage on database test to role testrole; +//grant usage on schema test.wr_test to role testrole; +//grant ALL PRIVILEGES on schema test.wr_test to role testrole; +//grant role testrole
to user <>; + +//use schema test.wr_test; + +DROP TABLE IF EXISTS wr_test.person; +DROP TABLE IF EXISTS wr_test.cost; + +CREATE TABLE wr_test.cost (cost_id BIGINT, cost_event_id BIGINT, cost_domain_id STRING, cost_type_concept_id BIGINT, currency_concept_id BIGINT, total_charge NUMERIC, total_cost NUMERIC, total_paid NUMERIC, paid_by_payer NUMERIC, paid_by_patient NUMERIC, paid_patient_copay NUMERIC, paid_patient_coinsurance NUMERIC, paid_patient_deductible NUMERIC, paid_by_primary NUMERIC, paid_ingredient_cost NUMERIC, paid_dispensing_fee NUMERIC, payer_plan_period_id BIGINT, amount_allowed NUMERIC, revenue_code_concept_id BIGINT, reveue_code_source_value STRING, drg_concept_id BIGINT, drg_source_value STRING); + +CREATE TABLE wr_test.person (person_id BIGINT, gender_concept_id BIGINT, year_of_birth BIGINT, month_of_birth BIGINT, day_of_birth BIGINT, birth_datetime TIMESTAMP, race_concept_id BIGINT, ethnicity_concept_id BIGINT, location_id BIGINT, provider_id BIGINT, care_site_id BIGINT, person_source_value STRING, gender_source_value STRING, gender_source_concept_id BIGINT, race_source_value STRING, race_source_concept_id BIGINT, ethnicity_source_value STRING, ethnicity_source_concept_id BIGINT); + +REMOVE @~ pattern=".*csv.gz"; + +put file:///scan_data/cost-no-header.csv @~; + +put file:///scan_data/person-no-header.csv @~; + +CREATE OR REPLACE FILE FORMAT my_csv_format TYPE = 'csv' FIELD_DELIMITER = ','; + +COPY INTO cost from @~/cost-no-header.csv.gz FILE_FORMAT = (FORMAT_NAME = 'my_csv_format'); + +COPY INTO person from @~/person-no-header.csv.gz FILE_FORMAT = (FORMAT_NAME = 'my_csv_format'); + +REMOVE @~ pattern=".*csv.gz"; \ No newline at end of file diff --git a/whiterabbit/src/test/resources/scan_data/person-header.csv b/whiterabbit/src/test/resources/scan_data/person-header.csv new file mode 100644 index 00000000..2661396a --- /dev/null +++ b/whiterabbit/src/test/resources/scan_data/person-header.csv @@ -0,0 +1,31 @@ 
+person_id,gender_concept_id,year_of_birth,month_of_birth,day_of_birth,birth_datetime,race_concept_id,ethnicity_concept_id,location_id,provider_id,care_site_id,person_source_value,gender_source_value,gender_source_concept_id,race_source_value,race_source_concept_id,ethnicity_source_value,ethnicity_source_concept_id +1,8507,1923,5,1,,8527,38003564,1,,,00013D2EFD8E45D1,1,,1,,1, +2,8507,1943,1,1,,8527,38003564,2,,,00016F745862898F,1,,1,,1, +3,8532,1936,9,1,,8527,38003564,3,,,0001FDD721E223DC,2,,1,,1, +4,8507,1941,6,1,,0,38003563,4,,,00021CA6FF03E670,1,,5,,5, +5,8507,1936,8,1,,8527,38003564,5,,,00024B3D2352D2D0,1,,1,,1, +6,8507,1943,10,1,,8516,38003564,6,,,0002DAE1C81CC70D,1,,2,,2, +7,8507,1922,7,1,,8527,38003564,7,,,0002F28CE057345B,1,,1,,1, +8,8507,1935,9,1,,8527,38003564,8,,,000308435E3E5B76,1,,1,,1, +9,8532,1976,9,1,,8527,38003564,9,,,000345A39D4157C9,2,,1,,1, +10,8532,1938,10,1,,8516,38003564,10,,,00036A21B65B0206,2,,2,,2, +11,8532,1934,2,1,,8527,38003564,11,,,000489E7EAAD463F,2,,1,,1, +12,8507,1929,6,1,,8527,38003564,12,,,00048EF1F4791C68,1,,1,,1, +13,8532,1936,7,1,,8527,38003564,13,,,0004F0ABD505251D,2,,1,,1, +14,8507,1934,5,1,,8527,38003564,14,,,00052705243EA128,1,,1,,1, +15,8532,1936,3,1,,8527,38003564,15,,,00070B63745BE497,2,,1,,1, +16,8507,1934,1,1,,8527,38003564,16,,,0007E57CC13CE880,1,,1,,1, +17,8532,1919,9,1,,8516,38003564,17,,,0007F12A492FD25D,2,,2,,2, +18,8532,1919,10,1,,8516,38003564,18,,,000A005BA0BED3EA,2,,2,,2, +19,8532,1942,7,1,,8527,38003564,19,,,000B4662348C35B4,2,,1,,1, +20,8507,1938,4,1,,8527,38003564,20,,,000B97BA2314E971,1,,1,,1, +21,8507,1932,8,1,,8516,38003564,21,,,000C7486B11E7030,1,,2,,2, +23,8507,1932,7,1,,8527,38003564,23,,,000DDD364C46E2C6,1,,1,,1, +25,8507,1965,4,1,,8527,38003564,25,,,00108066CA1FACCE,1,,1,,1, +26,8532,1939,12,1,,8527,38003564,26,,,0010D6F80D245D62,2,,1,,1, +27,8532,1940,4,1,,8527,38003564,27,,,0011714C14B52EEB,2,,1,,1, +28,8507,1937,10,1,,8527,38003564,28,,,0011CB1FE23E91AF,1,,1,,1, 
+29,8507,1938,4,1,,8527,38003564,29,,,0012AFEEC379A69D,1,,1,,1, +30,8532,1959,11,1,,8527,38003564,30,,,00131C35661B2926,2,,1,,1, +31,8532,1922,10,1,,8527,38003564,31,,,00139C345A104F72,2,,1,,1, +32,8532,1953,12,1,,8527,38003564,32,,,0013E139F1F37264,2,,1,,1, diff --git a/whiterabbit/src/test/resources/scan_data/person-no-header.csv b/whiterabbit/src/test/resources/scan_data/person-no-header.csv new file mode 100644 index 00000000..63d4629b --- /dev/null +++ b/whiterabbit/src/test/resources/scan_data/person-no-header.csv @@ -0,0 +1,30 @@ +1,8507,1923,5,1,,8527,38003564,1,,,00013D2EFD8E45D1,1,,1,,1, +2,8507,1943,1,1,,8527,38003564,2,,,00016F745862898F,1,,1,,1, +3,8532,1936,9,1,,8527,38003564,3,,,0001FDD721E223DC,2,,1,,1, +4,8507,1941,6,1,,0,38003563,4,,,00021CA6FF03E670,1,,5,,5, +5,8507,1936,8,1,,8527,38003564,5,,,00024B3D2352D2D0,1,,1,,1, +6,8507,1943,10,1,,8516,38003564,6,,,0002DAE1C81CC70D,1,,2,,2, +7,8507,1922,7,1,,8527,38003564,7,,,0002F28CE057345B,1,,1,,1, +8,8507,1935,9,1,,8527,38003564,8,,,000308435E3E5B76,1,,1,,1, +9,8532,1976,9,1,,8527,38003564,9,,,000345A39D4157C9,2,,1,,1, +10,8532,1938,10,1,,8516,38003564,10,,,00036A21B65B0206,2,,2,,2, +11,8532,1934,2,1,,8527,38003564,11,,,000489E7EAAD463F,2,,1,,1, +12,8507,1929,6,1,,8527,38003564,12,,,00048EF1F4791C68,1,,1,,1, +13,8532,1936,7,1,,8527,38003564,13,,,0004F0ABD505251D,2,,1,,1, +14,8507,1934,5,1,,8527,38003564,14,,,00052705243EA128,1,,1,,1, +15,8532,1936,3,1,,8527,38003564,15,,,00070B63745BE497,2,,1,,1, +16,8507,1934,1,1,,8527,38003564,16,,,0007E57CC13CE880,1,,1,,1, +17,8532,1919,9,1,,8516,38003564,17,,,0007F12A492FD25D,2,,2,,2, +18,8532,1919,10,1,,8516,38003564,18,,,000A005BA0BED3EA,2,,2,,2, +19,8532,1942,7,1,,8527,38003564,19,,,000B4662348C35B4,2,,1,,1, +20,8507,1938,4,1,,8527,38003564,20,,,000B97BA2314E971,1,,1,,1, +21,8507,1932,8,1,,8516,38003564,21,,,000C7486B11E7030,1,,2,,2, +23,8507,1932,7,1,,8527,38003564,23,,,000DDD364C46E2C6,1,,1,,1, +25,8507,1965,4,1,,8527,38003564,25,,,00108066CA1FACCE,1,,1,,1, 
+26,8532,1939,12,1,,8527,38003564,26,,,0010D6F80D245D62,2,,1,,1, +27,8532,1940,4,1,,8527,38003564,27,,,0011714C14B52EEB,2,,1,,1, +28,8507,1937,10,1,,8527,38003564,28,,,0011CB1FE23E91AF,1,,1,,1, +29,8507,1938,4,1,,8527,38003564,29,,,0012AFEEC379A69D,1,,1,,1, +30,8532,1959,11,1,,8527,38003564,30,,,00131C35661B2926,2,,1,,1, +31,8532,1922,10,1,,8527,38003564,31,,,00139C345A104F72,2,,1,,1, +32,8532,1953,12,1,,8527,38003564,32,,,0013E139F1F37264,2,,1,,1, diff --git a/whiterabbit/src/test/resources/scan_data/person.csv b/whiterabbit/src/test/resources/scan_data/person.csv deleted file mode 100644 index e10b61b0..00000000 --- a/whiterabbit/src/test/resources/scan_data/person.csv +++ /dev/null @@ -1,30 +0,0 @@ -1 8507 1923 5 1 8527 38003564 1 00013D2EFD8E45D1 1 1 1 -2 8507 1943 1 1 8527 38003564 2 00016F745862898F 1 1 1 -3 8532 1936 9 1 8527 38003564 3 0001FDD721E223DC 2 1 1 -4 8507 1941 6 1 0 38003563 4 00021CA6FF03E670 1 5 5 -5 8507 1936 8 1 8527 38003564 5 00024B3D2352D2D0 1 1 1 -6 8507 1943 10 1 8516 38003564 6 0002DAE1C81CC70D 1 2 2 -7 8507 1922 7 1 8527 38003564 7 0002F28CE057345B 1 1 1 -8 8507 1935 9 1 8527 38003564 8 000308435E3E5B76 1 1 1 -9 8532 1976 9 1 8527 38003564 9 000345A39D4157C9 2 1 1 -10 8532 1938 10 1 8516 38003564 10 00036A21B65B0206 2 2 2 -11 8532 1934 2 1 8527 38003564 11 000489E7EAAD463F 2 1 1 -12 8507 1929 6 1 8527 38003564 12 00048EF1F4791C68 1 1 1 -13 8532 1936 7 1 8527 38003564 13 0004F0ABD505251D 2 1 1 -14 8507 1934 5 1 8527 38003564 14 00052705243EA128 1 1 1 -15 8532 1936 3 1 8527 38003564 15 00070B63745BE497 2 1 1 -16 8507 1934 1 1 8527 38003564 16 0007E57CC13CE880 1 1 1 -17 8532 1919 9 1 8516 38003564 17 0007F12A492FD25D 2 2 2 -18 8532 1919 10 1 8516 38003564 18 000A005BA0BED3EA 2 2 2 -19 8532 1942 7 1 8527 38003564 19 000B4662348C35B4 2 1 1 -20 8507 1938 4 1 8527 38003564 20 000B97BA2314E971 1 1 1 -21 8507 1932 8 1 8516 38003564 21 000C7486B11E7030 1 2 2 -23 8507 1932 7 1 8527 38003564 23 000DDD364C46E2C6 1 1 1 -25 8507 1965 4 1 8527 
38003564 25 00108066CA1FACCE 1 1 1 -26 8532 1939 12 1 8527 38003564 26 0010D6F80D245D62 2 1 1 -27 8532 1940 4 1 8527 38003564 27 0011714C14B52EEB 2 1 1 -28 8507 1937 10 1 8527 38003564 28 0011CB1FE23E91AF 1 1 1 -29 8507 1938 4 1 8527 38003564 29 0012AFEEC379A69D 1 1 1 -30 8532 1959 11 1 8527 38003564 30 00131C35661B2926 2 1 1 -31 8532 1922 10 1 8527 38003564 31 00139C345A104F72 2 1 1 -32 8532 1953 12 1 8527 38003564 32 0013E139F1F37264 2 1 1 diff --git a/whiterabbit/src/test/resources/scan_data/snowflake.ini.template b/whiterabbit/src/test/resources/scan_data/snowflake.ini.template new file mode 100644 index 00000000..ab12cd68 --- /dev/null +++ b/whiterabbit/src/test/resources/scan_data/snowflake.ini.template @@ -0,0 +1,16 @@ +# Usage: dist/bin/whiteRabbit -ini +WORKING_FOLDER = %WORKING_FOLDER% # Path to the folder where all output will be written +DATA_TYPE = Snowflake # "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", "Redshift", "BigQuery", "Azure", "Teradata", "SAS7bdat", "Snowflake" +SNOWFLAKE_ACCOUNT = %SNOWFLAKE_ACCOUNT% # Snowflake account identifier +SNOWFLAKE_USER = %SNOWFLAKE_USER% # User name for the database +SNOWFLAKE_PASSWORD = %SNOWFLAKE_PASSWORD% # Password for the database +SNOWFLAKE_WAREHOUSE = %SNOWFLAKE_WAREHOUSE% # Name of the Snowflake warehouse to use +SNOWFLAKE_DATABASE = %SNOWFLAKE_DATABASE% +SNOWFLAKE_SCHEMA = %SNOWFLAKE_SCHEMA% +TABLES_TO_SCAN = * # Comma-delimited list of table names to scan. Use "*" (asterisk) to include all tables in the database +SCAN_FIELD_VALUES = yes # Include the frequency of field values in the scan report?
"yes" or "no" +MIN_CELL_COUNT = 5 # Minimum frequency for a field value to be included in the report +MAX_DISTINCT_VALUES = 1000 # Maximum number of distinct values per field to be reported +ROWS_PER_TABLE = 100000 # Maximum number of rows per table to be scanned for field values +CALCULATE_NUMERIC_STATS = no # Include average, standard deviation and quartiles in the scan report? "yes" or "no" +NUMERIC_STATS_SAMPLER_SIZE = 500 # Maximum number of rows used to calculate numeric statistics diff --git a/whiterabbit/src/test/resources/scan_data/tsv.ini.template b/whiterabbit/src/test/resources/scan_data/tsv.ini.template new file mode 100644 index 00000000..2e287355 --- /dev/null +++ b/whiterabbit/src/test/resources/scan_data/tsv.ini.template @@ -0,0 +1,14 @@ +WORKING_FOLDER = %WORKING_FOLDER% # Path to the folder where all output will be written +DATA_TYPE = Delimited text files # "Delimited text files", "MySQL", "Oracle", "SQL Server", "PostgreSQL", "MS Access", "Redshift", "BigQuery", "Azure", "Teradata", "SAS7bdat" +SERVER_LOCATION = 127.0.0.1/data_base_name # Name or address of the server. For Postgres, add the database name +USER_NAME = joe # User name for the database +PASSWORD = supersecret # Password for the database +DATABASE_NAME = schema_name # Name of the data schema used +DELIMITER = , # The delimiter that separates values +TABLES_TO_SCAN = * # Comma-delimited list of table names to scan. Use "*" (asterisk) to include all tables in the database +SCAN_FIELD_VALUES = yes # Include the frequency of field values in the scan report? "yes" or "no" +MIN_CELL_COUNT = 5 # Minimum frequency for a field value to be included in the report +MAX_DISTINCT_VALUES = 1000 # Maximum number of distinct values per field to be reported +ROWS_PER_TABLE = 100000 # Maximum number of rows per table to be scanned for field values +CALCULATE_NUMERIC_STATS = no # Include average, standard deviation and quartiles in the scan report?
"yes" or "no" +NUMERIC_STATS_SAMPLER_SIZE = 500 # Maximum number of rows used to calculate numeric statistics