Skip to content

Commit

Permalink
Merge pull request #235 from OHDSI/release-0.10.1
Browse files Browse the repository at this point in the history
Release 0.10.1
  • Loading branch information
Maxim Moinat authored Jun 10, 2020
2 parents 26e8ab8 + 37fdf22 commit f09d32f
Show file tree
Hide file tree
Showing 25 changed files with 1,294 additions and 1,743 deletions.
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ <h1>Features</h1>
</div>
<div id="current-version" class="section level1">
<h1>Current version</h1>
<p><a href="https://github.com/OHDSI/WhiteRabbit/releases/tag/v0.9.0"><strong>v0.9.0</strong></a></p>
<p><a href="https://github.com/OHDSI/WhiteRabbit/releases/latest"><strong>v0.10.1</strong></a></p>
</div>


Expand Down
2 changes: 1 addition & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ It comes with **RabbitInAHat**, an application for interactive design of an ETL
- Rabbit in a Hat generates ETL specification document according to OMOP template

# Current version
[**v0.9.0**](https://github.com/OHDSI/WhiteRabbit/releases/tag/v0.9.0)
[**v0.10.1**](https://github.com/OHDSI/WhiteRabbit/releases/latest)
Binary file modified examples.zip
Binary file not shown.
2 changes: 1 addition & 1 deletion iniFileExamples/WhiteRabbit.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ PASSWORD = supersecret # Password for the database
DATABASE_NAME = schema_name # Name of the data schema used
DELIMITER = , # The delimiter that separates values
TABLES_TO_SCAN = * # Comma-delimited list of table names to scan. Use "*" (asterisk) to include all tables in the database
SCAN_FIELD_VALUES = yes # Include a frequency count of field values in the scan report? "yes" or "no"
SCAN_FIELD_VALUES = yes # Include the frequency of field values in the scan report? "yes" or "no"
MIN_CELL_COUNT = 5 # Minimum frequency for a field value to be included in the report
MAX_DISTINCT_VALUES = 1000 # Maximum number of distinct values per field to be reported
ROWS_PER_TABLE = 100000 # Maximum number of rows per table to be scanned for field values
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<groupId>org.ohdsi</groupId>
<artifactId>leporidae</artifactId>
<packaging>pom</packaging>
<version>0.10.0</version>
<version>0.10.1</version>
<modules>
<module>rabbitinahat</module>
<module>whiterabbit</module>
Expand Down
2 changes: 1 addition & 1 deletion rabbit-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>leporidae</artifactId>
<groupId>org.ohdsi</groupId>
<version>0.10.0</version>
<version>0.10.1</version>
</parent>
<modelVersion>4.0.0</modelVersion>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ public void setTables(List<Table> tables) {
this.tables = tables;
}

/**
 * Adds a single table to this database's list of tables.
 *
 * @param table the table to add
 */
public void addTable(Table table) {
    tables.add(table);
}

/**
 * Returns the name stored for this database.
 *
 * @return the current value of {@code dbName}
 */
public String getDbName() {
    return this.dbName;
}
Expand Down Expand Up @@ -138,11 +142,14 @@ public static Database generateModelFromScanReport(String filename) {
Database database = new Database();
QuickAndDirtyXlsxReader workbook = new QuickAndDirtyXlsxReader(filename);

// Create table lookup from tables overview, if exists
// Create table lookup from tables overview, if it exists
Map<String, Table> nameToTable = createTablesFromTableOverview(workbook, database);

// Field overview is the first sheet
Sheet overviewSheet = workbook.get(0);
Sheet overviewSheet = workbook.getByName(ScanSheetName.FIELD_OVERVIEW);
if (overviewSheet == null) {
overviewSheet = workbook.get(0);
}
Iterator<QuickAndDirtyXlsxReader.Row> overviewRows = overviewSheet.iterator();

overviewRows.next(); // Skip header
Expand All @@ -168,11 +175,12 @@ public static Database generateModelFromScanReport(String filename) {
String fieldName = row.getStringByHeaderName(ScanFieldName.FIELD);
Field field = new Field(fieldName.toLowerCase(), table);

String fractionEmpty = row.getByHeaderName(ScanFieldName.FRACTION_EMPTY);
field.setNullable(fractionEmpty == null || !fractionEmpty.equals("0"));
field.setType(row.getByHeaderName(ScanFieldName.TYPE));
field.setMaxLength(row.getIntByHeaderName(ScanFieldName.MAX_LENGTH));
field.setDescription(row.getStringByHeaderName(ScanFieldName.DESCRIPTION));
field.setFractionEmpty(row.getDoubleByHeaderName(ScanFieldName.FRACTION_EMPTY));
field.setUniqueCount(row.getIntByHeaderName(ScanFieldName.UNIQUE_COUNT));
field.setFractionUnique(row.getDoubleByHeaderName(ScanFieldName.FRACTION_UNIQUE));
field.setValueCounts(getValueCounts(workbook, tableName, fieldName));

table.getFields().add(field);
Expand All @@ -186,18 +194,13 @@ public static Table createTable(String name, String description, Integer nRows,
Table table = new Table();
table.setName(name.toLowerCase());
table.setDescription(description);
table.setRowCount((nRows == null || nRows == -1) ? nRowsChecked : nRows);
table.setRowCount(nRows == null ? -1 : nRows);
table.setRowsCheckedCount(nRowsChecked == null ? -1 : nRowsChecked);
return table;
}

public static Map<String, Table> createTablesFromTableOverview(QuickAndDirtyXlsxReader workbook, Database database) {
Sheet tableOverviewSheet = null;
for (Sheet sheet : workbook) {
if (sheet.getName().equals(ScanSheetName.TABLE_OVERVIEW)) {
tableOverviewSheet = sheet;
break;
}
}
Sheet tableOverviewSheet = workbook.getByName(ScanSheetName.TABLE_OVERVIEW);

if (tableOverviewSheet == null) { // No table overview sheet, empty nameToTable
return new HashMap<>();
Expand All @@ -224,38 +227,46 @@ public static Map<String, Table> createTablesFromTableOverview(QuickAndDirtyXlsx
return nameToTable;
}

private static String[][] getValueCounts(QuickAndDirtyXlsxReader workbook, String tableName, String fieldName) {
Sheet tableSheet = null;
private static ValueCounts getValueCounts(QuickAndDirtyXlsxReader workbook, String tableName, String fieldName) {
String targetSheetName = Table.createSheetNameFromTableName(tableName);
for (Sheet sheet : workbook) {
if (sheet.getName().equals(targetSheetName)) {
tableSheet = sheet;
break;
}
Sheet tableSheet = workbook.getByName(targetSheetName);

// Sheet not found for table, return empty
if (tableSheet == null) {
return new ValueCounts();
}
if (tableSheet == null) // Sheet not found for table, return empty array
return new String[0][0];

Iterator<org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row> iterator = tableSheet.iterator();
org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row header = iterator.next();
int index = header.indexOf(fieldName);
List<String[]> list = new ArrayList<String[]>();

ValueCounts valueCounts = new ValueCounts();
if (index != -1) // Could happen when people manually delete columns
while (iterator.hasNext()) {
org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row row = iterator.next();
if (row.size() > index) {
String value = row.get(index);
String count;
if (row.size() > index + 1)

if (row.size() > index + 1) {
count = row.get(index + 1);
else
} else {
count = "";
if (value.equals("") && count.equals(""))
}

if (value.equals("") && count.equals("")) {
break;
list.add(new String[] { value, count });
}

// If the count is not a number, ignore this row
try {
valueCounts.add(value, (int) Double.parseDouble(count));
} catch (NumberFormatException e) {
// Skip if count could not be parsed. In most cases this is for empty count at 'List Truncated...'
}
}
}
return list.toArray(new String[list.size()][2]);
return valueCounts;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,16 @@ public class Field implements MappableItem {
private Table table;
private String name;
private String comment = "";
private String[][] valueCounts;
private ValueCounts valueCounts = new ValueCounts();
private boolean isNullable;
private String type;
private String description = "";
private Integer maxLength;
private boolean isStem;
private List<ConceptsMap.Concept> conceptIdHints;
private Double fractionEmpty;
private Integer uniqueCount;
private Double fractionUnique;

public Field(String name, Table table) {
this.table = table;
Expand Down Expand Up @@ -66,11 +69,11 @@ public void setName(String name) {
this.name = name;
}

public String[][] getValueCounts() {
public ValueCounts getValueCounts() {
return valueCounts;
}

public void setValueCounts(String[][] valueCounts) {
public void setValueCounts(ValueCounts valueCounts) {
this.valueCounts = valueCounts;
}

Expand Down Expand Up @@ -137,4 +140,29 @@ public List<ConceptsMap.Concept> getConceptIdHints() {
public void setConceptIdHints(List<ConceptsMap.Concept> conceptIdHints) {
this.conceptIdHints = conceptIdHints;
}

/**
 * Returns the fraction-empty value recorded for this field.
 *
 * @return the stored fraction, or {@code null} if none was set
 */
public Double getFractionEmpty() {
    return this.fractionEmpty;
}

/**
 * Stores the fraction-empty value and updates the nullable flag to match.
 *
 * The field is marked not nullable only when the fraction is known and equals zero;
 * an unknown ({@code null}) or non-zero fraction marks it nullable.
 *
 * @param fractionEmpty the fraction-empty value, may be {@code null}
 */
public void setFractionEmpty(Double fractionEmpty) {
    this.fractionEmpty = fractionEmpty;
    boolean knownNeverEmpty = fractionEmpty != null && fractionEmpty == 0;
    setNullable(!knownNeverEmpty);
}

/**
 * Returns the unique-value count recorded for this field.
 *
 * @return the stored count, or {@code null} if none was set
 */
public Integer getUniqueCount() {
    return this.uniqueCount;
}

/**
 * Stores the unique-value count for this field.
 *
 * @param uniqueCount the count to store, may be {@code null}
 */
public void setUniqueCount(Integer uniqueCount) {
this.uniqueCount = uniqueCount;
}

/**
 * Returns the fraction-unique value recorded for this field.
 *
 * @return the stored fraction, or {@code null} if none was set
 */
public Double getFractionUnique() {
    return this.fractionUnique;
}

/**
 * Stores the fraction-unique value for this field.
 *
 * @param fractionUnique the fraction to store, may be {@code null}
 */
public void setFractionUnique(Double fractionUnique) {
this.fractionUnique = fractionUnique;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ public void setRowsCheckedCount(int rowsCheckedCount) {
this.rowsCheckedCount = rowsCheckedCount;
}

/**
 * Adds a single field to this table's list of fields.
 *
 * @param field the field to add
 */
public void addField(Field field) {
    fields.add(field);
}

/**
 * Returns the list of fields held by this table.
 *
 * @return the internal field list itself, not a copy
 */
public List<Field> getFields() {
    return this.fields;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*******************************************************************************
* Copyright 2020 Observational Health Data Sciences and Informatics
*
* This file is part of WhiteRabbit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package org.ohdsi.rabbitInAHat.dataModel;

import java.util.ArrayList;

/**
 * Ordered collection of (value, frequency) pairs that also tracks the sum of all
 * frequencies and the value with the highest frequency seen so far.
 */
public class ValueCounts {
    private ArrayList<ValueCounts.ValueCount> valueCounts = new ArrayList<>();
    private int totalFrequency = 0;
    private String mostFrequentValue;
    // Starts at -1 so that a first entry with frequency 0 still becomes the most frequent value.
    private int mostFrequentValueCount = -1;

    /**
     * One value together with the number of times it was counted.
     */
    public class ValueCount {
        private String value;
        private int frequency;

        /**
         * @param value     the value
         * @param frequency how often the value was counted
         */
        public ValueCount(String value, int frequency) {
            this.frequency = frequency;
            this.value = value;
        }

        /** @return the value of this entry */
        public String getValue() {
            return this.value;
        }

        /** @param value replacement value for this entry */
        public void setValue(String value) {
            this.value = value;
        }

        /** @return the frequency of this entry */
        public int getFrequency() {
            return this.frequency;
        }
    }

    /**
     * Appends a value with its count and updates the running aggregates.
     *
     * The most frequent value is replaced only by a strictly larger count, so on
     * ties the value added first is kept.
     *
     * @param value the value to record
     * @param count the frequency of that value
     * @return the result of the underlying list's {@code add} call
     */
    public boolean add(String value, int count) {
        boolean beatsCurrentMaximum = count > this.mostFrequentValueCount;
        if (beatsCurrentMaximum) {
            this.mostFrequentValueCount = count;
            this.mostFrequentValue = value;
        }
        this.totalFrequency = this.totalFrequency + count;

        ValueCount entry = new ValueCount(value, count);
        return this.valueCounts.add(entry);
    }

    /** @return the internal list of entries itself, in insertion order */
    public ArrayList<ValueCounts.ValueCount> getAll() {
        return this.valueCounts;
    }

    /**
     * @param i zero-based position of the entry
     * @return the entry at position {@code i}
     */
    public ValueCounts.ValueCount get(int i) {
        return this.valueCounts.get(i);
    }

    /** @return the value with the highest count, or {@code null} if no value with a non-negative count was added */
    public String getMostFrequentValue() {
        return this.mostFrequentValue;
    }

    /** @return the sum of the counts of all added entries */
    public int getTotalFrequency() {
        return this.totalFrequency;
    }

    /** @return the number of entries */
    public int size() {
        return this.valueCounts.size();
    }

    /** @return {@code true} when no entries have been added */
    public boolean isEmpty() {
        return this.valueCounts.isEmpty();
    }

}
Loading

0 comments on commit f09d32f

Please sign in to comment.