Performance improvements for GIC
- Made QueryProcessor filter out columns that do not exist
- Parallelized QueryProcessor::runQuery
- Switched `sout` (System.out) logging to standard SLF4J logging
  - Faster
  - No blocked threads: `System.out.println` synchronizes on the shared PrintStream
Luke-Sikina committed Jul 20, 2023
1 parent 6150168 commit cd2a570
Showing 3 changed files with 14 additions and 9 deletions.
Query.java
@@ -4,9 +4,13 @@

 import edu.harvard.hms.dbmi.avillach.hpds.data.query.Filter.DoubleFilter;
 import edu.harvard.hms.dbmi.avillach.hpds.data.query.Filter.FloatFilter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 public class Query {

+    private static final Logger log = LoggerFactory.getLogger(Query.class);
+
     public Query() {

     }
@@ -180,7 +184,7 @@ public String toString() {
             break;
         default:
             //no logic here; all enum values should be present above
-            System.out.println("Formatting not supported for type " + expectedResultType);
+            log.warn("Formatting not supported for type {}", expectedResultType);
         }

         writePartFormat("Required Fields", requiredFields, builder, false);
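A note on the logging change above: SLF4J's parameterized form defers message construction to the logging backend, and it avoids funneling every caller through `System.out.println`, which synchronizes on the shared PrintStream and therefore blocks concurrent threads. A minimal, self-contained sketch of the pattern (the class name is hypothetical, not part of this commit):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggingSketch {
    private static final Logger log = LoggerFactory.getLogger(LoggingSketch.class);

    public static void main(String[] args) {
        String key = "\\demo\\path\\";
        // Concatenation builds the String eagerly, and println takes the
        // PrintStream's intrinsic lock, serializing concurrent callers.
        System.out.println("ColumnMeta not found for : [" + key + "]");
        // The {} placeholder is only substituted if WARN is enabled, and
        // SLF4J backends (e.g. logback) can write asynchronously.
        log.warn("ColumnMeta not found for : [{}]", key);
    }
}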
(second changed file)
@@ -627,7 +627,7 @@ public PhenoCube<?> load(String key) throws Exception {
             inStream.close();
             return ret;
         }else {
-            System.out.println("ColumnMeta not found for : [" + key + "]");
+            log.warn("ColumnMeta not found for : [{}]", key);
             return null;
         }
     }
QueryProcessor.java
@@ -56,20 +56,21 @@ public String[] getHeaderRow(Query query) {
     public void runQuery(Query query, AsyncResult result) {
         TreeSet<Integer> idList = abstractProcessor.getPatientSubsetForQuery(query);
         log.info("Processing " + idList.size() + " rows for result " + result.id);
-        for(List<Integer> list : Lists.partition(new ArrayList<>(idList), ID_BATCH_SIZE)){
-            result.stream.appendResultStore(buildResult(result, query, new TreeSet<Integer>(list)));
-        };
+        Lists.partition(new ArrayList<>(idList), ID_BATCH_SIZE).parallelStream()
+            .map(list -> buildResult(result, query, new TreeSet<>(list)))
+            .forEach(result.stream::appendResultStore);
     }


     private ResultStore buildResult(AsyncResult result, Query query, TreeSet<Integer> ids) {
-        List<String> paths = query.getFields();
-        int columnCount = paths.size() + 1;
-
-        List<ColumnMeta> columns = paths.stream()
+        List<ColumnMeta> columns = query.getFields().stream()
             .map(abstractProcessor.getDictionary()::get)
+            .filter(Objects::nonNull)
             .collect(Collectors.toList());
+        List<String> paths = columns.stream()
+            .map(ColumnMeta::getName)
+            .collect(Collectors.toList());
+        int columnCount = paths.size() + 1;

         ArrayList<Integer> columnIndex = abstractProcessor.useResidentCubesFirst(paths, columnCount);
         ResultStore results = new ResultStore(result.id, columns, ids);
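On the `buildResult` change: `getDictionary().get(path)` returns null for a field that does not exist, so building `columns` first with `filter(Objects::nonNull)` and then deriving `paths` from the surviving ColumnMeta entries keeps the two lists index-aligned and keeps nulls out of `ResultStore`. A tiny standalone sketch of the idea, with hypothetical data in place of this repo's dictionary:

import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

public class FilterSketch {
    public static void main(String[] args) {
        Map<String, String> dictionary = Map.of("\\age\\", "AgeMeta", "\\sex\\", "SexMeta");
        List<String> requested = List.of("\\age\\", "\\no_such_field\\", "\\sex\\");

        // Map.get returns null for the unknown field; drop it before
        // anything downstream can trip over the null.
        List<String> columns = requested.stream()
                .map(dictionary::get)
                .filter(Objects::nonNull)
                .collect(Collectors.toList());

        System.out.println(columns); // prints [AgeMeta, SexMeta]
    }
}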
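On the `runQuery` change: Guava's `Lists.partition` splits the patient ids into ID_BATCH_SIZE chunks, and `parallelStream()` lets the batches be built concurrently. Two caveats come with this pattern: `forEach` on a parallel stream does not preserve encounter order (use `forEachOrdered` when batch order matters), and the sink (`result.stream::appendResultStore` here) is called from multiple threads, so it must be thread-safe. A JDK-only sketch of the same shape (names and batch size are illustrative, not this repo's API):

import java.util.List;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class ParallelBatchSketch {
    public static void main(String[] args) {
        List<Integer> ids = IntStream.range(0, 100).boxed().collect(Collectors.toList());
        int batchSize = 10;

        // JDK-only stand-in for Guava's Lists.partition.
        List<List<Integer>> batches =
                IntStream.range(0, (ids.size() + batchSize - 1) / batchSize)
                        .mapToObj(i -> ids.subList(i * batchSize, Math.min((i + 1) * batchSize, ids.size())))
                        .collect(Collectors.toList());

        // Thread-safe sink; batches arrive in nondeterministic order.
        ConcurrentLinkedQueue<String> sink = new ConcurrentLinkedQueue<>();
        batches.parallelStream()
                .map(batch -> "result for ids " + batch.get(0) + ".." + batch.get(batch.size() - 1))
                .forEach(sink::add);

        System.out.println(sink.size() + " batches appended");
    }
}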
