From f9845707747fa7a99fd574ec8672e35f2d301d15 Mon Sep 17 00:00:00 2001 From: Luke Sikina Date: Thu, 20 Jul 2023 15:40:14 -0400 Subject: [PATCH] Performance improvements for GIC - Made QueryProcessor filter out columns that do not exist - Parallelized QueryProcessor::runQuery - Switch `sout` logging to standard slf4j logging - Faster - No blocking threads due to synchronized methods --- .../hms/dbmi/avillach/hpds/data/query/Query.java | 6 +++++- .../hpds/processing/AbstractProcessor.java | 2 +- .../avillach/hpds/processing/QueryProcessor.java | 16 +++++++++------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java index 50c6a9c8..eb7ff4d7 100644 --- a/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java +++ b/client-api/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/query/Query.java @@ -4,9 +4,13 @@ import edu.harvard.hms.dbmi.avillach.hpds.data.query.Filter.DoubleFilter; import edu.harvard.hms.dbmi.avillach.hpds.data.query.Filter.FloatFilter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class Query { + private static final Logger log = LoggerFactory.getLogger(Query.class); + public Query() { } @@ -180,7 +184,7 @@ public String toString() { break; default: //no logic here; all enum values should be present above - System.out.println("Formatting not supported for type " + expectedResultType); + log.warn("Formatting not supported for type {}", expectedResultType); } writePartFormat("Required Fields", requiredFields, builder, false); diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index f0e57a58..e2f6c44a 100644 --- 
a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -627,7 +627,7 @@ public PhenoCube load(String key) throws Exception { inStream.close(); return ret; }else { - System.out.println("ColumnMeta not found for : [" + key + "]"); + log.warn("ColumnMeta not found for : [{}]", key); return null; } } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java index 44ddaad8..b54b5283 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java @@ -56,20 +56,22 @@ public String[] getHeaderRow(Query query) { public void runQuery(Query query, AsyncResult result) { TreeSet idList = abstractProcessor.getPatientSubsetForQuery(query); log.info("Processing " + idList.size() + " rows for result " + result.id); - for(List list : Lists.partition(new ArrayList<>(idList), ID_BATCH_SIZE)){ - result.stream.appendResultStore(buildResult(result, query, new TreeSet(list))); - }; + Lists.partition(new ArrayList<>(idList), ID_BATCH_SIZE).parallelStream() + .map(list -> buildResult(result, query, new TreeSet<>(list))) + .sequential() + .forEach(result.stream::appendResultStore); } private ResultStore buildResult(AsyncResult result, Query query, TreeSet ids) { - List paths = query.getFields(); - int columnCount = paths.size() + 1; - - List columns = paths.stream() + List columns = query.getFields().stream() .map(abstractProcessor.getDictionary()::get) .filter(Objects::nonNull) .collect(Collectors.toList()); + List paths = columns.stream() + .map(ColumnMeta::getName) + .collect(Collectors.toList()); + int columnCount = paths.size() + 1; ArrayList columnIndex = 
abstractProcessor.useResidentCubesFirst(paths, columnCount); ResultStore results = new ResultStore(result.id, columns, ids);