Skip to content

Commit

Permalink
[ALS-6173] small hpds optimization
Browse files Browse the repository at this point in the history
- optimize getValuesForKeys to work with sorted id lists
  • Loading branch information
Luke Sikina committed Mar 28, 2024
1 parent 35797d0 commit b40a021
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,15 @@ public void setLoadingMap(List<KeyAndValue<V>> newMap) {
this.loadingMap= newMap ;
}

public List<KeyAndValue<V>> getValuesForKeys(Set<Integer> patientIds) {
public List<KeyAndValue<V>> getValuesForKeys(List<Integer> sortedPatientIds) {
List<KeyAndValue<V>> values = new ArrayList<>();
int x = 0;
for(Integer id : patientIds) {
while(x < sortedByKey.length && sortedByKey[x].key<id) {
for(Integer id : sortedPatientIds) {
while(x < sortedByKey.length && sortedByKey[x].key < id) {
x++;
}
while(x < sortedByKey.length && sortedByKey[x].key==id) {
if (x >= sortedByKey.length) { return values; }
while(x < sortedByKey.length && sortedByKey[x].key == id) {
values.add(sortedByKey[x]);
x++;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package edu.harvard.hms.dbmi.avillach.hpds.data.phenotype;

import org.junit.Assert;
import org.junit.Test;

import java.util.List;

public class PhenoCubeTest {

@Test
public void shouldGetValueWhenLastKeyMatches() {
KeyAndValue[] sortedKeyValuePairs = {
new KeyAndValue<>(1, "A01.00 Typhoid fever, unspecified")
};

PhenoCube<?> subject = new PhenoCube<>("bob", KeyAndValue.class);
subject.setSortedByKey(sortedKeyValuePairs);

List<? extends KeyAndValue<?>> actual = subject.getValuesForKeys(List.of(1));
List<KeyAndValue<?>> expected = List.of(sortedKeyValuePairs[0]);

Assert.assertEquals(expected, actual);
}

@Test
public void shouldWorkForEmptyList() {
KeyAndValue[] sortedKeyValuePairs = {};

PhenoCube<?> subject = new PhenoCube<>("bob", KeyAndValue.class);
subject.setSortedByKey(sortedKeyValuePairs);

List<? extends KeyAndValue<?>> actual = subject.getValuesForKeys(List.of(1));
List<KeyAndValue<?>> expected = List.of();

Assert.assertEquals(expected, actual);
}

@Test
public void shouldGetValuesWhenPatientMatchesSeveralKeys() {
KeyAndValue[] pairs = {
new KeyAndValue<>(0, ":)"),
new KeyAndValue<>(1, "A99.9 Not actually hungry, just bored"),
new KeyAndValue<>(1, "A99.99 Feeling snackish"),
new KeyAndValue<>(1, "A99.999 Legit hungry"),
new KeyAndValue<>(1, "A99.9999 FOOD FOOD FOOD FOOD FOOD FOOD"),
new KeyAndValue<>(2, ">:|"),
};

PhenoCube<?> subject = new PhenoCube<>("bob", KeyAndValue.class);
subject.setSortedByKey(pairs);

List<? extends KeyAndValue<?>> actual = subject.getValuesForKeys(List.of(1));
List<KeyAndValue<?>> expected = List.of(pairs[1], pairs[2], pairs[3], pairs[4]);

Assert.assertEquals(expected, actual);
}

@Test
public void shouldGetValuesWhenNoneMatch() {
KeyAndValue[] pairs = {
new KeyAndValue<>(0, ":)"),
new KeyAndValue<>(2, ">:|"),
};

PhenoCube<?> subject = new PhenoCube<>("bob", KeyAndValue.class);
subject.setSortedByKey(pairs);

List<? extends KeyAndValue<?>> actual = subject.getValuesForKeys(List.of(1));
List<KeyAndValue<?>> expected = List.of();

Assert.assertEquals(expected, actual);
}

@Test
public void shouldGetValuesWithSparseMatches() {
KeyAndValue[] pairs = {
new KeyAndValue<>(0, ":)"),
new KeyAndValue<>(1, ":o"),
new KeyAndValue<>(2, ">:|"),
new KeyAndValue<>(2, ":|"),
new KeyAndValue<>(3, ":]"),
new KeyAndValue<>(3, ":["),
};

PhenoCube<?> subject = new PhenoCube<>("bob", KeyAndValue.class);
subject.setSortedByKey(pairs);

List<? extends KeyAndValue<?>> actual = subject.getValuesForKeys(List.of(1, 3));
List<KeyAndValue<?>> expected = List.of(pairs[1], pairs[4], pairs[5]);

Assert.assertEquals(expected, actual);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,8 @@

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.*;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;

import edu.harvard.hms.dbmi.avillach.hpds.data.phenotype.KeyAndValue;
Expand Down Expand Up @@ -46,7 +41,8 @@ public void runQuery(Query query, AsyncResult asyncResult) throws NotEnoughMemor
query.setRequiredFields(new ArrayList<>());

// list patients involved
Set<Integer> patientIds = abstractProcessor.getPatientSubsetForQuery(query);
List<Integer> patientIds = new ArrayList<>(abstractProcessor.getPatientSubsetForQuery(query));
patientIds.sort(Integer::compareTo);

// get start time for the timeline
long startTime = Long.MAX_VALUE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,14 @@ private void exportTimeData(Query query, AsyncResult result, TreeSet<Integer> id
pathList.addAll(query.getRequiredFields());
pathList.addAll(query.getCategoryFilters().keySet());
pathList.addAll(query.getNumericFilters().keySet());

addDataForConcepts(pathList, exportedConceptPaths, idList, result);

ArrayList<Integer> ids = new ArrayList<>(idList);
ids.sort(Integer::compareTo);

addDataForConcepts(pathList, exportedConceptPaths, ids, result);
}

private void addDataForConcepts(Collection<String> pathList, Set<String> exportedConceptPaths, TreeSet<Integer> idList, AsyncResult result) throws IOException {
private void addDataForConcepts(Collection<String> pathList, Set<String> exportedConceptPaths, List<Integer> idList, AsyncResult result) throws IOException {
for (String conceptPath : pathList) {
//skip concepts we may already have encountered
if(exportedConceptPaths.contains(conceptPath)) {
Expand Down

0 comments on commit b40a021

Please sign in to comment.