diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java index a21949f1..a8926130 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessor.java @@ -51,8 +51,6 @@ public class AbstractProcessor { private final PhenotypeMetaStore phenotypeMetaStore; - private final VariantIndexCache variantIndexCache; - private final GenomicProcessor genomicProcessor; private final LoadingCache> infoStoreValuesCache = CacheBuilder.newBuilder().build(new CacheLoader<>() { @@ -135,19 +133,16 @@ public AbstractProcessor( }); } infoStoreColumns = new ArrayList<>(infoStores.keySet()); - - variantIndexCache = new VariantIndexCache(variantService.getVariantIndex(), infoStores); } public AbstractProcessor(PhenotypeMetaStore phenotypeMetaStore, LoadingCache> store, Map infoStores, List infoStoreColumns, - VariantService variantService, VariantIndexCache variantIndexCache, GenomicProcessor genomicProcessor) { + VariantService variantService, GenomicProcessor genomicProcessor) { this.phenotypeMetaStore = phenotypeMetaStore; this.store = store; this.infoStores = infoStores; this.infoStoreColumns = infoStoreColumns; this.variantService = variantService; - this.variantIndexCache = variantIndexCache; this.genomicProcessor = genomicProcessor; CACHE_SIZE = Integer.parseInt(System.getProperty("CACHE_SIZE", "100")); @@ -188,8 +183,19 @@ protected Set applyBooleanLogic(List> filteredIdSets) { * @return */ protected Set idSetsForEachFilter(Query query) { + DistributableQuery distributableQuery = getDistributableQuery(query); + + if (distributableQuery.hasFilters()) { + BigInteger patientMaskForVariantInfoFilters = genomicProcessor.getPatientMaskForVariantInfoFilters(distributableQuery); + return patientMaskToPatientIdSet(patientMaskForVariantInfoFilters); + } + + return distributableQuery.getPatientIds(); + } + + private DistributableQuery getDistributableQuery(Query query) { DistributableQuery distributableQuery = new DistributableQuery(); - List> patientIdSets = new ArrayList<>(); + List> patientIdSets = new ArrayList<>(); try { query.getAllAnyRecordOf().forEach(anyRecordOfFilterList -> { @@ -216,14 +222,8 @@ protected Set idSetsForEachFilter(Query query) { .collect(Collectors.toSet()); } distributableQuery.setVariantInfoFilters(query.getVariantInfoFilters()); - distributableQuery.setPatientIds(phenotypicPatientSet); - - if (distributableQuery.hasFilters()) { - BigInteger patientMaskForVariantInfoFilters = genomicProcessor.getPatientMaskForVariantInfoFilters(distributableQuery); - return patientMaskToPatientIdSet(patientMaskForVariantInfoFilters); - } - - return phenotypicPatientSet; + distributableQuery.setPatientIds(phenotypicPatientSet); + return distributableQuery; } public Set patientMaskToPatientIdSet(BigInteger patientMask) { @@ -323,7 +323,8 @@ private List> getIdSetsForCategoryFilters(Query query, Distributabl } protected Collection getVariantList(Query query) throws IOException { - return genomicProcessor.processVariantList(getPatientSubsetForQuery(query), query); + DistributableQuery distributableQuery = getDistributableQuery(query); + return genomicProcessor.processVariantList(distributableQuery); } public FileBackedByteIndexedInfoStore getInfoStore(String column) { diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessor.java index 24e8b258..4dae13e7 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessor.java @@ -13,7 +13,7 @@ public interface GenomicProcessor { BigInteger createMaskForPatientSet(Set patientSubset); - Collection processVariantList(Set patientSubsetForQuery, Query query); + Collection processVariantList(DistributableQuery distributableQuery); String[] getPatientIds(); } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java index dc0c596a..9758fa29 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java @@ -64,6 +64,8 @@ public GenomicProcessorNodeImpl(String genomicDataDirectory) { throw new RuntimeException(e); } }); + } else { + throw new IllegalArgumentException("Not a valid genomicDataDirectory: " + this.genomicDataDirectory); } infoStoreColumns = new ArrayList<>(infoStores.keySet()); @@ -222,7 +224,7 @@ private VariantIndex addVariantsForInfoFilter(VariantIndex unionOfInfoFilters, Q } @Override - public Collection processVariantList(Set patientSubsetForQuery, Query query) { + public Collection processVariantList(DistributableQuery query) { boolean queryContainsVariantInfoFilters = query.getVariantInfoFilters().stream().anyMatch(variantInfoFilter -> !variantInfoFilter.categoryVariantInfoFilters.isEmpty() || !variantInfoFilter.numericVariantInfoFilters.isEmpty() ); @@ -245,7 +247,7 @@ public Collection processVariantList(Set patientSubsetForQuery, return Integer.parseInt(id.trim()); }) .collect(Collectors.toList())); - Set patientSubset = Sets.intersection(patientSubsetForQuery, allPatients); + Set patientSubset = Sets.intersection(query.getPatientIds(), allPatients); // log.debug("Patient subset " + Arrays.deepToString(patientSubset.toArray())); // If we have all patients then no variants would be filtered, so no need to do further processing diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorParentImpl.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorParentImpl.java index 4c4af4e0..1e07186d 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorParentImpl.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorParentImpl.java @@ -55,9 +55,9 @@ public BigInteger createMaskForPatientSet(Set patientSubset) { } @Override - public Collection processVariantList(Set patientSubsetForQuery, Query query) { + public Collection processVariantList(DistributableQuery distributableQuery) { return nodes.parallelStream().flatMap(node -> - node.processVariantList(patientSubsetForQuery, query).stream()).collect(Collectors.toList() + node.processVariantList(distributableQuery).stream()).collect(Collectors.toList() ); } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java index f2373181..8f0df4e1 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/QueryProcessor.java @@ -105,6 +105,7 @@ private void processColumn(List paths, TreeSet ids, ResultStore Integer x) { String path = paths.get(x-1); if(VariantUtils.pathIsVariantSpec(path)) { + // todo: confirm this entire if block is even used. I don't think it is Optional masks = abstractProcessor.getMasks(path, new VariantBucketHolder<>()); String[] patientIds = abstractProcessor.getPatientIds(); int idPointer = 0; diff --git a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java index 772e2b86..0d0a16c7 100644 --- a/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java +++ b/processing/src/test/java/edu/harvard/hms/dbmi/avillach/hpds/processing/AbstractProcessorTest.java @@ -27,9 +27,6 @@ public class AbstractProcessorTest { @Mock private VariantService variantService; - @Mock - private VariantIndexCache variantIndexCache; - @Mock private GenomicProcessor genomicProcessor; @@ -65,14 +62,13 @@ public void setup() { infoStores, null, variantService, - variantIndexCache, genomicProcessor ); } @Test public void getPatientSubsetForQuery_oneVariantCategoryFilter_indexFound() { - when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4, 6))); + //when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4, 6))); ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(VariantIndex.class); //when(patientVariantJoinHandler.getPatientIdsForIntersectionOfVariantSets(any(), argumentCaptor.capture())).thenReturn(List.of(Set.of(42))); @@ -94,8 +90,8 @@ public void getPatientSubsetForQuery_oneVariantCategoryFilter_indexFound() { @Test public void getPatientSubsetForQuery_oneVariantCategoryFilterTwoValues_unionFilters() { - when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4))); - when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(1))).thenReturn(new SparseVariantIndex(Set.of(6))); + //when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4))); + //when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(1))).thenReturn(new SparseVariantIndex(Set.of(6))); ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(VariantIndex.class); //when(patientVariantJoinHandler.getPatientIdsForIntersectionOfVariantSets(any(), argumentCaptor.capture())).thenReturn(List.of(Set.of(42))); @@ -118,8 +114,8 @@ public void getPatientSubsetForQuery_oneVariantCategoryFilterTwoValues_unionFilt @Test public void getPatientSubsetForQuery_twoVariantCategoryFilters_intersectFilters() { - when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4, 6))); - when(variantIndexCache.get(VARIANT_SEVERITY_KEY, EXAMPLE_VARIANT_SEVERITIES.get(0))).thenReturn(new SparseVariantIndex(Set.of(4, 5, 6, 7))); + //when(variantIndexCache.get(GENE_WITH_VARIANT_KEY, EXAMPLE_GENES_WITH_VARIANT.get(0))).thenReturn(new SparseVariantIndex(Set.of(2, 4, 6))); + //when(variantIndexCache.get(VARIANT_SEVERITY_KEY, EXAMPLE_VARIANT_SEVERITIES.get(0))).thenReturn(new SparseVariantIndex(Set.of(4, 5, 6, 7))); ArgumentCaptor argumentCaptor = ArgumentCaptor.forClass(VariantIndex.class); //when(patientVariantJoinHandler.getPatientIdsForIntersectionOfVariantSets(any(), argumentCaptor.capture())).thenReturn(List.of(Set.of(42)));