diff --git a/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/BucketIndexBySample.java b/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/BucketIndexBySample.java index ade0cfce..1dcb7506 100644 --- a/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/BucketIndexBySample.java +++ b/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/BucketIndexBySample.java @@ -144,12 +144,12 @@ public BucketIndexBySample(VariantStore variantStore, String storageDir) throws * @return * @throws IOException */ - public Collection filterVariantSetForPatientSet(Set variantSet, List patientSet) throws IOException{ - - //a bitmask of which buckets contain any relevant variant. - BigInteger patientBucketMask = patientSet.size() == 0 ? - new BigInteger(new String(emptyBucketMaskChar()),2) : patientBucketMasks.get(patientSet.get(0)); - + public Set filterVariantSetForPatientSet(Set variantSet, Collection patientSet) throws IOException{ + + BigInteger patientBucketMask = patientSet.stream().findFirst() + .map(id -> patientBucketMasks.get(id)) + .orElseGet(() -> new BigInteger(new String(emptyBucketMaskChar()),2)); + BigInteger _defaultMask = patientBucketMask; List patientBucketmasksForSet = patientSet.parallelStream() .map((patientNum)-> patientBucketMasks.get(patientNum)) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/PatientVariantJoinHandler.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/PatientVariantJoinHandler.java index 346c94bb..ba8878b0 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/PatientVariantJoinHandler.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/PatientVariantJoinHandler.java @@ -45,7 +45,10 @@ public BigInteger getPatientIdsForIntersectionOfVariantSets(Set patient Set variantsInScope = intersectionOfInfoFilters.mapToVariantSpec(variantService.getVariantIndex()); - // todo: use BucketIndexBySample.filterVariantSetForPatientSet here? + // todo: determine ideal ratio to bother with this + if (patientsInScope.size() < variantService.getPatientIds().length) { + variantsInScope = variantService.filterVariantSetForPatientSet(variantsInScope, patientsInScope); + } Collection> values = variantsInScope.stream() .collect(Collectors.groupingByConcurrent((variantSpec) -> { return new VariantSpec(variantSpec).metadata.offset / 1000; diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java index 166bf1ee..6c49210c 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java @@ -44,7 +44,7 @@ public String[] getVariantIndex() { public BucketIndexBySample getBucketIndex() { return bucketIndex; } - public Collection filterVariantSetForPatientSet(Set variantSet, List patientSet) { + public Set filterVariantSetForPatientSet(Set variantSet, Collection patientSet) { try { return bucketIndex.filterVariantSetForPatientSet(variantSet, patientSet); } catch (IOException e) {