Skip to content

Commit

Permalink
Add bucket filtering optimization to genomic processing
Browse files (browse the repository at this point in the history)
  • Loading branch information
ramari16 committed Jan 9, 2024
1 parent a63e6dd commit b241bed
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ public BucketIndexBySample(VariantStore variantStore, String storageDir) throws
* @return
* @throws IOException
*/
public Collection<String> filterVariantSetForPatientSet(Set<String> variantSet, List<Integer> patientSet) throws IOException{
//a bitmask of which buckets contain any relevant variant.
BigInteger patientBucketMask = patientSet.size() == 0 ?
new BigInteger(new String(emptyBucketMaskChar()),2) : patientBucketMasks.get(patientSet.get(0));
public Set<String> filterVariantSetForPatientSet(Set<String> variantSet, Collection<Integer> patientSet) throws IOException{

BigInteger patientBucketMask = patientSet.stream().findFirst()
.map(id -> patientBucketMasks.get(id))
.orElseGet(() -> new BigInteger(new String(emptyBucketMaskChar()),2));

BigInteger _defaultMask = patientBucketMask;
List<BigInteger> patientBucketmasksForSet = patientSet.parallelStream()
.map((patientNum)-> patientBucketMasks.get(patientNum))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,10 @@ public BigInteger getPatientIdsForIntersectionOfVariantSets(Set<Integer> patient

Set<String> variantsInScope = intersectionOfInfoFilters.mapToVariantSpec(variantService.getVariantIndex());

// todo: use BucketIndexBySample.filterVariantSetForPatientSet here?
// todo: determine the ratio of in-scope patients to total patients below which this pre-filtering is worth its cost
if (patientsInScope.size() < variantService.getPatientIds().length) {
variantsInScope = variantService.filterVariantSetForPatientSet(variantsInScope, patientsInScope);
}
Collection<List<String>> values = variantsInScope.stream()
.collect(Collectors.groupingByConcurrent((variantSpec) -> {
return new VariantSpec(variantSpec).metadata.offset / 1000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public String[] getVariantIndex() {
public BucketIndexBySample getBucketIndex() {
return bucketIndex;
}
public Collection<String> filterVariantSetForPatientSet(Set<String> variantSet, List<Integer> patientSet) {
public Set<String> filterVariantSetForPatientSet(Set<String> variantSet, Collection<Integer> patientSet) {
try {
return bucketIndex.filterVariantSetForPatientSet(variantSet, patientSet);
} catch (IOException e) {
Expand Down

0 comments on commit b241bed

Please sign in to comment.