diff --git a/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantStore.java b/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantStore.java index ec917bbd..a498831b 100644 --- a/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantStore.java +++ b/data/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/data/genotype/VariantStore.java @@ -16,20 +16,18 @@ public class VariantStore implements Serializable { private static final long serialVersionUID = -6970128712587609414L; + public static final String VARIANT_STORE_JAVABIN_FILENAME = "variantStore.javabin"; + public static final String VARIANT_SPEC_INDEX_JAVABIN_FILENAME = "variantSpecIndex.javabin"; private static Logger log = LoggerFactory.getLogger(VariantStore.class); public static final int BUCKET_SIZE = 1000; - public static final String VARIANT_SPEC_INDEX_FILE = "variantSpecIndex.javabin"; + public static final String VARIANT_SPEC_INDEX_FILE = VARIANT_SPEC_INDEX_JAVABIN_FILENAME; private BigInteger emptyBitmask; private String[] patientIds; private transient String[] variantSpecIndex; - - private Integer variantStorageSize; - - private String[] vcfHeaders = new String[24]; - + private Map>> variantMaskStorage = new TreeMap<>(); public Map>> getVariantMaskStorage() { @@ -48,7 +46,7 @@ public void setVariantSpecIndex(String[] variantSpecIndex) { } public static VariantStore readInstance(String genomicDataDirectory) throws IOException, ClassNotFoundException { - ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(genomicDataDirectory + "variantStore.javabin"))); + ObjectInputStream ois = new ObjectInputStream(new GZIPInputStream(new FileInputStream(genomicDataDirectory + VARIANT_STORE_JAVABIN_FILENAME))); VariantStore variantStore = (VariantStore) ois.readObject(); ois.close(); variantStore.getVariantMaskStorage().values().forEach(store -> { @@ -60,14 +58,14 @@ public static VariantStore readInstance(String genomicDataDirectory) throws IOEx } public void writeInstance(String genomicDirectory) { - try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, "variantStore.javabin")); + try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, VARIANT_STORE_JAVABIN_FILENAME)); GZIPOutputStream gzos = new GZIPOutputStream(fos); ObjectOutputStream oos = new ObjectOutputStream(gzos);) { oos.writeObject(this); } catch (IOException e) { throw new UncheckedIOException(e); } - try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, "variantSpecIndex.javabin")); + try (FileOutputStream fos = new FileOutputStream(new File(genomicDirectory, VARIANT_SPEC_INDEX_JAVABIN_FILENAME)); GZIPOutputStream gzos = new GZIPOutputStream(fos); ObjectOutputStream oos = new ObjectOutputStream(gzos);) { oos.writeObject(Arrays.asList(variantSpecIndex)); @@ -76,41 +74,6 @@ public void writeInstance(String genomicDirectory) { } } - public Map countVariants() { - HashMap countOffsetMap = new HashMap(); - TreeMap counts = new TreeMap<>(); - for (String contig : variantMaskStorage.keySet()) { - counts.put(contig, new int[5]); - FileBackedJsonIndexStorage> storage = variantMaskStorage - .get(contig); - storage.keys().stream().forEach((Integer key) -> { - int[] contigCounts = counts.get(contig); - Collection values = storage.get(key).values(); - contigCounts[0] += values.stream().collect(Collectors.summingInt((VariableVariantMasks masks) -> { - return masks.heterozygousMask != null ? 1 : 0; - })); - contigCounts[1] += values.stream().collect(Collectors.summingInt((VariableVariantMasks masks) -> { - return masks.homozygousMask != null ? 1 : 0; - })); - contigCounts[2] += values.stream().collect(Collectors.summingInt((VariableVariantMasks masks) -> { - return masks.heterozygousNoCallMask != null ? 1 : 0; - })); - contigCounts[3] += values.stream().collect(Collectors.summingInt((VariableVariantMasks masks) -> { - return masks.homozygousNoCallMask != null ? 1 : 0; - })); - contigCounts[4] += values.stream().collect(Collectors.summingInt((VariableVariantMasks masks) -> { - return masks.heterozygousMask != null || masks.homozygousMask != null - || masks.heterozygousNoCallMask != null || masks.homozygousNoCallMask != null ? 1 : 0; - })); - }); - } - return counts; - } - - public String[] getVCFHeaders() { - return vcfHeaders; - } - public String[] getPatientIds() { return patientIds; } @@ -124,11 +87,6 @@ public Optional getMasks(String variant, VariantBucketHold int chrOffset = Integer.parseInt(segments[1]) / BUCKET_SIZE; String contig = segments[0]; -// if (Level.DEBUG.equals(log.getEffectiveLevel())) { -// log.debug("Getting masks for variant " + variant + " Same bucket test: " + (bucketCache.lastValue != null -// && contig.contentEquals(bucketCache.lastContig) && chrOffset == bucketCache.lastChunkOffset)); -// } - if (bucketCache.lastValue != null && contig.contentEquals(bucketCache.lastContig) && chrOffset == bucketCache.lastChunkOffset) { // TODO : This is a temporary efficiency hack, NOT THREADSAFE!!! @@ -166,10 +124,6 @@ public List getMasksForDbSnpSpec(String variant) { } } - public String[] getHeaders() { - return vcfHeaders; - } - public void open() { variantMaskStorage.values().stream().forEach((fbbis -> { if (fbbis != null) { @@ -190,34 +144,6 @@ public void setPatientIds(String[] patientIds) { this.patientIds = patientIds; } - public int getVariantStorageSize() { - return variantStorageSize; - } - - public void setVariantStorageSize(int variantStorageSize) { - this.variantStorageSize = variantStorageSize; - } - - public List getMasksForRangesOfChromosome(String contigForGene, List offsetsForGene, - RangeSet rangeSetsForGene) throws IOException { - FileBackedJsonIndexStorage masksForChromosome = variantMaskStorage.get(contigForGene); - Set bucketsForGene = offsetsForGene.stream().map((offset) -> { - return offset / BUCKET_SIZE; - }).collect(Collectors.toSet()); - List masks = new ArrayList(); - for (Integer bucket : bucketsForGene) { - Map variantMaskBucket = (Map) masksForChromosome.get(bucket); - variantMaskBucket.keySet().stream().filter((String spec) -> { - int offsetForVariant = Integer.parseInt(spec.split(",")[1]); - return rangeSetsForGene.contains(offsetForVariant); - }).forEach((spec) -> { - System.out.println(spec); - masks.add(variantMaskBucket.get(spec)); - }); - } - return masks; - } - public BigInteger emptyBitmask() { if (emptyBitmask == null || emptyBitmask.testBit(emptyBitmask.bitLength() / 2)) { String emptyVariantMask = ""; @@ -229,19 +155,16 @@ public BigInteger emptyBitmask() { return emptyBitmask; } + @SuppressWarnings("unchecked") public static String[] loadVariantIndexFromFile(String genomicDataDirectory) { try (ObjectInputStream objectInputStream = new ObjectInputStream(new GZIPInputStream(new FileInputStream(genomicDataDirectory + "/" + VARIANT_SPEC_INDEX_FILE)));){ List variants = (List) objectInputStream.readObject(); return variants.toArray(new String[0]); - } catch (FileNotFoundException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } catch (ClassNotFoundException e) { + } catch (IOException | ClassNotFoundException e) { throw new RuntimeException(e); } - } + } }