diff --git a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/GenomicDatasetMerger.java b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/GenomicDatasetMerger.java index 0e7e0ed7..235dbb29 100644 --- a/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/GenomicDatasetMerger.java +++ b/etl/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/etl/genotype/GenomicDatasetMerger.java @@ -20,6 +20,8 @@ public class GenomicDatasetMerger { + public static final String INFO_STORE_JAVABIN_SUFFIX = "infoStore.javabin"; + public static final String VARIANT_SPEC_INDEX_FILENAME = "variantSpecIndex.javabin"; private static Logger log = LoggerFactory.getLogger(GenomicDatasetMerger.class); private final VariantStore variantStore1; @@ -64,7 +66,7 @@ public static void main(String[] args) throws IOException, ClassNotFoundExceptio public void merge() throws IOException { Map>> mergedChromosomeMasks = mergeChromosomeMasks(); mergeVariantStore(mergedChromosomeMasks); - Map mergedVariantIndexes = mergeVariantIndexes(); + mergeVariantIndexes(); } public void mergeVariantStore(Map>> mergedChromosomeMasks) { @@ -138,13 +140,12 @@ public Map mergeVariantIndexes() throws FileBackedByteIndexedStorage allValuesStore1 = infoStores1Entry.getValue().getAllValues(); FileBackedByteIndexedStorage allValuesStore2 = infoStore2.getAllValues(); - //FileBackedByteIndexedStorage mergedIndexedStorage = new FileBackedJavaIndexedStorage<>(String.class, String[].class, new File(outputDirectory)); ConcurrentHashMap> mergedInfoStoreValues = new ConcurrentHashMap<>(); - Sets.SetView allKeys = Sets.intersection(allValuesStore1.keys(), allValuesStore2.keys()); + Sets.SetView allKeys = Sets.union(allValuesStore1.keys(), allValuesStore2.keys()); for (String key : allKeys) { - Set store1Values = new HashSet<>(Arrays.asList(allValuesStore1.getOrELse(key, new Integer[]{}))); - Set store2Values = new HashSet<>(Arrays.asList(allValuesStore2.getOrELse(key, new Integer[]{}))); + Set store1Values = Set.of(allValuesStore1.getOrELse(key, new Integer[]{})); + Set store2Values = Set.of(allValuesStore2.getOrELse(key, new Integer[]{})); Set remappedValuesStore2 = store2Values.stream().map(value -> remappedIndexes[value]).collect(Collectors.toSet()); Set mergedValues = Sets.union(store1Values, remappedValuesStore2); @@ -155,10 +156,10 @@ public Map mergeVariantIndexes() throws infoStore.allValues = mergedInfoStoreValues; FileBackedByteIndexedInfoStore mergedStore = new FileBackedByteIndexedInfoStore(new File(outputDirectory), infoStore); mergedInfoStores.put(infoStores1Entry.getKey(), mergedStore); - mergedStore.write(new File(outputDirectory + infoStore.column_key + "_infoStore.javabin")); + mergedStore.write(new File(outputDirectory + infoStore.column_key + "_" + INFO_STORE_JAVABIN_SUFFIX)); } - try (FileOutputStream fos = new FileOutputStream(new File(outputDirectory, "variantSpecIndex.javabin")); + try (FileOutputStream fos = new FileOutputStream(new File(outputDirectory, VARIANT_SPEC_INDEX_FILENAME)); GZIPOutputStream gzos = new GZIPOutputStream(fos); ObjectOutputStream oos = new ObjectOutputStream(gzos);) { oos.writeObject(variantSpecList); @@ -171,7 +172,7 @@ private Map loadInfoStores(String direct Map infoStores = new HashMap<>(); File genomicDataDirectory = new File(directory); if(genomicDataDirectory.exists() && genomicDataDirectory.isDirectory()) { - Arrays.stream(genomicDataDirectory.list((file, filename)->{return filename.endsWith("infoStore.javabin");})) + Arrays.stream(genomicDataDirectory.list((file, filename)->{return filename.endsWith(INFO_STORE_JAVABIN_SUFFIX);})) .forEach((String filename)->{ try ( FileInputStream fis = new FileInputStream(directory + filename); @@ -181,7 +182,7 @@ private Map loadInfoStores(String direct log.info("loading " + filename); FileBackedByteIndexedInfoStore infoStore = (FileBackedByteIndexedInfoStore) ois.readObject(); infoStore.updateStorageDirectory(genomicDataDirectory); - infoStores.put(filename.replace("_infoStore.javabin", ""), infoStore); + infoStores.put(filename.replace("_" + INFO_STORE_JAVABIN_SUFFIX, ""), infoStore); } catch (IOException | ClassNotFoundException e) { throw new RuntimeException(e); }