Skip to content

Commit

Permalink
storage: Fix test. Avoid duplicated file entries #TASK-4642
Browse files Browse the repository at this point in the history
  • Loading branch information
j-coll committed Jul 5, 2023
1 parent 9d7c878 commit 2596e2f
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,7 @@ protected StudyEntry convert(List<VariantRow.SampleColumn> sampleDataMap,
}

Map<String, List<String>> alternateFileMap = new HashMap<>();
for (Pair<String, PhoenixArray> pair : filesMap) {
String fileId = pair.getKey();
PhoenixArray fileColumn = pair.getValue();
addFileEntry(studyMetadata, variant, studyEntry, fileId, fileColumn, alternateFileMap);
}
addFileEntries(filesMap, variant, studyMetadata, studyEntry, alternateFileMap);
addSecondaryAlternates(variant, studyEntry, studyMetadata, alternateFileMap);

fillEmptySamplesData(studyEntry, studyMetadata, fillMissingColumnValue);
Expand Down Expand Up @@ -386,8 +382,36 @@ private List<String> remapSamplesData(List<String> sampleData, int[] formatsMap)
}
}

private void addFileEntry(StudyMetadata studyMetadata, Variant variant, StudyEntry studyEntry, String fileIdStr,
PhoenixArray fileColumn, Map<String, List<String>> alternateFileMap) {
/**
 * Adds one file entry per element of {@code filesMap} to the given study entry.
 *
 * Each pair is delegated to {@code addFileEntry}, which either appends a regular entry to
 * {@code studyEntry.getFiles()} or parks the entry in a side list when it is only kept for its
 * "OriginalCall" info. The parked entries are appended afterwards, and only when no entry with
 * the same original call variant id is already present, so that calls are not duplicated.
 *
 * @param filesMap         pairs of file id (as string) and the file column read for that file
 * @param variant          variant being converted
 * @param studyMetadata    metadata of the study the files belong to
 * @param studyEntry       study entry whose file list is populated
 * @param alternateFileMap map handed through to {@code addFileEntry}
 */
private void addFileEntries(List<Pair<String, PhoenixArray>> filesMap, Variant variant, StudyMetadata studyMetadata,
        StudyEntry studyEntry, Map<String, List<String>> alternateFileMap) {
    // Entries that exist only because of their "OriginalCall"; handled after all regular entries.
    List<FileEntry> callOnlyEntries = new ArrayList<>();
    for (Pair<String, PhoenixArray> fileColumnPair : filesMap) {
        addFileEntry(studyMetadata, variant, fileColumnPair.getKey(), fileColumnPair.getValue(), alternateFileMap,
                studyEntry.getFiles(), callOnlyEntries);
    }
    if (callOnlyEntries.isEmpty()) {
        return;
    }

    // Collect the variant ids of the original calls already attached to the study entry.
    Set<String> seenCallVariantIds = new HashSet<>();
    for (FileEntry existing : studyEntry.getFiles()) {
        if (existing.getCall() == null) {
            continue;
        }
        seenCallVariantIds.add(existing.getCall().getVariantId());
    }

    // Append each call-only entry unless its original call has been seen before.
    for (FileEntry candidate : callOnlyEntries) {
        boolean firstOccurrence = seenCallVariantIds.add(candidate.getCall().getVariantId());
        if (firstOccurrence) {
            studyEntry.getFiles().add(candidate);
        }
    }
}

private void addFileEntry(StudyMetadata studyMetadata, Variant variant, String fileIdStr,
PhoenixArray fileColumn, Map<String, List<String>> alternateFileMap,
List<FileEntry> files, List<FileEntry> filesOnlyCall) {
int fileId = Integer.parseInt(fileIdStr);
String alternateRaw = (String) (fileColumn.getElement(FILE_SEC_ALTS_IDX));
String alternate = normalizeNonRefAlternateCoordinate(variant, alternateRaw);
Expand All @@ -401,7 +425,7 @@ private void addFileEntry(StudyMetadata studyMetadata, Variant variant, StudyEnt
&& !configuration.getProjection().getStudy(studyMetadata.getId()).getFiles().contains(fileId)) {
if (call != null && !call.isEmpty()) {
OriginalCall originalCall = parseOriginalCall(call);
studyEntry.getFiles().add(new FileEntry(fileName, originalCall, Collections.emptyMap()));
filesOnlyCall.add(new FileEntry(fileName, originalCall, Collections.emptyMap()));
}
return;
}
Expand All @@ -423,7 +447,7 @@ private void addFileEntry(StudyMetadata studyMetadata, Variant variant, StudyEnt
alternateCoordinate.getReference(),
alternateCoordinate.getAlternate()).toString(), 0);
}
studyEntry.getFiles().add(new FileEntry(fileName, originalCall, attributes));
files.add(new FileEntry(fileName, originalCall, attributes));
}

private OriginalCall parseOriginalCall(String call) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,8 @@ public void testLoadByRegion() throws Exception {
assertEquals(TaskMetadata.Status.NONE, sampleMetadata.getSampleIndexAnnotationStatus(1));
}

checkVariantsTable(studyId_split, studyId_normal, new VariantQuery().includeSample(ParamConstants.ALL), new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES_FILES));
checkVariantsTable(studyId_split, studyId_normal, new VariantQuery().includeSample(ParamConstants.ALL), new QueryOptions(QueryOptions.EXCLUDE, VariantField.STUDIES_FILES),
v -> v.getStudies().get(0).getFiles().forEach(file -> file.setFileId("")));
checkSampleIndex(studyId_split, studyId_normal);
}

Expand Down

0 comments on commit 2596e2f

Please sign in to comment.