Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge Alteration Counts that have conflicting HugoSymbols for Study V… #11026

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -255,18 +255,18 @@ public Pair<List<CopyNumberCountByGene>, Long> getPatientCnaGeneCounts(List<Mole
/**
 * Fetches the mutated-gene alteration counts for the given filter context from
 * {@code studyViewRepository} and passes them through {@code populateAlterationCounts}
 * with {@code AlterationType.MUTATION_EXTENDED}.
 *
 * NOTE(review): this block comes from a diff rendering without +/- markers, so both sides
 * of the changed line appear. The first {@code return} looks like the pre-change line; the
 * second is its replacement, which first merges entries that share a Hugo gene symbol via
 * {@code combineAlterationCountsWithConflictingHugoSymbols}. Confirm against the merged file.
 *
 * @param studyViewFilterContext filter context forwarded to the repository query
 * @return the list returned by {@code populateAlterationCounts}
 */
@Override
public List<AlterationCountByGene> getMutatedGenes(StudyViewFilterContext studyViewFilterContext) {
var alterationCountByGenes = studyViewRepository.getMutatedGenes(studyViewFilterContext);
return populateAlterationCounts(alterationCountByGenes, studyViewFilterContext, AlterationType.MUTATION_EXTENDED);
return populateAlterationCounts(combineAlterationCountsWithConflictingHugoSymbols(alterationCountByGenes), studyViewFilterContext, AlterationType.MUTATION_EXTENDED);
}

/**
 * Fetches the copy-number alteration counts for the given filter context from
 * {@code studyViewRepository} and passes them through {@code populateAlterationCounts}
 * with {@code AlterationType.COPY_NUMBER_ALTERATION}.
 *
 * NOTE(review): this block comes from a diff rendering without +/- markers, so both sides
 * of the changed line appear. The first {@code return} looks like the pre-change line; the
 * second is its replacement, which first merges duplicate entries via
 * {@code combineCopyNumberCountsWithConflictingHugoSymbols}. Confirm against the merged file.
 *
 * @param studyViewFilterContext filter context forwarded to the repository query
 * @return the list returned by {@code populateAlterationCounts}
 */
public List<CopyNumberCountByGene> getCnaGenes(StudyViewFilterContext studyViewFilterContext) {
var copyNumberCountByGenes = studyViewRepository.getCnaGenes(studyViewFilterContext);
return populateAlterationCounts(copyNumberCountByGenes, studyViewFilterContext, AlterationType.COPY_NUMBER_ALTERATION);
return populateAlterationCounts(combineCopyNumberCountsWithConflictingHugoSymbols(copyNumberCountByGenes), studyViewFilterContext, AlterationType.COPY_NUMBER_ALTERATION);
}

/**
 * Fetches the structural-variant alteration counts for the given filter context from
 * {@code studyViewRepository} and passes them through {@code populateAlterationCounts}
 * with {@code AlterationType.STRUCTURAL_VARIANT}.
 *
 * NOTE(review): this block comes from a diff rendering without +/- markers, so both sides
 * of the changed line appear. The first {@code return} looks like the pre-change line; the
 * second is its replacement, which first merges entries that share a Hugo gene symbol via
 * {@code combineAlterationCountsWithConflictingHugoSymbols}. Confirm against the merged file.
 *
 * @param studyViewFilterContext filter context forwarded to the repository query
 * @return the list returned by {@code populateAlterationCounts}
 */
@Override
public List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterContext studyViewFilterContext) {
var alterationCountByGenes = studyViewRepository.getStructuralVariantGenes(studyViewFilterContext);
return populateAlterationCounts(alterationCountByGenes, studyViewFilterContext, AlterationType.STRUCTURAL_VARIANT);
return populateAlterationCounts(combineAlterationCountsWithConflictingHugoSymbols(alterationCountByGenes), studyViewFilterContext, AlterationType.STRUCTURAL_VARIANT);
}

private < T extends AlterationCountByGene> List<T> populateAlterationCounts(@NonNull List<T> alterationCounts,
Expand Down Expand Up @@ -294,6 +294,55 @@ private < T extends AlterationCountByGene> List<T> populateAlterationCounts(@Non
return alterationCounts;
}

/**
 * Merges alteration counts that share a Hugo gene symbol into a single entry.
 *
 * <p>This situation appears in the data when genes have the same Hugo gene symbol but
 * different Entrez ids. For each symbol, the first entry encountered is kept and the
 * number of altered cases and total count of every later entry with that symbol are added
 * to it. The kept entry is updated in place, so elements of the input list are mutated.
 *
 * <p>The result preserves the order in which each symbol first appears in the input.
 *
 * @param alterationCounts list of AlterationCountByGene objects, potentially with duplicate gene symbols
 * @return unmodifiable list of AlterationCountByGene objects in which each gene symbol
 *         occurs exactly once, carrying the summed counts
 */
private List<AlterationCountByGene> combineAlterationCountsWithConflictingHugoSymbols(@NonNull List<AlterationCountByGene> alterationCounts) {
    // Insertion-ordered map: a plain HashMap would return the genes in hash order and
    // discard whatever ordering the caller supplied.
    Map<String, AlterationCountByGene> countsBySymbol = new java.util.LinkedHashMap<>();
    for (var alterationCount : alterationCounts) {
        // putIfAbsent returns the entry already stored for this symbol, or null when
        // this is the first occurrence (in which case the entry has just been stored).
        var existing = countsBySymbol.putIfAbsent(alterationCount.getHugoGeneSymbol(), alterationCount);
        if (existing != null) {
            existing.setNumberOfAlteredCases(existing.getNumberOfAlteredCases() + alterationCount.getNumberOfAlteredCases());
            existing.setTotalCount(existing.getTotalCount() + alterationCount.getTotalCount());
        }
    }
    return countsBySymbol.values().stream().toList();
}

/**
 * Merges copy-number counts that share both a Hugo gene symbol and an alteration value
 * into a single entry.
 *
 * <p>This situation appears in the data when genes have the same Hugo gene symbol but
 * different Entrez ids. Copy-number counts are a special case: the alteration is part of
 * the merge key, so entries for the same symbol with different alterations stay separate.
 * For each (symbol, alteration) pair, the first entry encountered is kept and the number of
 * altered cases and total count of every later entry with that pair are added to it. The
 * kept entry is updated in place, so elements of the input list are mutated.
 *
 * <p>The result preserves the order in which each pair first appears in the input.
 *
 * @param alterationCounts list of CopyNumberCountByGene objects, potentially with duplicate
 *                         (gene symbol, alteration) pairs
 * @return unmodifiable list of CopyNumberCountByGene objects in which each
 *         (gene symbol, alteration) pair occurs exactly once, carrying the summed counts
 */
private List<CopyNumberCountByGene> combineCopyNumberCountsWithConflictingHugoSymbols(@NonNull List<CopyNumberCountByGene> alterationCounts) {
    // Insertion-ordered map: a plain HashMap would return the entries in hash order and
    // discard whatever ordering the caller supplied.
    Map<Pair<String, Integer>, CopyNumberCountByGene> countsBySymbolAndAlteration = new java.util.LinkedHashMap<>();
    for (var alterationCount : alterationCounts) {
        var copyNumberKey = Pair.create(alterationCount.getHugoGeneSymbol(), alterationCount.getAlteration());
        // putIfAbsent returns the entry already stored for this key, or null when this
        // is the first occurrence (in which case the entry has just been stored).
        var existing = countsBySymbolAndAlteration.putIfAbsent(copyNumberKey, alterationCount);
        if (existing != null) {
            existing.setNumberOfAlteredCases(existing.getNumberOfAlteredCases() + alterationCount.getNumberOfAlteredCases());
            existing.setTotalCount(existing.getTotalCount() + alterationCount.getTotalCount());
        }
    }
    return countsBySymbolAndAlteration.values().stream().toList();
}

private boolean hasGenePanelData(@NonNull Set<String> matchingGenePanelIds) {
return matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING)
Expand Down
Loading