Skip to content

Commit

Permalink
Fix clinical data NA counts for multiple studies (cBioPortal#10978)
Browse files Browse the repository at this point in the history
fix clinical data NA counts for multiple studies
---------

Co-authored-by: lismana <[email protected]>
  • Loading branch information
onursumer and alisman authored Sep 11, 2024
1 parent 10c4a87 commit 59d84c9
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public interface StudyViewRepository {

/**
 * Returns a sample count for the given study view filter context.
 *
 * @param studyViewFilterContext the study view filter context to evaluate
 * @return the sample count; presumably the number of samples passing the filter (confirm against the implementation)
 */
int getFilteredSamplesCount(StudyViewFilterContext studyViewFilterContext);

/**
 * Returns a patient count for the given study view filter context.
 *
 * @param studyViewFilterContext the study view filter context to evaluate
 * @return the patient count; presumably the number of distinct patients owning the filtered samples (confirm against the implementation)
 */
int getFilteredPatientCount(StudyViewFilterContext studyViewFilterContext);

/**
 * Returns gene panel ids matching the given filter context and alteration type.
 *
 * @param studyViewFilterContext the study view filter context to evaluate
 * @param alterationType the alteration type to match
 * @return a map of string keys to sets of strings; NOTE(review): the meaning of keys and values is not visible here, confirm against the implementation
 */
Map<String, Set<String>> getMatchingGenePanelIds(StudyViewFilterContext studyViewFilterContext, String alterationType);

/**
 * Returns a total profiled count for the given filter context and alteration type.
 *
 * @param studyViewFilterContext the study view filter context to evaluate
 * @param alterationType the alteration type to count for
 * @return the total profiled count; NOTE(review): whether this counts samples or patients is not visible here, confirm against the implementation
 */
int getTotalProfiledCountsByAlterationType(StudyViewFilterContext studyViewFilterContext, String alterationType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ public interface StudyViewMapper {

/**
 * Mapper method returning a sample count for the given filter helper.
 *
 * @param studyViewFilterHelper filter helper, bound under the name {@code studyViewFilterHelper}
 * @return the sample count produced by the mapped statement
 */
int getFilteredSamplesCount(@Param("studyViewFilterHelper") StudyViewFilterHelper studyViewFilterHelper);

/**
 * Mapper method returning a patient count for the given filter helper.
 *
 * @param studyViewFilterHelper filter helper, bound under the name {@code studyViewFilterHelper}
 * @return the patient count produced by the mapped statement
 */
int getFilteredPatientsCount(@Param("studyViewFilterHelper") StudyViewFilterHelper studyViewFilterHelper);

/**
 * Mapper method returning gene-panel-to-gene rows for the given filter helper and alteration type.
 *
 * NOTE(review): unlike the count methods in this interface, these parameters carry no
 * {@code @Param} annotation; confirm the mapped statement can resolve them by name.
 *
 * @param studyViewFilterHelper filter helper describing the study view filter
 * @param alterationType the alteration type to match
 * @return the rows produced by the mapped statement
 */
List<GenePanelToGene> getMatchingGenePanelIds(StudyViewFilterHelper studyViewFilterHelper, String alterationType);

/**
 * Mapper method returning a total profiled count for the given filter helper and alteration type.
 *
 * NOTE(review): these parameters carry no {@code @Param} annotation; confirm the mapped
 * statement can resolve them by name.
 *
 * @param studyViewFilterHelper filter helper describing the study view filter
 * @param alterationType the alteration type to count for
 * @return the count produced by the mapped statement
 */
int getTotalProfiledCountByAlterationType(StudyViewFilterHelper studyViewFilterHelper, String alterationType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ public int getFilteredSamplesCount(StudyViewFilterContext studyViewFilterContext
return mapper.getFilteredSamplesCount(createStudyViewFilterHelper(studyViewFilterContext));
}

/**
 * Delegates the patient count for the given filter context to the mapper.
 *
 * @param studyViewFilterContext the study view filter context to evaluate
 * @return the value returned by the mapper's {@code getFilteredPatientsCount}
 */
@Override
public int getFilteredPatientCount(StudyViewFilterContext studyViewFilterContext) {
    // Translate the context into the helper object the mapper expects, then query.
    StudyViewFilterHelper filterHelper = createStudyViewFilterHelper(studyViewFilterContext);
    return mapper.getFilteredPatientsCount(filterHelper);
}

@Override
public Map<String, Set<String>> getMatchingGenePanelIds(StudyViewFilterContext studyViewFilterContext, String alterationType) {
return mapper.getMatchingGenePanelIds(createStudyViewFilterHelper(studyViewFilterContext), alterationType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,17 @@ public Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap() {

/**
 * Fetches clinical data counts for the requested attributes and adjusts their "NA" counts.
 *
 * <p>The raw counts from the repository are grouped into items and then post-processed by
 * {@code calculateMissingNaCountsForClinicalDataCountItems}, using the filtered sample and
 * patient totals from the repository.
 *
 * @param studyViewFilter the study view filter to apply
 * @param filteredAttributes ids of the clinical attributes to return counts for
 * @return the clinical data count items after the NA adjustment
 */
@Override
public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes) {
    // Create the context once and reuse it for the data counts and for both totals.
    StudyViewFilterContext studyViewFilterContext = createContext(studyViewFilter);
    List<ClinicalDataCount> dataCounts = studyViewRepository.getClinicalDataCounts(studyViewFilterContext, filteredAttributes);
    List<ClinicalDataCountItem> clinicalDataCountItems = generateDataCountItemsFromDataCounts(dataCounts);

    // No early return here: the NA adjustment below must always run.
    return calculateMissingNaCountsForClinicalDataCountItems(
        clinicalDataCountItems,
        filteredAttributes,
        this.getClinicalAttributeDatatypeMap(),
        studyViewRepository.getFilteredSamplesCount(studyViewFilterContext),
        studyViewRepository.getFilteredPatientCount(studyViewFilterContext)
    );
}

@Override
Expand Down Expand Up @@ -170,6 +180,76 @@ private List<ClinicalDataCountItem> generateDataCountItemsFromDataCounts(List<Cl
}).toList();
}

/**
 * Adjusts the "NA" count of each requested clinical attribute so that its counts add up to
 * the number of filtered cases.
 *
 * <p>For every id in {@code filteredAttributes}, the difference between the filtered case
 * count and the sum of the attribute's existing counts is added to its "NA" entry. Sample
 * attributes use {@code filteredSamplesCount}; all other attributes use
 * {@code filteredPatientsCount}. A missing item or a missing "NA" entry is created when the
 * difference is positive.
 *
 * @param clinicalDataCountItems existing count items; not modified as a list, but the items
 *     it contains may be updated in place
 * @param filteredAttributes ids of all requested attributes, including ones with no item
 * @param clinicalAttributeDatatypeMap attribute id to data type
 * @param filteredSamplesCount number of filtered samples
 * @param filteredPatientsCount number of filtered patients
 * @return a new list holding the newly created items followed by all existing items
 */
public static List<ClinicalDataCountItem> calculateMissingNaCountsForClinicalDataCountItems(
    List<ClinicalDataCountItem> clinicalDataCountItems,
    List<String> filteredAttributes,
    Map<String, ClinicalDataType> clinicalAttributeDatatypeMap,
    int filteredSamplesCount,
    int filteredPatientsCount
) {
    // Collects the items created below; the existing items are appended at the end.
    List<ClinicalDataCountItem> combinedClinicalDataCountItems = new ArrayList<>();

    Map<String, ClinicalDataCountItem> clinicalDataCountItemMap = clinicalDataCountItems
        .stream()
        .collect(Collectors.toMap(
            ClinicalDataCountItem::getAttributeId,
            item -> item
        ));

    // Go over all filtered attributes, not just attributes found in clinicalDataCountItems.
    for (String attributeId : filteredAttributes) {
        ClinicalDataCountItem clinicalDataCountItem = clinicalDataCountItemMap.get(attributeId);
        boolean isItemMissing = clinicalDataCountItem == null;

        if (isItemMissing) {
            clinicalDataCountItem = new ClinicalDataCountItem();
            clinicalDataCountItem.setAttributeId(attributeId);
            clinicalDataCountItem.setCounts(new ArrayList<>());
        }

        int totalClinicalDataCount = clinicalDataCountItem
            .getCounts()
            .stream()
            .mapToInt(ClinicalDataCount::getCount)
            .sum();

        // Depending on clinical data type we either use filtered sample count or filtered patient count.
        int filteredCount = clinicalAttributeDatatypeMap.get(attributeId) == ClinicalDataType.SAMPLE
            ? filteredSamplesCount
            : filteredPatientsCount;
        int casesWithoutClinicalData = filteredCount - totalClinicalDataCount;

        if (casesWithoutClinicalData <= 0) {
            continue;
        }

        // Some attributes may be completely missing from clinicalDataCountItems when their
        // only value is NA; add those items manually so they still report an NA count.
        if (isItemMissing) {
            combinedClinicalDataCountItems.add(clinicalDataCountItem);
        }

        // Find the existing "NA" entry; the constant-first comparison tolerates null values.
        ClinicalDataCount naClinicalDataCount = null;
        for (ClinicalDataCount candidate : clinicalDataCountItem.getCounts()) {
            if ("NA".equals(candidate.getValue())) {
                naClinicalDataCount = candidate;
                break;
            }
        }

        if (naClinicalDataCount == null) {
            // This should only happen when there are multiple studies.
            naClinicalDataCount = new ClinicalDataCount();
            naClinicalDataCount.setAttributeId(attributeId);
            naClinicalDataCount.setValue("NA");
            naClinicalDataCount.setCount(0);

            // Attach the new entry to the item, otherwise the NA count would be lost.
            // Copy the list first because the existing one may not support add().
            List<ClinicalDataCount> countsWithNa = new ArrayList<>(clinicalDataCountItem.getCounts());
            countsWithNa.add(naClinicalDataCount);
            clinicalDataCountItem.setCounts(countsWithNa);
        }

        naClinicalDataCount.setCount(naClinicalDataCount.getCount() + casesWithoutClinicalData);
    }

    combinedClinicalDataCountItems.addAll(clinicalDataCountItems);
    return combinedClinicalDataCountItems;
}

public static List<CaseListDataCount> mergeCaseListCounts(List<CaseListDataCount> counts) {
Map<String, List<CaseListDataCount>> countsPerListType = counts.stream()
.collect((Collectors.groupingBy(CaseListDataCount::getValue)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,13 @@
FROM sample_derived
WHERE sample_unique_id IN (<include refid="sampleUniqueIdsFromStudyViewFilter"/>)
</sql>


<!-- Reusable fragment: counts the distinct patient_unique_id values in sample_derived
     for samples whose sample_unique_id is selected by sampleUniqueIdsFromStudyViewFilter -->
<sql id="getTotalPatientCount">
SELECT count(distinct patient_unique_id) as count
FROM sample_derived sd
WHERE sample_unique_id IN (<include refid="sampleUniqueIdsFromStudyViewFilter"/>)
</sql>

<!-- for /genomic-data-counts/fetch - (returns GenomicDataCountItem objects) -->
<select id="getCNACounts" resultMap="GenomicDataCountItemResultMap">
<bind name="profileType" value="genomicDataFilters[0].profileType" />
Expand Down Expand Up @@ -396,7 +402,11 @@
<!-- Returns a single int by running the getTotalSampleCount fragment -->
<select id="getFilteredSamplesCount" resultType="int">
<include refid="getTotalSampleCount"/>
</select>


<!-- Returns a single int by running the getTotalPatientCount fragment -->
<select id="getFilteredPatientsCount" resultType="int">
<include refid="getTotalPatientCount"/>
</select>

<!-- Get Matching Gene Panel Ids for all Samples after study view filter -->
<select id="getMatchingGenePanelIds" resultType="org.cbioportal.model.GenePanelToGene">
SELECT
Expand Down
1 change: 0 additions & 1 deletion test/api-e2e/specs/clinical-data-filters.json
Original file line number Diff line number Diff line change
Expand Up @@ -3178,7 +3178,6 @@
{
"hash": -308561323,
"filterString": "Diagnosis Age: < x ≤ 4570Mutation Count: < x ≤ 2080Ethnicity Category:Hispanic Or Latino",
"skip": "Should be fixed by NA backend fix",
"data": {
"attributes": [
{
Expand Down

0 comments on commit 59d84c9

Please sign in to comment.