Skip to content

Commit

Permalink
Molecular-profile-counts-service
Browse files Browse the repository at this point in the history
* add unit tests
* fix multiple-study scenario

Update method name getGenomicDataCounts -> getMolecularProfileSampleCounts
  • Loading branch information
alisman committed Aug 9, 2024
1 parent d62c6e5 commit b74a07a
Show file tree
Hide file tree
Showing 12 changed files with 181 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public interface StudyViewRepository {

List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes);

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter);
List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter);

List<ClinicalAttribute> getClinicalAttributes();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
public interface StudyViewMapper {
List<Sample> getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);
List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters);

List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter,
boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import org.cbioportal.web.parameter.StudyViewFilter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Repository;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -78,10 +80,33 @@ public List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilter studyViewFi
}

@Override
public List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter) {
public List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter) {
CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter);
return mapper.getGenomicDataCounts(studyViewFilter, categorizedClinicalDataCountFilter,
var counts = mapper.getMolecularProfileSampleCounts(studyViewFilter, categorizedClinicalDataCountFilter,
shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter));

// Bucket the rows returned by the mapper by their value, i.e. the profile type
// (e.g. mutation).
Map<String, List<GenomicDataCount>> countsByProfileType = counts.stream()
    .collect(Collectors.groupingBy(GenomicDataCount::getValue));

// When several cancer studies are combined into one cohort, each study carries
// its own molecular profile of a given type, so the mapper yields one row per
// study for that type. Collapse every bucket into a single entry whose count is
// the sum over the bucket. The label is taken from the first row of the bucket,
// which assumes labels for a given type match across studies.
List<GenomicDataCount> mergedCounts = new ArrayList<>();
countsByProfileType.forEach((profileType, profileCounts) -> {
    GenomicDataCount merged = new GenomicDataCount();
    merged.setValue(profileType);
    merged.setLabel(profileCounts.get(0).getLabel());
    merged.setCount(profileCounts.stream().mapToInt(GenomicDataCount::getCount).sum());
    mergedCounts.add(merged);
});
return mergedCounts;

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public interface StudyViewColumnarService {

List<ClinicalData> getSampleClinicalData(StudyViewFilter studyViewFilter, List<String> attributeIds);

List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter);
List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter);

List<ClinicalEventTypeCount> getClinicalEventTypeCounts(StudyViewFilter studyViewFilter);
PatientTreatmentReport getPatientTreatmentReport(StudyViewFilter studyViewFilter);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ public List<AlterationCountByGene> getMutatedGenes(StudyViewFilter studyViewFilt
}

@Override
public List<GenomicDataCount> getGenomicDataCounts(StudyViewFilter studyViewFilter) {
return studyViewRepository.getGenomicDataCounts(studyViewFilter);
public List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilter studyViewFilter) {
return studyViewRepository.getMolecularProfileSampleCounts(studyViewFilter);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ public List<GenomicDataCount> getGenomicDataCounts(List<String> studyIds, List<S
.collect(Collectors.toMap(MolecularProfile::getStableId, Function.identity()));

// get gene panels
// note that if it's a patient-level profile, this counts distinct patient ids.
// Not sure this is correct, though: if it's patient level, wouldn't you want to
// count all the samples for that given patient?
Map<String, Integer> molecularProfileCaseCountSet = genePanelService
.fetchGenePanelDataInMultipleMolecularProfiles(molecularProfileSampleIdentifiers)
.stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ public ResponseEntity<List<GenomicDataCount>> fetchMolecularProfileSampleCounts(
)
{
return new ResponseEntity<List<GenomicDataCount>>(
studyViewColumnarService.getGenomicDataCounts(interceptedStudyViewFilter)
studyViewColumnarService.getMolecularProfileSampleCounts(interceptedStudyViewFilter)
, HttpStatus.OK);
}

Expand Down
3 changes: 2 additions & 1 deletion src/main/resources/db-scripts/clickhouse/clickhouse.sql
Original file line number Diff line number Diff line change
Expand Up @@ -282,4 +282,5 @@ OPTIMIZE TABLE gene_panel_to_gene_derived;
OPTIMIZE TABLE sample_derived;
OPTIMIZE TABLE genomic_event_derived;
OPTIMIZE TABLE clinical_data_derived;
OPTIMIZE TABLE clinical_event_derived;
OPTIMIZE TABLE clinical_event_derived;

Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,20 @@
</select>

<!-- for /molecular-profile-sample-counts/fetch (returns GenomicDataCount) which will then be converted to clinicalDataCountItems -->
<select id="getGenomicDataCounts" resultType="org.cbioportal.model.GenomicDataCount">
<select id="getMolecularProfileSampleCounts" resultType="org.cbioportal.model.GenomicDataCount">
--we need to derive the alteration type from the stable_id by removing the cancer study id
--this should probably be refactored at some point, but we need to maintain the API interface
SELECT replaceOne(genetic_profile.stable_id, concat(sample_derived.cancer_study_identifier,'_'), '') AS value,
genetic_profile.stable_id,
genetic_profile.name AS label,
count(sample_profile.genetic_profile_id) AS count FROM sample_profile
LEFT JOIN sample_derived ON sample_profile.sample_id=sample_derived.internal_id
LEFT JOIN genetic_profile on sample_profile.genetic_profile_id = genetic_profile.genetic_profile_id
<where>
SELECT replaceOne(genetic_profile.stable_id, concat(sample_derived.cancer_study_identifier,'_'), '') AS value,
genetic_profile.name AS label,
count(sample_profile.genetic_profile_id) AS count FROM sample_profile
LEFT JOIN sample_derived ON sample_profile.sample_id=sample_derived.internal_id
LEFT JOIN genetic_profile on sample_profile.genetic_profile_id = genetic_profile.genetic_profile_id
<where>
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
</include>
</where>
GROUP BY genetic_profile.stable_id, genetic_profile.name, sample_derived.cancer_study_identifier;
GROUP BY genetic_profile.stable_id, genetic_profile.name, sample_derived.cancer_study_identifier
</select>


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public static void beforeAll() {

@ClassRule
public static final ClickHouseContainer clickhouseContainer =
new ClickHouseContainer("clickhouse/clickhouse-server:22.6")
new ClickHouseContainer("clickhouse/clickhouse-server:24.5")
.withUsername("cbio_user")
.withPassword("P@ssword1")
.withClasspathResourceMapping("clickhouse_cgds.sql", "/docker-entrypoint-initdb.d/a_schema.sql",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package org.cbioportal.persistence.mybatisclickhouse;

import org.cbioportal.model.AlterationFilter;
import org.cbioportal.model.MutationEventType;
import org.cbioportal.persistence.helper.AlterationFilterHelper;
import org.cbioportal.persistence.mybatisclickhouse.config.MyBatisConfig;
import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter;
import org.cbioportal.web.parameter.DataFilter;
import org.cbioportal.web.parameter.DataFilterValue;
import org.cbioportal.web.parameter.StudyViewFilter;
import org.cbioportal.web.parameter.filter.AndedPatientTreatmentFilters;
import org.cbioportal.web.parameter.filter.OredPatientTreatmentFilters;
import org.cbioportal.web.parameter.filter.PatientTreatmentFilter;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.autoconfigure.jdbc.AutoConfigureTestDatabase;
import org.springframework.boot.test.autoconfigure.orm.jpa.DataJpaTest;
import org.springframework.context.annotation.Import;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

@RunWith(SpringRunner.class)
@Import(MyBatisConfig.class)
@DataJpaTest
@AutoConfigureTestDatabase(replace= AutoConfigureTestDatabase.Replace.NONE)
@ContextConfiguration(initializers = AbstractTestcontainers.Initializer.class)
public class MolecularProfileCountTest extends AbstractTestcontainers {

private static final String STUDY_TCGA_PUB = "study_tcga_pub";
private static final String STUDY_ACC_TCGA = "acc_tcga";

@Autowired
private StudyViewMapper studyViewMapper;

@Test
public void getMolecularProfileCounts() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));

var profiles = new ArrayList<String>(Arrays.asList("mutations"));
var profileGroups = new ArrayList<List<String>>(Arrays.asList(profiles));

studyViewFilter.setGenomicProfiles(profileGroups);

var molecularProfileCounts = studyViewMapper.getMolecularProfileSampleCounts(studyViewFilter,
CategorizedClinicalDataCountFilter.getBuilder().build(), false );

var size = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(10, size);

}

@Test
public void getMolecularProfileCountsMultipleStudies() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB, STUDY_ACC_TCGA));

var profiles = new ArrayList<String>(Arrays.asList("mutations"));
var profileGroups = new ArrayList<List<String>>(Arrays.asList(profiles));

studyViewFilter.setGenomicProfiles(profileGroups);

var molecularProfileCounts = studyViewMapper.getMolecularProfileSampleCounts(studyViewFilter,
CategorizedClinicalDataCountFilter.getBuilder().build(), false );

var size = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(10, size);

}

@Test
public void getMolecularProfileCountsMultipleProfilesUnion() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));

var profiles = new ArrayList<String>(Arrays.asList("mutations","mrna"));
var profileGroups = new ArrayList<List<String>>(Arrays.asList(profiles));

studyViewFilter.setGenomicProfiles(profileGroups);

var molecularProfileCounts = studyViewMapper.getMolecularProfileSampleCounts(studyViewFilter,
CategorizedClinicalDataCountFilter.getBuilder().build(), false );

var sizeMutations = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(10, sizeMutations);

var sizeMrna = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mrna"))
.findFirst().get().getCount().intValue();
assertEquals(9, sizeMrna);

}

@Test
public void getMolecularProfileCountsMultipleProfilesIntersect() {
StudyViewFilter studyViewFilter = new StudyViewFilter();
studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB));

var profile1 = new ArrayList<String>(Arrays.asList("mutations"));
var profile2 = new ArrayList<String>(Arrays.asList("mrna"));
var profileGroups = new ArrayList<List<String>>(Arrays.asList(profile1, profile2));

studyViewFilter.setGenomicProfiles(profileGroups);

var molecularProfileCounts = studyViewMapper.getMolecularProfileSampleCounts(studyViewFilter,
CategorizedClinicalDataCountFilter.getBuilder().build(), false );

var sizeMutations = molecularProfileCounts.stream().filter(gc->gc.getValue().equals("mutations"))
.findFirst().get().getCount().intValue();
assertEquals(9, sizeMutations);



}




}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
Expand Down

0 comments on commit b74a07a

Please sign in to comment.