From 0dfb346f18443270023b462f744978168ce8de97 Mon Sep 17 00:00:00 2001 From: Onur Sumer Date: Thu, 11 Jul 2024 16:28:53 -0400 Subject: [PATCH 1/6] Simplify clinical data binning related SQL (#10823) * simplify clinical data binning related SQL * fix numericalClinicalDataCountFilter --- .../persistence/StudyViewRepository.java | 3 + .../mybatisclickhouse/StudyViewMapper.java | 6 - .../StudyViewMyBatisRepository.java | 20 ++ .../service/StudyViewColumnarService.java | 7 +- .../impl/StudyViewColumnarServiceImpl.java | 9 +- .../web/columnar/ClinicalDataBinner.java | 71 +++---- .../columnar/util/NewClinicalDataBinUtil.java | 76 +++---- .../web/util/ClinicalDataBinUtil.java | 70 +++++-- .../org/cbioportal/web/util/DataBinner.java | 185 +++++++++++++++--- .../StudyViewFilterMapper.xml | 37 ++-- .../mybatisclickhouse/StudyViewMapper.xml | 63 +++--- 11 files changed, 373 insertions(+), 174 deletions(-) diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 98c1a17633e..696d8a4bdd5 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -8,6 +8,7 @@ import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; +import org.cbioportal.web.parameter.ClinicalDataType; import org.cbioportal.web.parameter.StudyViewFilter; import java.util.List; @@ -32,6 +33,8 @@ public interface StudyViewRepository { List getClinicalAttributes(); + Map getClinicalAttributeDatatypeMap(); + List getCaseListDataCounts(StudyViewFilter studyViewFilter); Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType); diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 71d0e76a1ab..a739590e5be 
100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -32,12 +32,6 @@ List getCnaGenes(StudyViewFilter studyViewFilter, Categor List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); - List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, - boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); - - List getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, - boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues ); - List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index ebe94444794..0df436bff4f 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -12,6 +12,7 @@ import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; +import org.cbioportal.web.parameter.ClinicalDataType; import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import 
org.springframework.stereotype.Repository; @@ -83,6 +84,25 @@ public List getClinicalAttributes() { return mapper.getClinicalAttributes(); } + @Override + public Map getClinicalAttributeDatatypeMap() { + if (clinicalAttributesMap.isEmpty()) { + buildClinicalAttributeNameMap(); + } + + Map attributeDatatypeMap = new HashMap<>(); + + clinicalAttributesMap + .get(ClinicalAttributeDataSource.SAMPLE) + .forEach(attribute -> attributeDatatypeMap.put(attribute.getAttrId(), ClinicalDataType.SAMPLE)); + + clinicalAttributesMap + .get(ClinicalAttributeDataSource.PATIENT) + .forEach(attribute -> attributeDatatypeMap.put(attribute.getAttrId(), ClinicalDataType.PATIENT)); + + return attributeDatatypeMap; + } + @Override public List getCaseListDataCounts(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); diff --git a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java index 0298b061377..7018f0e3e26 100644 --- a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java +++ b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java @@ -7,9 +7,11 @@ import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; +import org.cbioportal.web.parameter.ClinicalDataType; import org.cbioportal.web.parameter.StudyViewFilter; import java.util.List; +import java.util.Map; public interface StudyViewColumnarService { @@ -19,6 +21,8 @@ public interface StudyViewColumnarService { List getCnaGenes(StudyViewFilter interceptedStudyViewFilter); List getStructuralVariantGenes(StudyViewFilter studyViewFilter); + Map getClinicalAttributeDatatypeMap(); + List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes); List getCaseListDataCounts(StudyViewFilter studyViewFilter); @@ -28,6 +32,5 @@ public 
interface StudyViewColumnarService { List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds); List getGenomicDataCounts(StudyViewFilter studyViewFilter); - - + } diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index 5202b2589c6..e1759dc47b7 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -11,12 +11,14 @@ import org.cbioportal.persistence.StudyViewRepository; import org.cbioportal.service.AlterationCountService; import org.cbioportal.service.StudyViewColumnarService; +import org.cbioportal.web.parameter.ClinicalDataType; import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Service; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; @Service @@ -58,6 +60,11 @@ public List getStructuralVariantGenes(StudyViewFilter stu return alterationCountService.getStructuralVariantGenes(studyViewFilter); } + @Override + public Map getClinicalAttributeDatatypeMap() { + return studyViewRepository.getClinicalAttributeDatatypeMap(); + } + @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { return studyViewRepository.getClinicalDataCounts(studyViewFilter, filteredAttributes) @@ -85,6 +92,4 @@ public List getPatientClinicalData(StudyViewFilter studyViewFilter public List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds) { return studyViewRepository.getSampleClinicalData(studyViewFilter, attributeIds); } - - } diff --git a/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java b/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java index 
cc91a840a3c..11b06a7edda 100644 --- a/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java +++ b/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java @@ -11,7 +11,6 @@ import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; @Component public class ClinicalDataBinner { @@ -27,6 +26,15 @@ public ClinicalDataBinner( this.dataBinner = dataBinner; } + private List convertCountsToData(List clinicalDataCounts) + { + return clinicalDataCounts + .stream() + .map(NewClinicalDataBinUtil::generateClinicalDataFromClinicalDataCount) + .flatMap(Collection::stream) + .toList(); + } + @Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()") public List fetchClinicalDataBinCounts( DataBinMethod dataBinMethod, @@ -43,53 +51,30 @@ public List fetchClinicalDataBinCounts( List attributeIds = attributes.stream().map(ClinicalDataBinFilter::getAttributeId).collect(Collectors.toList()); // a new StudyView filter to partially filter by study and sample ids only - StudyViewFilter partialFilter = new StudyViewFilter(); - partialFilter.setStudyIds(studyViewFilter.getStudyIds()); - partialFilter.setSampleIdentifiers(studyViewFilter.getSampleIdentifiers()); - - // filter only by study id and sample identifiers, ignore rest // we need this additional partial filter because we always need to know the bins generated for the initial state // which allows us to keep the number of bins and bin ranges consistent even if there are additional data filters. // we only want to update the counts for each bin, we don't want to regenerate the bins for the filtered data. // NOTE: partial filter is only needed when dataBinMethod == DataBinMethod.STATIC but that's always the case // for the frontend implementation. 
we can't really use dataBinMethod == DataBinMethod.DYNAMIC because of the // complication it brings to the frontend visualization and filtering - List unfilteredSamples = studyViewColumnarService.getFilteredSamples(partialFilter); - List filteredSamples = studyViewColumnarService.getFilteredSamples(studyViewFilter); - - // TODO make sure unique sample and patient keys don't need to be distinct - List unfilteredUniqueSampleKeys = unfilteredSamples.stream().map(Sample::getUniqueSampleKey).collect(Collectors.toList()); - List filteredUniqueSampleKeys = filteredSamples.stream().map(Sample::getUniqueSampleKey).collect(Collectors.toList()); - List unfilteredUniquePatientKeys = unfilteredSamples.stream().map(Sample::getUniquePatientKey).collect(Collectors.toList()); - List filteredUniquePatientKeys = filteredSamples.stream().map(Sample::getUniquePatientKey).collect(Collectors.toList()); + StudyViewFilter partialFilter = new StudyViewFilter(); + partialFilter.setStudyIds(studyViewFilter.getStudyIds()); + partialFilter.setSampleIdentifiers(studyViewFilter.getSampleIdentifiers()); - // TODO make sure we don't need a distinction between sample vs patient attribute ids here - // ideally we shouldn't because we have patient clinical data separated from sample clinical data in clickhouse - // we need the clinical data for the partial filter in order to generate the bins for initial state - // we use the filtered data to calculate the counts for each bin, we do not regenerate bins for the filtered data - List unfilteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(partialFilter, attributeIds); - List filteredClinicalDataForSamples = studyViewColumnarService.getSampleClinicalData(studyViewFilter, attributeIds); - List unfilteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(partialFilter, attributeIds); - List filteredClinicalDataForPatients = studyViewColumnarService.getPatientClinicalData(studyViewFilter, attributeIds); + // 
we use the filtered data to calculate the counts for each bin, we do not regenerate bins for the filtered data + List unfilteredClinicalDataCounts = studyViewColumnarService.getClinicalDataCounts(partialFilter, attributeIds); + List filteredClinicalDataCounts = studyViewColumnarService.getClinicalDataCounts(studyViewFilter, attributeIds); - Map attributeDatatypeMap = NewClinicalDataBinUtil.toAttributeDatatypeMap( - unfilteredClinicalDataForSamples.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()), - unfilteredClinicalDataForPatients.stream().map(ClinicalData::getAttrId).collect(Collectors.toList()), - Collections.emptyList() // TODO ignoring conflictingPatientAttributeIds for now + // TODO ignoring conflictingPatientAttributeIds for now + List unfilteredClinicalData = convertCountsToData( + unfilteredClinicalDataCounts.stream().flatMap(c -> c.getCounts().stream()).toList() + ); + List filteredClinicalData = convertCountsToData( + filteredClinicalDataCounts.stream().flatMap(c -> c.getCounts().stream()).toList() ); - List unfilteredClinicalData = Stream.of( - unfilteredClinicalDataForSamples, - unfilteredClinicalDataForPatients - // unfilteredClinicalDataForConflictingPatientAttributes /// TODO ignoring conflictingPatientAttributeIds for now - ).flatMap(Collection::stream).collect(Collectors.toList()); - - List filteredClinicalData = Stream.of( - filteredClinicalDataForSamples, - filteredClinicalDataForPatients - // filteredClinicalDataForConflictingPatientAttributes // TODO ignoring conflictingPatientAttributeIds for now - ).flatMap(Collection::stream).collect(Collectors.toList()); + Map attributeDatatypeMap = studyViewColumnarService.getClinicalAttributeDatatypeMap(); Map> unfilteredClinicalDataByAttributeId = unfilteredClinicalData.stream().collect(Collectors.groupingBy(Binnable::getAttrId)); @@ -100,17 +85,13 @@ public List fetchClinicalDataBinCounts( List clinicalDataBins = Collections.emptyList(); if (dataBinMethod == DataBinMethod.STATIC) { - 
if (!unfilteredSamples.isEmpty() && !unfilteredClinicalData.isEmpty()) { + if (!unfilteredClinicalData.isEmpty()) { clinicalDataBins = NewClinicalDataBinUtil.calculateStaticDataBins( dataBinner, attributes, attributeDatatypeMap, unfilteredClinicalDataByAttributeId, - filteredClinicalDataByAttributeId, - unfilteredUniqueSampleKeys, - unfilteredUniquePatientKeys, - filteredUniqueSampleKeys, - filteredUniquePatientKeys + filteredClinicalDataByAttributeId ); } } @@ -123,9 +104,7 @@ public List fetchClinicalDataBinCounts( dataBinner, attributes, attributeDatatypeMap, - filteredClinicalDataByAttributeId, - filteredUniqueSampleKeys, - filteredUniquePatientKeys + filteredClinicalDataByAttributeId ); } } diff --git a/src/main/java/org/cbioportal/web/columnar/util/NewClinicalDataBinUtil.java b/src/main/java/org/cbioportal/web/columnar/util/NewClinicalDataBinUtil.java index f5434e3df2c..fdc7afcd4b8 100644 --- a/src/main/java/org/cbioportal/web/columnar/util/NewClinicalDataBinUtil.java +++ b/src/main/java/org/cbioportal/web/columnar/util/NewClinicalDataBinUtil.java @@ -1,7 +1,9 @@ package org.cbioportal.web.columnar.util; import org.cbioportal.model.Binnable; +import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataBin; +import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.DataBin; import org.cbioportal.web.parameter.ClinicalDataBinCountFilter; import org.cbioportal.web.parameter.ClinicalDataBinFilter; @@ -15,7 +17,6 @@ import java.util.Map; import static java.util.Collections.emptyList; -import static java.util.stream.Collectors.toList; public class NewClinicalDataBinUtil { public static StudyViewFilter removeSelfFromFilter(ClinicalDataBinCountFilter dataBinCountFilter) { @@ -70,33 +71,21 @@ public static List calculateStaticDataBins( List attributes, Map attributeDatatypeMap, Map> unfilteredClinicalDataByAttributeId, - Map> filteredClinicalDataByAttributeId, - List unfilteredUniqueSampleKeys, - List unfilteredUniquePatientKeys, 
- List filteredUniqueSampleKeys, - List filteredUniquePatientKeys + Map> filteredClinicalDataByAttributeId ) { List clinicalDataBins = new ArrayList<>(); for (ClinicalDataBinFilter attribute : attributes) { if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) { - ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId()); - List filteredIds = clinicalDataType == ClinicalDataType.PATIENT ? filteredUniquePatientKeys - : filteredUniqueSampleKeys; - List unfilteredIds = clinicalDataType == ClinicalDataType.PATIENT - ? unfilteredUniquePatientKeys - : unfilteredUniqueSampleKeys; - List dataBins = dataBinner - .calculateClinicalDataBins(attribute, clinicalDataType, - filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), - emptyList()), - unfilteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), - emptyList()), - filteredIds, unfilteredIds) + .calculateClinicalDataBins( + attribute, + filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList()), + unfilteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList()) + ) .stream() .map(dataBin -> dataBinToClinicalDataBin(attribute, dataBin)) - .collect(toList()); + .toList(); clinicalDataBins.addAll(dataBins); } @@ -109,9 +98,7 @@ public static List calculateDynamicDataBins( DataBinner dataBinner, List attributes, Map attributeDatatypeMap, - Map> filteredClinicalDataByAttributeId, - List filteredUniqueSampleKeys, - List filteredUniquePatientKeys + Map> filteredClinicalDataByAttributeId ) { List clinicalDataBins = new ArrayList<>(); @@ -119,23 +106,46 @@ public static List calculateDynamicDataBins( // if there is clinical data for requested attribute if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) { - ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId()); - List filteredIds = clinicalDataType == ClinicalDataType.PATIENT - ? 
filteredUniquePatientKeys - : filteredUniqueSampleKeys; - List dataBins = dataBinner - .calculateDataBins(attribute, clinicalDataType, - filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), - emptyList()), - filteredIds) + .calculateDataBins( + attribute, + filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), emptyList()) + ) .stream() .map(dataBin -> dataBinToClinicalDataBin(attribute, dataBin)) - .collect(toList()); + .toList(); clinicalDataBins.addAll(dataBins); } } return clinicalDataBins; } + + /** + * Generate a list of ClinicalData from the given data count instance. + * Size of the generated list is equal to 'dataCount.count', + * and each ClinicalData in the list contains the same value 'dataCount.value' + * + * This method improves the performance of the data binning because it allows us to fetch only + * the clinical data counts data which is a lot more compact and faster to generate than the actual clinical data. + * We only need the attribute id and the value of the clinical data to generate data bins. + * Constructing the clinical data in memory by using clinical data counts significantly improves the performance, + * and it also allows us to use the exact same SQL used by the clinical data counts endpoint. 
+ * + * @param dataCount ClinicalDataCount instance containing the count and the value + * @return a list of ClinicalData with size 'dataCount.count' and value 'dataCount.value' + */ + public static List generateClinicalDataFromClinicalDataCount(ClinicalDataCount dataCount) + { + List data = new ArrayList<>(dataCount.getCount()); + + for (int i=0; i < dataCount.getCount(); i++) { + ClinicalData d = new ClinicalData(); + d.setAttrId(dataCount.getAttributeId()); + d.setAttrValue(dataCount.getValue()); + data.add(d); + } + + return data; + } } diff --git a/src/main/java/org/cbioportal/web/util/ClinicalDataBinUtil.java b/src/main/java/org/cbioportal/web/util/ClinicalDataBinUtil.java index abe02b5388a..bf90b2de08b 100644 --- a/src/main/java/org/cbioportal/web/util/ClinicalDataBinUtil.java +++ b/src/main/java/org/cbioportal/web/util/ClinicalDataBinUtil.java @@ -310,17 +310,33 @@ public List calculateStaticDataBins( List filteredUniqueSampleKeys, List filteredUniquePatientKeys ) { - return NewClinicalDataBinUtil.calculateStaticDataBins( - dataBinner, - attributes, - attributeDatatypeMap, - unfilteredClinicalDataByAttributeId, - filteredClinicalDataByAttributeId, - unfilteredUniqueSampleKeys, - unfilteredUniquePatientKeys, - filteredUniqueSampleKeys, - filteredUniquePatientKeys - ); + List clinicalDataBins = new ArrayList<>(); + + for (ClinicalDataBinFilter attribute : attributes) { + if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) { + ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId()); + List filteredIds = clinicalDataType == ClinicalDataType.PATIENT ? filteredUniquePatientKeys + : filteredUniqueSampleKeys; + List unfilteredIds = clinicalDataType == ClinicalDataType.PATIENT + ? 
unfilteredUniquePatientKeys + : unfilteredUniqueSampleKeys; + + List dataBins = dataBinner + .calculateClinicalDataBins(attribute, clinicalDataType, + filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), + emptyList()), + unfilteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), + emptyList()), + filteredIds, unfilteredIds) + .stream() + .map(dataBin -> NewClinicalDataBinUtil.dataBinToClinicalDataBin(attribute, dataBin)) + .toList(); + + clinicalDataBins.addAll(dataBins); + } + } + + return clinicalDataBins; } public List calculateDynamicDataBins( @@ -330,14 +346,30 @@ public List calculateDynamicDataBins( List filteredUniqueSampleKeys, List filteredUniquePatientKeys ) { - return NewClinicalDataBinUtil.calculateDynamicDataBins( - dataBinner, - attributes, - attributeDatatypeMap, - filteredClinicalDataByAttributeId, - filteredUniqueSampleKeys, - filteredUniquePatientKeys - ); + List clinicalDataBins = new ArrayList<>(); + + for (ClinicalDataBinFilter attribute : attributes) { + + // if there is clinical data for requested attribute + if (attributeDatatypeMap.containsKey(attribute.getAttributeId())) { + ClinicalDataType clinicalDataType = attributeDatatypeMap.get(attribute.getAttributeId()); + List filteredIds = clinicalDataType == ClinicalDataType.PATIENT + ? 
filteredUniquePatientKeys + : filteredUniqueSampleKeys; + + List dataBins = dataBinner + .calculateDataBins(attribute, clinicalDataType, + filteredClinicalDataByAttributeId.getOrDefault(attribute.getAttributeId(), + emptyList()), + filteredIds) + .stream() + .map(dataBin -> NewClinicalDataBinUtil.dataBinToClinicalDataBin(attribute, dataBin)) + .toList(); + clinicalDataBins.addAll(dataBins); + } + } + + return clinicalDataBins; } private Map toAttributeDatatypeMap(BinningIds binningIds) { return toAttributeDatatypeMap( diff --git a/src/main/java/org/cbioportal/web/util/DataBinner.java b/src/main/java/org/cbioportal/web/util/DataBinner.java index 6002f2df9b6..2807f6c1211 100644 --- a/src/main/java/org/cbioportal/web/util/DataBinner.java +++ b/src/main/java/org/cbioportal/web/util/DataBinner.java @@ -31,12 +31,20 @@ public class DataBinner { @Autowired private LogScaleDataBinner logScaleDataBinner; - public List calculateClinicalDataBins(T dataBinFilter, + /** + * This method should only be invoked by legacy endpoints because it requires sample/patient ids. 
+ * + * @deprecated + */ + @Deprecated + public List calculateClinicalDataBins( + T dataBinFilter, ClinicalDataType clinicalDataType, List filteredClinicalData, List unfilteredClinicalData, List filteredIds, - List unfilteredIds) { + List unfilteredIds + ) { // calculate data bins for unfiltered clinical data List dataBins = calculateDataBins( dataBinFilter, clinicalDataType, unfilteredClinicalData, unfilteredIds); @@ -45,10 +53,52 @@ public List calculateClinicalDataBins(T dataB return recalcBinCount(dataBins, clinicalDataType, filteredClinicalData, filteredIds); } - public List recalcBinCount(List dataBins, - ClinicalDataType clinicalDataType, - List clinicalData, - List ids) { + public List calculateClinicalDataBins( + T dataBinFilter, + List filteredClinicalData, + List unfilteredClinicalData + ) { + // calculate data bins for unfiltered clinical data + // we need this additional calculation to know the bins generated for the initial state. + // this allows us to keep the number of bins and bin ranges consistent. + // we only want to update the counts for each bin, we don't want to regenerate the bins for the filtered data. + List dataBins = calculateDataBins( + dataBinFilter, + unfilteredClinicalData + ); + + // recount + return recalcBinCount( + dataBins, + filteredClinicalData, + countNAs(filteredClinicalData) + ); + } + + /** + * This method should only be invoked by legacy endpoints because it requires sample/patient ids. + * + * @deprecated + */ + @Deprecated + public List recalcBinCount( + List dataBins, + ClinicalDataType clinicalDataType, + List clinicalData, + List caseIds + ) { + return recalcBinCount( + dataBins, + clinicalData, + countNAs(clinicalData, clinicalDataType, caseIds) + ); + } + + public List recalcBinCount( + List dataBins, + List clinicalData, + Long naCount + ) { List numericalValues = clinicalData == null ? Collections.emptyList() : filterNumericalValues(clinicalData); List nonNumericalValues = clinicalData == null ? 
@@ -83,33 +133,77 @@ public List recalcBinCount(List dataBins, } } if ("NA".equalsIgnoreCase(dataBin.getSpecialValue())) { - dataBin.setCount(countNAs(clinicalData, clinicalDataType, ids).intValue()); + dataBin.setCount(naCount.intValue()); } } return dataBins; } + /** + * This method should only be invoked by legacy endpoints because it requires sample/patient ids. + * + * @deprecated + */ + @Deprecated public List calculateDataBins( T dataBinFilter, ClinicalDataType clinicalDataType, List clinicalData, - List ids + List caseIds ) { return calculateDataBins( dataBinFilter, clinicalDataType, clinicalData, - ids, + caseIds, DEFAULT_DISTINCT_VALUE_THRESHOLD ); } + public List calculateDataBins( + T dataBinFilter, + List clinicalData + ) { + return calculateDataBins( + dataBinFilter, + clinicalData, + DEFAULT_DISTINCT_VALUE_THRESHOLD + ); + } + + public List calculateDataBins( + T dataBinFilter, + List clinicalData, + Integer distinctValueThreshold + ) { + DataBin naDataBin = calcNaDataBin(clinicalData); + + return calculateDataBins(dataBinFilter, clinicalData, naDataBin, distinctValueThreshold); + } + + /** + * This method should only be invoked by legacy endpoints because it requires sample/patient ids. 
+ * + * @deprecated + */ + @Deprecated public List calculateDataBins( T dataBinFilter, ClinicalDataType clinicalDataType, List clinicalData, - List ids, + List caseIds, + Integer distinctValueThreshold + ) { + DataBin naDataBin = calcNaDataBin(clinicalData, clinicalDataType, caseIds); + + return calculateDataBins(dataBinFilter, clinicalData, naDataBin, distinctValueThreshold); + } + + public List calculateDataBins( + T dataBinFilter, + List clinicalData, + DataBin naDataBin, Integer distinctValueThreshold ) { boolean numericalOnly = false; @@ -167,7 +261,6 @@ dataBins, filterNumericalValues(clinicalData), filterSpecialRanges(clinicalData) dataBins.addAll(calcNonNumericalClinicalDataBins(clinicalData)); - DataBin naDataBin = calcNaDataBin(clinicalData, clinicalDataType, ids); if (!naDataBin.getCount().equals(0)) { dataBins.add(naDataBin); } @@ -514,32 +607,58 @@ public List adjustCustomBins( * NA count is: Number of clinical data marked actually as "NA" + Number of patients/samples without clinical data. * Assuming that clinical data is for a single attribute. * + * This method should only be invoked by legacy endpoints because it requires sample/patient ids. 
+ * * @param clinicalData clinical data list for a single attribute - * @param ids sample/patient ids + * @param caseIds sample/patient ids * * @return 'NA' clinical data count as a DataBin instance + * @deprecated */ - public DataBin calcNaDataBin(List clinicalData, - ClinicalDataType clinicalDataType, - List ids) { - DataBin bin = new DataBin(); - - bin.setSpecialValue("NA"); + @Deprecated + public DataBin calcNaDataBin( + List clinicalData, + ClinicalDataType clinicalDataType, + List caseIds + ) { + DataBin bin = initNaDataBin(); + bin.setCount(countNAs(clinicalData, clinicalDataType, caseIds).intValue()); - Long count = countNAs(clinicalData, clinicalDataType, ids); + return bin; + } - bin.setCount(count.intValue()); + /** + * This function assumes that all the NA values are already in the provided clinical data list. + * + * @param clinicalData clinical data list for a single attribute + * + * @return 'NA' clinical data count as a DataBin instance + */ + public DataBin calcNaDataBin( + List clinicalData + ) { + DataBin bin = initNaDataBin(); + bin.setCount(countNAs(clinicalData).intValue()); return bin; } + + public DataBin initNaDataBin() { + DataBin bin = new DataBin(); + bin.setSpecialValue("NA"); + return bin; + } - public Long countNAs(List clinicalData, ClinicalDataType clinicalDataType, List ids) { - // Calculate number of clinical data marked actually as "NA", "NAN", or "N/A" - - Long count = clinicalData == null ? 
0 : - clinicalData.stream() - .filter(c -> dataBinHelper.isNA(c.getAttrValue())) - .count(); + /** + * This method should only be invoked by legacy endpoints because it requires sample/patient ids + * + * @deprecated + */ + @Deprecated + public Long countNAs(List clinicalData, ClinicalDataType clinicalDataType, List caseIds) { + // Calculate the number of clinical data marked actually as "NA", "NAN", or "N/A" + + Long count = countNAs(clinicalData); // Calculate number of patients/samples without clinical data @@ -555,7 +674,7 @@ public Long countNAs(List clinicalData, ClinicalDataType clinicalDataT uniqueClinicalDataIds = Collections.emptySet(); } - Set uniqueInputIds = new HashSet<>(ids); + Set uniqueInputIds = new HashSet<>(caseIds); // remove the ids with existing clinical data, // size of the difference (of two sets) is the count we need @@ -565,6 +684,16 @@ public Long countNAs(List clinicalData, ClinicalDataType clinicalDataT return count; } + /** + * Calculate number of clinical data marked actually as "NA", "NAN", or "N/A" + */ + public Long countNAs(List clinicalData) { + return clinicalData == null ? 0 : + clinicalData.stream() + .filter(c -> dataBinHelper.isNA(c.getAttrValue())) + .count(); + } + private String computeUniqueCaseId(Binnable clinicalData, ClinicalDataType clinicalDataType) { return clinicalData.getStudyId() + (clinicalDataType == ClinicalDataType.PATIENT ? 
clinicalData.getPatientId() diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index 3be2106a679..ca63efa765b 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -136,26 +136,31 @@ SELECT ${unique_id} FROM ${table_name} WHERE attribute_name = '${clinicalDataFilter.attributeId}' AND - type='${type}' AND - (attribute_value = '' OR match(attribute_value, '^[\d\.]+$')) + type='${type}' - AND attribute_value = '' + AND + + + + + + AND match(attribute_value, '^[\d\.]+$') + + + AND abs(minus(cast(attribute_value as float), ${dataFilterValue.start})) < exp(-11) + + + + AND cast(attribute_value as float) > ${dataFilterValue.start} + + + AND cast(attribute_value as float) <= ${dataFilterValue.end} + + + - - - AND abs(minus(cast(attribute_value as float), ${dataFilterValue.start})) < exp(-11) - - - - AND cast(attribute_value as float) > ${dataFilterValue.start} - - - AND cast(attribute_value as float) <= ${dataFilterValue.end} - - - diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index b907d2a9f16..040ccedfc90 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -74,7 +74,6 @@ GROUP BY hugo_gene_symbol; - - - + - + - - + SELECT attribute_name as attributeId, - if(attribute_value='', 'NA', attribute_value) AS value, + + + + , count(value) as count FROM clinical_data_derived @@ -199,10 +196,13 @@ value - + SELECT attribute_name as attributeId, - if(attribute_value='', 'NA', attribute_value) AS value, + + + + , count(value) 
as count FROM clinical_data_derived @@ -331,4 +331,23 @@ AND patient_unique_id IN () + + + + ${attribute_value}='' + OR upperUTF8(${attribute_value})='NA' + OR upperUTF8(${attribute_value})='NAN' + OR upperUTF8(${attribute_value})='N/A' + + + + if( + + + , + 'NA', + ${attribute_value} + ) + AS ${as_value} + From 6df137f734f86090db9796ae8c3ece7457dd47c9 Mon Sep 17 00:00:00 2001 From: alisman Date: Thu, 18 Jul 2024 11:54:08 -0400 Subject: [PATCH 2/6] Fix patient_derived table population (patient stable id was getting base64) (#10898) --- src/main/resources/db-scripts/clickhouse/clickhouse.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index fcbe2fe1d01..83c517f15c7 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -67,8 +67,8 @@ SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_uniqu base64Encode(sample.stable_id) AS sample_unique_id_base64, sample.stable_id AS sample_stable_id, concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, - p.stable_id AS patient_stable_id, base64Encode(p.stable_id) AS patient_unique_id_base64, + p.stable_id AS patient_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, sample.internal_id AS internal_id FROM sample From 3455f97967abf741bc4faa87652b3139c6c9647a Mon Sep 17 00:00:00 2001 From: alisman Date: Thu, 18 Jul 2024 11:56:21 -0400 Subject: [PATCH 3/6] Always load frontend over https (localdev mode) (#10901) --- src/main/resources/templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/templates/index.html b/src/main/resources/templates/index.html index 34cb24cc2f3..3b828ef82bd 100644 --- a/src/main/resources/templates/index.html +++ b/src/main/resources/templates/index.html @@ -32,7 +32,7 @@ window.netlify = 
localStorage.netlify; if (window.localdev || window.localdist) { - window.frontendConfig.frontendUrl = "//localhost:3000/" + window.frontendConfig.frontendUrl = "https://localhost:3000/" localStorage.setItem("e2etest", "true"); } else if (window.netlify) { window.frontendConfig.frontendUrl = ['//',localStorage.netlify,'.netlify.app','/'].join(''); From 77bf77fa5725d11dc15c857265e6012fb8e76497 Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> Date: Fri, 19 Jul 2024 11:21:48 -0400 Subject: [PATCH 4/6] Rfc80/clinical event timeline endpoint (#10899) * Add Clinical Events Timeline counts endpoint * Add comments for StudyViewFilter --- .../persistence/StudyViewRepository.java | 3 + .../mybatisclickhouse/StudyViewMapper.java | 3 + .../StudyViewMyBatisRepository.java | 41 ++-- .../service/StudyViewColumnarService.java | 3 + .../impl/StudyViewColumnarServiceImpl.java | 8 +- .../StudyViewColumnStoreController.java | 27 ++- .../db-scripts/clickhouse/clickhouse.sql | 29 +++ .../StudyViewFilterMapper.xml | 18 ++ .../mybatisclickhouse/StudyViewMapper.xml | 183 +++++++++++------- .../StudyViewMapperTest.java | 38 +++- src/test/resources/clickhouse_cgds.sql | 6 +- src/test/resources/clickhouse_data.sql | 2 +- 12 files changed, 271 insertions(+), 90 deletions(-) diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 696d8a4bdd5..1c93f3ef57d 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -5,6 +5,7 @@ import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; @@ -44,4 +45,6 @@ public 
interface StudyViewRepository { Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, String alterationType); int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, String alterationType); + + List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index a739590e5be..0e762a1d07a 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -6,6 +6,7 @@ import org.cbioportal.model.ClinicalAttribute; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenePanelToGene; import org.cbioportal.model.GenomicDataCount; @@ -51,4 +52,6 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C List getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); int getTotalProfiledCountByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + + List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 0df436bff4f..a0709db0e78 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java 
+++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -4,6 +4,7 @@ import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.GenePanelToGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; @@ -39,14 +40,14 @@ public StudyViewMyBatisRepository(StudyViewMapper mapper) { @Override public List getFilteredSamples(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return mapper.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); + return mapper.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter)); } @Override public List getMutatedGenes(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } @@ -54,7 +55,7 @@ public List getMutatedGenes(StudyViewFilter studyViewFilt public List getCnaGenes(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + 
shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } @@ -62,21 +63,23 @@ public List getCnaGenes(StudyViewFilter studyViewFilter) public List getStructuralVariantGenes(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return mapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + return mapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES ); } @Override public List getGenomicDataCounts(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return mapper.getGenomicDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); + return mapper.getGenomicDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter)); } @Override @@ -106,46 +109,47 @@ public Map getClinicalAttributeDatatypeMap() { @Override public List 
getCaseListDataCounts(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return mapper.getCaseListDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); + return mapper.getCaseListDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter)); } - private boolean shouldApplyPatientIdFilters(CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { - return categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty() + private boolean shouldApplyPatientIdFilters(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + return studyViewFilter.getClinicalEventFilters() != null && !studyViewFilter.getClinicalEventFilters().isEmpty() + || categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty() || categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters().isEmpty(); } @Override public List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return mapper.getSampleClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds); + return mapper.getSampleClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, 
shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), attributeIds); } @Override public List getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return mapper.getPatientClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds); + return mapper.getPatientClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), attributeIds); } @Override public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getTotalProfiledCounts(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); } @Override public int getFilteredSamplesCount(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getFilteredSamplesCount(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter)); } @Override public Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, String alterationType) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getMatchingGenePanelIds(studyViewFilter, categorizedClinicalDataCountFilter, - 
shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType) + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType) .stream() .collect(Collectors.groupingBy(GenePanelToGene::getHugoGeneSymbol, Collectors.mapping(GenePanelToGene::getGenePanelId, Collectors.toSet()))); @@ -155,7 +159,14 @@ public Map> getMatchingGenePanelIds(StudyViewFilter studyVie public int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, String alterationType) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getTotalProfiledCountByAlterationType(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); + } + + @Override + public List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return mapper.getClinicalEventTypeCounts(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter)); } private void buildClinicalAttributeNameMap() { diff --git a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java index 7018f0e3e26..8ae5ea20608 100644 --- a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java +++ b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java @@ -4,6 +4,7 @@ import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCountItem; +import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.GenomicDataCount; import 
org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; @@ -32,5 +33,7 @@ public interface StudyViewColumnarService { List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds); List getGenomicDataCounts(StudyViewFilter studyViewFilter); + + List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter); } diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index e1759dc47b7..b73ca064803 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -5,6 +5,7 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.ClinicalDataCountItem; +import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; @@ -50,7 +51,12 @@ public List getMutatedGenes(StudyViewFilter studyViewFilt public List getGenomicDataCounts(StudyViewFilter studyViewFilter) { return studyViewRepository.getGenomicDataCounts(studyViewFilter); } - + + @Override + public List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter) { + return studyViewRepository.getClinicalEventTypeCounts(studyViewFilter); + } + public List getCnaGenes(StudyViewFilter studyViewFilter) { return alterationCountService.getCnaGenes(studyViewFilter); } diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index 6393b061164..3ec1ccf90a6 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -13,9 +13,10 @@ import 
org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataBin; import org.cbioportal.model.ClinicalDataCountItem; +import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.ClinicalViolinPlotData; -import org.cbioportal.model.DensityPlotData; import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.DensityPlotData; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; import org.cbioportal.service.ClinicalDataDensityPlotService; @@ -325,4 +326,28 @@ public ResponseEntity fetchClinicalDataViolinPlots( return new ResponseEntity<>(result, HttpStatus.OK); } + + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") + @PostMapping(value = "/column-store/clinical-event-type-counts/fetch", consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(description = "Get Counts of Clinical Event Types by Study View Filter") + @ApiResponse(responseCode = "200", description = "OK", + content = @Content(array = @ArraySchema(schema = @Schema(implementation = ClinicalEventTypeCount.class)))) + public ResponseEntity> getClinicalEventTypeCounts( + @Parameter(required = true, description = "Study view filter") + @Valid + @RequestBody(required = false) + StudyViewFilter studyViewFilter, + + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface + @RequestAttribute(required = false, value = "involvedCancerStudies") + Collection involvedCancerStudies, + + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. this attribute is needed for the @PreAuthorize tag above. 
+ @Valid + @RequestAttribute(required = false, value = "interceptedStudyViewFilter") + StudyViewFilter interceptedStudyViewFilter + ) { + return new ResponseEntity<>(studyViewColumnarService.getClinicalEventTypeCounts(interceptedStudyViewFilter), HttpStatus.OK); + } } diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index 83c517f15c7..5cad1670464 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -3,6 +3,7 @@ DROP TABLE IF EXISTS gene_panel_to_gene_derived; DROP TABLE IF EXISTS sample_derived; DROP TABLE IF EXISTS genomic_event_derived; DROP TABLE IF EXISTS clinical_data_derived; +DROP TABLE IF EXISTS clinical_event_derived; CREATE TABLE sample_to_gene_panel_derived @@ -249,8 +250,36 @@ FROM patient AS p ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) WHERE cam.patient_attribute = 1; +CREATE TABLE clinical_event_derived +( + patient_unique_id String, + key String, + value String, + start_date Int32, + stop_date Int32 DEFAULT 0, + event_type LowCardinality(String), + cancer_study_identifier LowCardinality(String) +) +ENGINE = MergeTree + ORDER BY (event_type, patient_unique_id, cancer_study_identifier); + +INSERT INTO clinical_event_derived +SELECT + concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, + ced.key AS key, + ced.value AS value, + ce.start_date AS start_date, + ifNull(ce.stop_date, 0) AS stop_date, + ce.event_type AS event_type, + cs.cancer_study_identifier +FROM clinical_event ce + LEFT JOIN clinical_event_data ced ON ce.clinical_event_id = ced.clinical_event_id + INNER JOIN patient p ON ce.patient_id = p.internal_id + INNER JOIN cancer_study cs ON p.cancer_study_id = cs.cancer_study_id; + OPTIMIZE TABLE sample_to_gene_panel_derived; OPTIMIZE TABLE gene_panel_to_gene_derived; OPTIMIZE TABLE sample_derived; OPTIMIZE TABLE genomic_event_derived; 
OPTIMIZE TABLE clinical_data_derived; +OPTIMIZE TABLE clinical_event_derived; \ No newline at end of file diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index ca63efa765b..44d3cd5523c 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -109,6 +109,20 @@ + + + SELECT patient_unique_id + FROM clinical_event_derived + + + event_type = '${dataFilterValue.value}' + + + + + @@ -129,6 +143,10 @@ + + + + diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 040ccedfc90..576c5b7378b 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -13,13 +13,10 @@ patient_unique_id_base64 as uniquePatientKey FROM sample_derived - sample_unique_id IN ( ) - - - AND patient_unique_id IN () - + + + - ORDER BY sample_stable_id ASC; @@ -32,7 +29,9 @@ FROM genomic_event_derived variant_type = 'mutation' AND - + + + @@ -52,7 +51,9 @@ FROM genomic_event_derived variant_type = 'cna' AND - + + + @@ -69,7 +70,9 @@ FROM genomic_event_derived variant_type = 'structural_variant' AND - + + + GROUP BY hugo_gene_symbol; @@ -83,12 +86,9 @@ cancer_study_identifier as studyId FROM clinical_data_derived - sample_unique_id IN ( - - - INTERSECT - - ) + + + AND attribute_name IN @@ -107,12 +107,9 @@ cancer_study_identifier as studyId FROM clinical_data_derived - patient_unique_id IN ( - - - INTERSECT - - ) + + + AND attribute_name IN @@ -141,7 +138,9 @@ LEFT JOIN sample_derived ON sample_profile.sample_id=sample_derived.internal_id LEFT JOIN 
genetic_profile on sample_profile.genetic_profile_id = genetic_profile.genetic_profile_id - sample_unique_id IN ( ) + + + GROUP BY genetic_profile.stable_id, genetic_profile.name, sample_derived.cancer_study_identifier; @@ -157,7 +156,9 @@ LEFT JOIN sample_derived s ON sll.sample_id=s.internal_id LEFT JOIN sample_list sl on sll.list_id=sl.list_id - sample_unique_id IN ( ) + + + GROUP BY s.cancer_study_identifier, sl.stable_id, sl.name; @@ -174,13 +175,9 @@ FROM clinical_data_derived type='sample' AND - sample_unique_id IN ( - - INTERSECT - SELECT sample_unique_id from sample_derived WHERE sample_derived.patient_unique_id IN - () - - ) + + + AND UPPER(value) NOT IN @@ -207,11 +204,9 @@ FROM clinical_data_derived type='patient' AND - patient_unique_id IN ( - - INTERSECT - - ) + + + AND UPPER(value) NOT IN @@ -227,22 +222,6 @@ value - - SELECT sample_unique_id - FROM sample_derived - - patient_unique_id IN () - - - - - SELECT patient_unique_id - FROM sample_derived - - sample_unique_id IN () - - - @@ -282,11 +259,9 @@ stgp.alteration_type = '${alterationType}' AND - sample_unique_id IN ( - - INTERSECT - - ) + + + @@ -296,7 +271,9 @@ COUNT(*) FROM sample_derived - + + + @@ -313,24 +290,94 @@ alteration_type = '${alterationType}' AND - sample_unique_id IN ( - - INTERSECT - - ) + + + ) GROUP BY gene, gene_panel_id; + + + + + + + + + + + sample_unique_id IN ( ) + + AND patient_unique_id IN () + + + + + + + + patient_unique_id in ( + SELECT patient_unique_id + FROM sample_derived + + sample_unique_id IN () + + + + INTERSECT + + + ) + + + + sample_unique_id IN ( ) - AND patient_unique_id IN () + INTERSECT + + + + SELECT sample_unique_id + FROM sample_derived + + patient_unique_id IN () + + diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java index dfde0006df9..f8019f72e54 100644 --- 
a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java @@ -5,6 +5,8 @@ import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.persistence.mybatisclickhouse.config.MyBatisConfig; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; +import org.cbioportal.web.parameter.DataFilter; +import org.cbioportal.web.parameter.DataFilterValue; import org.cbioportal.web.parameter.StudyViewFilter; import org.junit.Test; import org.junit.runner.RunWith; @@ -22,6 +24,8 @@ import java.util.Objects; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; @RunWith(SpringRunner.class) @Import(MyBatisConfig.class) @@ -112,7 +116,39 @@ public void getTotalProfiledCountsByGene() { var akt2TotalProfiledCounts = totalProfiledCountsMap.get("akt2"); assertEquals(4, akt2TotalProfiledCounts.getNumberOfProfiledCases().intValue()); - } + + @Test + public void getClinicalEventTypeCounts() { + StudyViewFilter studyViewFilter = new StudyViewFilter(); + studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB)); + + var clinicalEventTypeCounts = studyViewMapper.getClinicalEventTypeCounts(studyViewFilter, + CategorizedClinicalDataCountFilter.getBuilder().build(), false); + + assertEquals(4, clinicalEventTypeCounts.size()); + + var clinicalEventTypeCountOptional = clinicalEventTypeCounts.stream().filter(ce -> ce.getEventType().equals("treatment")) + .findFirst(); + + assertTrue(clinicalEventTypeCountOptional.isPresent()); + assertEquals(1, clinicalEventTypeCountOptional.get().getCount().intValue()); + + DataFilter dataFilter = new DataFilter(); + DataFilterValue dataFilterValue = new DataFilterValue(); + dataFilterValue.setValue("treatment"); + dataFilter.setValues(List.of(dataFilterValue)); + studyViewFilter.setClinicalEventFilters(List.of(dataFilter)); + + 
clinicalEventTypeCounts = studyViewMapper.getClinicalEventTypeCounts(studyViewFilter, + CategorizedClinicalDataCountFilter.getBuilder().build(), true); + + assertEquals(2, clinicalEventTypeCounts.size()); + + clinicalEventTypeCountOptional = clinicalEventTypeCounts.stream().filter(ce -> ce.getEventType().equals("status")) + .findFirst(); + + assertFalse(clinicalEventTypeCountOptional.isPresent()); + } } \ No newline at end of file diff --git a/src/test/resources/clickhouse_cgds.sql b/src/test/resources/clickhouse_cgds.sql index b5a3f2ed8f4..0b7945d4cfa 100644 --- a/src/test/resources/clickhouse_cgds.sql +++ b/src/test/resources/clickhouse_cgds.sql @@ -543,10 +543,10 @@ CREATE TABLE cosmic_mutation CREATE TABLE clinical_event ( clinical_event_id Int32, - patient_id Nullable(Int32), + patient_id Int, event_type String, - start_date Nullable(DateTime), - stop_date Nullable(DateTime) + start_date Int32 default 0, + stop_date Int32 default 0 ) ENGINE = MergeTree() ORDER BY clinical_event_id; -- -------------------------------------------------------- diff --git a/src/test/resources/clickhouse_data.sql b/src/test/resources/clickhouse_data.sql index bbc0fb76143..16193741624 100644 --- a/src/test/resources/clickhouse_data.sql +++ b/src/test/resources/clickhouse_data.sql @@ -454,7 +454,7 @@ insert into gistic_to_gene (gistic_roi_id,entrez_gene_id) values (1,208); insert into gistic_to_gene (gistic_roi_id,entrez_gene_id) values (2,207); insert into gistic_to_gene (gistic_roi_id,entrez_gene_id) values (3,208); -insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (1,1,123,null,'status'); +insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (1,1,123,0,'status'); insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (2,1,233,345,'specimen'); insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values 
(3,2,213,445,'treatment'); insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (4,2,211,441,'seqencing'); From 18a5ee78fac3c8fa86ee2c16894b9cdedc6e291c Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:21:30 -0400 Subject: [PATCH 5/6] Fix alteration count services (#10911) * Fix alteration count services * Only add genes that are protein coding to gene_panel_to_gene table for WES * fix test * fix sonar issues * Add documentation for filtering out WES --- .../persistence/StudyViewRepository.java | 4 ++- .../mybatisclickhouse/StudyViewMapper.java | 5 ++-- .../StudyViewMyBatisRepository.java | 15 ++++++++-- .../impl/AlterationCountServiceImpl.java | 29 +++++++------------ .../db-scripts/clickhouse/clickhouse.sql | 2 +- .../mybatisclickhouse/StudyViewMapper.xml | 28 ++++++++++++++---- .../StudyViewMapperTest.java | 5 ++-- 7 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 1c93f3ef57d..cc2b2629a58 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -38,7 +38,7 @@ public interface StudyViewRepository { List getCaseListDataCounts(StudyViewFilter studyViewFilter); - Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType); + Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType); int getFilteredSamplesCount(StudyViewFilter studyViewFilter); @@ -46,5 +46,7 @@ public interface StudyViewRepository { int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, String alterationType); + int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, String alterationType); + List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter); 
} diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 0e762a1d07a..a249333641e 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -44,14 +44,15 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C List getPatientClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds); - @MapKey("hugoGeneSymbol") - Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + List getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); List getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); int getTotalProfiledCountByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + + int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean 
applyPatientIdFilters); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index a0709db0e78..1690ba82a8b 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -132,10 +132,13 @@ public List getPatientClinicalData(StudyViewFilter studyViewFilter } @Override - public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType) { + public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getTotalProfiledCounts(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); + shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType) + .stream() + .collect(Collectors.groupingBy(AlterationCountByGene::getHugoGeneSymbol, + Collectors.mapping(AlterationCountByGene::getNumberOfProfiledCases, Collectors.summingInt(Integer::intValue)))); } @Override @@ -162,6 +165,14 @@ public int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilte shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); } + @Override + public int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, String alterationType) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return mapper.getSampleProfileCountWithoutPanelData(studyViewFilter, categorizedClinicalDataCountFilter, + 
shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter), alterationType); + } + + @Override public List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java index 29f94f0fd0f..a8aa0dbe723 100644 --- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java @@ -273,37 +273,28 @@ public List getStructuralVariantGenes(StudyViewFilter stu private < T extends AlterationCountByGene> List populateAlterationCounts(@NonNull List alterationCounts, @NonNull StudyViewFilter studyViewFilter, @NonNull AlterationType alterationType) { - var updatedAlterationCounts = alterationCounts.stream().map(SerializationUtils::clone).toList(); - var profiledCountsMap = studyViewRepository.getTotalProfiledCounts(studyViewFilter, - alterationType.toString()); - var profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, alterationType.toString()); - var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, alterationType.toString()); + final int profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, alterationType.toString()); + var profiledCountsMap = studyViewRepository.getTotalProfiledCounts(studyViewFilter, alterationType.toString()); + final var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, alterationType.toString()); + final int sampleProfileCountWithoutGenePanelData = studyViewRepository.getSampleProfileCountWithoutPanelData(studyViewFilter, alterationType.toString()); - 
updatedAlterationCounts.parallelStream() + alterationCounts.parallelStream() .forEach(alterationCountByGene -> { String hugoGeneSymbol = alterationCountByGene.getHugoGeneSymbol(); Set matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? matchingGenePanelIdsMap.get(hugoGeneSymbol) : Collections.emptySet(); - - int totalProfiledCount = getTotalProfiledCount(hugoGeneSymbol, - profiledCountsMap, profiledCountWithoutGenePanelData, matchingGenePanelIds); + + int totalProfiledCount = hasGenePanelData(matchingGenePanelIds) + ? profiledCountsMap.getOrDefault(hugoGeneSymbol, 0) + sampleProfileCountWithoutGenePanelData + : profiledCountWithoutGenePanelData; alterationCountByGene.setNumberOfProfiledCases(totalProfiledCount); alterationCountByGene.setMatchingGenePanelIds(matchingGenePanelIds); }); - return updatedAlterationCounts; + return alterationCounts; } - private int getTotalProfiledCount(@NonNull String hugoGeneSymbol, @NonNull Map profiledCountsMap, - int profiledCountWithoutGenePanelData, @NonNull Set matchingGenePanelIds) { - int totalProfiledCount = profiledCountWithoutGenePanelData; - - if (hasGenePanelData(matchingGenePanelIds) && profiledCountsMap.containsKey(hugoGeneSymbol)) { - totalProfiledCount = profiledCountsMap.get(hugoGeneSymbol).getNumberOfProfiledCases(); - } - return totalProfiledCount; - } private boolean hasGenePanelData(@NonNull Set matchingGenePanelIds) { return matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING) diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index 5cad1670464..3c4b42cbc1f 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -47,7 +47,7 @@ SELECT 'WES' AS gene_panel_id, gene.hugo_gene_symbol AS gene FROM gene -WHERE gene.entrez_gene_id > 0; +WHERE gene.entrez_gene_id > 0 AND gene.type = 'protein-coding'; CREATE TABLE sample_derived ( diff --git 
a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 576c5b7378b..e4e226be8c2 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -232,15 +232,22 @@ JOIN cancer_study cs on cs.cancer_study_id = cam.cancer_study_id - - + SELECT gene as hugoGeneSymbol, COUNT(*) as numberOfProfiledCases FROM sample_to_gene_panel_derived stgp INNER JOIN gene_panel_to_gene_derived gptg on stgp.gene_panel_id = gptg.gene_panel_id - stgp.alteration_type = '${alterationType}' + stgp.alteration_type = '${alterationType}' + AND stgp.gene_panel_id != 'WES' AND @@ -248,7 +255,18 @@ GROUP BY gptg.gene; - + + diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java index f8019f72e54..0a6279ae937 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java @@ -114,8 +114,9 @@ public void getTotalProfiledCountsByGene() { assertEquals(3, totalProfiledCountsMap.size()); - var akt2TotalProfiledCounts = totalProfiledCountsMap.get("akt2"); - assertEquals(4, akt2TotalProfiledCounts.getNumberOfProfiledCases().intValue()); + var akt2TotalProfiledCounts = totalProfiledCountsMap.stream().filter(c -> c.getHugoGeneSymbol().equals("akt2")).findFirst(); + assertTrue(akt2TotalProfiledCounts.isPresent()); + assertEquals(4, akt2TotalProfiledCounts.get().getNumberOfProfiledCases().intValue()); } @Test From 8746a34ae30069d07b98f106ecf9a3f257895393 Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:30:22 -0400 Subject: [PATCH 6/6] Rfc80/treatments 
patients endpoint (#10903) * Create new endpoint treatment/patient-counts/fetch * Create StudyViewFilterHelper class * :arrow_up: Upgrade CH client Version * Fix Merge Conflict issue * Update Treatment event name * Update to handle patient treatment filtering * Fix sonar issues * Fix sonar comments 2 --- pom.xml | 2 +- .../cbioportal/model/PatientTreatment.java | 7 ++ .../model/PatientTreatmentReport.java | 11 +++ .../persistence/StudyViewRepository.java | 6 ++ .../helper/StudyViewFilterHelper.java | 78 +++++++++++++++++++ .../mybatisclickhouse/StudyViewMapper.java | 5 ++ .../StudyViewMyBatisRepository.java | 30 ++++++- .../service/StudyViewColumnarService.java | 3 +- .../impl/StudyViewColumnarServiceImpl.java | 6 ++ .../StudyViewColumnStoreController.java | 32 ++++++++ ...volvedCancerStudyExtractorInterceptor.java | 4 +- .../StudyViewFilterMapper.xml | 22 +++++- .../mybatisclickhouse/StudyViewMapper.xml | 59 ++++++++++++-- .../StudyViewMapperTest.java | 43 +++++++++- src/test/resources/clickhouse_data.sql | 4 +- 15 files changed, 295 insertions(+), 17 deletions(-) create mode 100644 src/main/java/org/cbioportal/model/PatientTreatment.java create mode 100644 src/main/java/org/cbioportal/model/PatientTreatmentReport.java create mode 100644 src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java diff --git a/pom.xml b/pom.xml index 33883b323d0..66d86c5536e 100644 --- a/pom.xml +++ b/pom.xml @@ -361,7 +361,7 @@ com.clickhouse clickhouse-jdbc - 0.5.0 + 0.6.2 all diff --git a/src/main/java/org/cbioportal/model/PatientTreatment.java b/src/main/java/org/cbioportal/model/PatientTreatment.java new file mode 100644 index 00000000000..c838d23777e --- /dev/null +++ b/src/main/java/org/cbioportal/model/PatientTreatment.java @@ -0,0 +1,7 @@ +package org.cbioportal.model; + +import java.io.Serializable; + +public record PatientTreatment (String treatment, int count) implements Serializable { + +} diff --git 
a/src/main/java/org/cbioportal/model/PatientTreatmentReport.java b/src/main/java/org/cbioportal/model/PatientTreatmentReport.java new file mode 100644 index 00000000000..d09a2276f06 --- /dev/null +++ b/src/main/java/org/cbioportal/model/PatientTreatmentReport.java @@ -0,0 +1,11 @@ +package org.cbioportal.model; + +import java.io.Serializable; +import java.util.Collections; +import java.util.List; + +public record PatientTreatmentReport (int totalPatients, int totalSamples, List patientTreatments) implements Serializable { + public PatientTreatmentReport(int totalPatients, int totalSamples) { + this(totalPatients, totalSamples, Collections.emptyList()); + } +} diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index cc2b2629a58..59deedce6d4 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -8,6 +8,8 @@ import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.PatientTreatment; +import org.cbioportal.model.PatientTreatmentReport; import org.cbioportal.model.Sample; import org.cbioportal.web.parameter.ClinicalDataType; import org.cbioportal.web.parameter.StudyViewFilter; @@ -49,4 +51,8 @@ public interface StudyViewRepository { int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, String alterationType); List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter); + + List getPatientTreatments(StudyViewFilter studyViewFilter); + + PatientTreatmentReport getPatientTreatmentReport(StudyViewFilter studyViewFilter); } diff --git a/src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java b/src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java new file mode 100644 index 00000000000..5d63ac03b90 
--- /dev/null +++ b/src/main/java/org/cbioportal/persistence/helper/StudyViewFilterHelper.java @@ -0,0 +1,78 @@ +package org.cbioportal.persistence.helper; + +import org.cbioportal.model.ClinicalAttribute; +import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; +import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; +import org.cbioportal.web.parameter.StudyViewFilter; +import org.springframework.lang.NonNull; +import org.springframework.lang.Nullable; + +import java.util.EnumMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class StudyViewFilterHelper { + public static StudyViewFilterHelper build(@Nullable StudyViewFilter studyViewFilter, @Nullable EnumMap> clinicalAttributesMap) { + if (Objects.isNull(studyViewFilter)) { + studyViewFilter = new StudyViewFilter(); + } + if (Objects.isNull(clinicalAttributesMap)) { + clinicalAttributesMap = new EnumMap<>(ClinicalAttributeDataSource.class); + } + return new StudyViewFilterHelper(studyViewFilter, clinicalAttributesMap); + } + + private final StudyViewFilter studyViewFilter; + private final CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter; + + + private StudyViewFilterHelper(@NonNull StudyViewFilter studyViewFilter, @NonNull Map> clinicalAttributesMap ) { + this.studyViewFilter = studyViewFilter; + this.categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter, clinicalAttributesMap); + } + + private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter, Map> clinicalAttributesMap) { + + if (studyViewFilter.getClinicalDataFilters() == null || clinicalAttributesMap.isEmpty()) { + return CategorizedClinicalDataCountFilter.getBuilder().build(); + } + + List patientCategoricalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.PATIENT) + .stream().filter(ca -> ca.getDatatype().equals("STRING")) + .map(ClinicalAttribute::getAttrId) + 
.toList(); + + List patientNumericalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.PATIENT) + .stream().filter(ca -> ca.getDatatype().equals("NUMBER")) + .map(ClinicalAttribute::getAttrId) + .toList(); + + List sampleCategoricalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.SAMPLE) + .stream().filter(ca -> ca.getDatatype().equals("STRING")) + .map(ClinicalAttribute::getAttrId) + .toList(); + + List sampleNumericalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.SAMPLE) + .stream().filter(ca -> ca.getDatatype().equals("NUMBER")) + .map(ClinicalAttribute::getAttrId) + .toList(); + + return CategorizedClinicalDataCountFilter.getBuilder() + .setPatientCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters() + .stream().filter(clinicalDataFilter -> patientCategoricalAttributes.contains(clinicalDataFilter.getAttributeId())) + .toList()) + .setPatientNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> patientNumericalAttributes.contains(clinicalDataFilter.getAttributeId())) + .toList()) + .setSampleCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> sampleCategoricalAttributes.contains(clinicalDataFilter.getAttributeId())) + .toList()) + .setSampleNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> sampleNumericalAttributes.contains(clinicalDataFilter.getAttributeId())) + .toList()) + .build(); + } + + +} diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index a249333641e..bd9d4b602c3 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -10,6 +10,8 @@ import 
org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenePanelToGene; import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.PatientTreatment; +import org.cbioportal.model.PatientTreatmentReport; import org.cbioportal.model.Sample; import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; @@ -55,4 +57,7 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C int getSampleProfileCountWithoutPanelData(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); + + List getPatientTreatments(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); + PatientTreatmentReport getPatientTreatmentCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 1690ba82a8b..7da568dabf2 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -8,6 +8,8 @@ import org.cbioportal.model.GenePanelToGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.PatientTreatment; +import org.cbioportal.model.PatientTreatmentReport; import org.cbioportal.model.Sample; import org.cbioportal.persistence.StudyViewRepository; import 
org.cbioportal.persistence.enums.ClinicalAttributeDataSource; @@ -114,8 +116,9 @@ public List getCaseListDataCounts(StudyViewFilter studyViewFi private boolean shouldApplyPatientIdFilters(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { - return studyViewFilter.getClinicalEventFilters() != null && !studyViewFilter.getClinicalEventFilters().isEmpty() - || categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty() + return studyViewFilter.getClinicalEventFilters() != null && !studyViewFilter.getClinicalEventFilters().isEmpty() + || studyViewFilter.getPatientTreatmentFilters() != null && studyViewFilter.getPatientTreatmentFilters().getFilters()!= null && !studyViewFilter.getPatientTreatmentFilters().getFilters().isEmpty() + || categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty() || categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters().isEmpty(); } @@ -180,12 +183,35 @@ public List getClinicalEventTypeCounts(StudyViewFilter s shouldApplyPatientIdFilters(studyViewFilter,categorizedClinicalDataCountFilter)); } + @Override + public List getPatientTreatments(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return mapper.getPatientTreatments(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(studyViewFilter, categorizedClinicalDataCountFilter)); + } + + @Override + public PatientTreatmentReport getPatientTreatmentReport(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = 
extractClinicalDataCountFilters(studyViewFilter); + var patientTreatmentCounts = mapper.getPatientTreatmentCounts(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(studyViewFilter, categorizedClinicalDataCountFilter)); + var patientTreatments = mapper.getPatientTreatments(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(studyViewFilter, categorizedClinicalDataCountFilter)); + return new PatientTreatmentReport(patientTreatmentCounts.totalPatients(), patientTreatmentCounts.totalSamples(), patientTreatments); + } + private void buildClinicalAttributeNameMap() { clinicalAttributesMap = this.getClinicalAttributes() .stream() .collect(Collectors.groupingBy(ca -> ca.getPatientAttribute() ? ClinicalAttributeDataSource.PATIENT : ClinicalAttributeDataSource.SAMPLE)); } + private Map> getClinicalAttributeNameMap() { + if (clinicalAttributesMap.isEmpty()) { + buildClinicalAttributeNameMap(); + } + return clinicalAttributesMap; + } + private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) { if (clinicalAttributesMap.isEmpty()) { buildClinicalAttributeNameMap(); diff --git a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java index 8ae5ea20608..f332b586361 100644 --- a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java +++ b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java @@ -7,6 +7,7 @@ import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.PatientTreatmentReport; import org.cbioportal.model.Sample; import org.cbioportal.web.parameter.ClinicalDataType; import org.cbioportal.web.parameter.StudyViewFilter; @@ -35,5 +36,5 @@ public interface StudyViewColumnarService { List getGenomicDataCounts(StudyViewFilter 
studyViewFilter); List getClinicalEventTypeCounts(StudyViewFilter studyViewFilter); - + PatientTreatmentReport getPatientTreatmentReport(StudyViewFilter studyViewFilter); } diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index b73ca064803..31dc8f01ac0 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -8,6 +8,7 @@ import org.cbioportal.model.ClinicalEventTypeCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.PatientTreatmentReport; import org.cbioportal.model.Sample; import org.cbioportal.persistence.StudyViewRepository; import org.cbioportal.service.AlterationCountService; @@ -57,6 +58,11 @@ public List getClinicalEventTypeCounts(StudyViewFilter s return studyViewRepository.getClinicalEventTypeCounts(studyViewFilter); } + @Override + public PatientTreatmentReport getPatientTreatmentReport(StudyViewFilter studyViewFilter) { + return studyViewRepository.getPatientTreatmentReport(studyViewFilter); + } + public List getCnaGenes(StudyViewFilter studyViewFilter) { return alterationCountService.getCnaGenes(studyViewFilter); } diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index 3ec1ccf90a6..e8b3a6181a3 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -13,11 +13,14 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataBin; import org.cbioportal.model.ClinicalDataCountItem; +import org.cbioportal.model.ClinicalEventKeyCode; import org.cbioportal.model.ClinicalEventTypeCount; 
import org.cbioportal.model.ClinicalViolinPlotData; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.DensityPlotData; import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.PatientTreatmentReport; +import org.cbioportal.model.PatientTreatmentRow; import org.cbioportal.model.Sample; import org.cbioportal.service.ClinicalDataDensityPlotService; import org.cbioportal.service.StudyViewColumnarService; @@ -350,4 +353,33 @@ public ResponseEntity> getClinicalEventTypeCounts( ) { return new ResponseEntity<>(studyViewColumnarService.getClinicalEventTypeCounts(interceptedStudyViewFilter), HttpStatus.OK); } + + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") + @PostMapping(value = "/column-store/treatments/patient-counts/fetch", produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(description = "Get all patient level treatments") + @ApiResponse(responseCode = "200", description = "OK", + content = @Content(array = @ArraySchema(schema = @Schema(implementation = PatientTreatmentRow.class)))) + public ResponseEntity getPatientTreatmentCounts( + @Parameter(required = false ) + @RequestParam(name = "tier", required = false, defaultValue = "Agent") + ClinicalEventKeyCode tier, + + @Parameter(required = true, description = "Study view filter") + @Valid + @RequestBody(required = false) + StudyViewFilter studyViewFilter, + + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface + @RequestAttribute(required = false, value = "involvedCancerStudies") + Collection involvedCancerStudies, + + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. this attribute is needed for the @PreAuthorize tag above. 
+ @Valid + @RequestAttribute(required = false, value = "interceptedStudyViewFilter") + StudyViewFilter interceptedStudyViewFilter + ) { + return new ResponseEntity<>(studyViewColumnarService.getPatientTreatmentReport(interceptedStudyViewFilter), + HttpStatus.OK); + } + } diff --git a/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java b/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java index 32b90daf01c..82c95a09517 100644 --- a/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java +++ b/src/main/java/org/cbioportal/web/util/InvolvedCancerStudyExtractorInterceptor.java @@ -132,6 +132,7 @@ public class InvolvedCancerStudyExtractorInterceptor implements HandlerIntercept public static final String GENERIC_ASSAY_CATEGORICAL_ENRICHMENT_FETCH_PATH = "/generic-assay-categorical-enrichments/fetch"; public static final String GENERIC_ASSAY_BINARY_ENRICHMENT_FETCH_PATH = "/generic-assay-binary-enrichments/fetch"; public static final String CLINICAL_EVENT_TYPE_COUNT_FETCH_PATH = "/clinical-event-type-counts/fetch"; + public static final String TREATMENTS_PATIENT_COUNT_FETCH_PATH = "/treatments/patient-counts/fetch"; @Override public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler) { if (!request.getMethod().equals("POST")) { @@ -176,7 +177,8 @@ public class InvolvedCancerStudyExtractorInterceptor implements HandlerIntercept } else if (Arrays.asList(STUDY_VIEW_CLINICAL_DATA_DENSITY_PATH, STUDY_VIEW_CLINICAL_DATA_VIOLIN_PATH, STUDY_VIEW_CNA_GENES, STUDY_VIEW_FILTERED_SAMPLES, STUDY_VIEW_MUTATED_GENES, STUDY_VIEW_STRUCTURAL_VARIANT_GENES, STUDY_VIEW_STRUCTURAL_VARIANT_COUNTS, STUDY_VIEW_SAMPLE_COUNTS, STUDY_VIEW_SAMPLE_LIST_COUNTS_PATH, STUDY_VIEW_CLINICAL_TABLE_DATA_FETCH_PATH, - TREATMENTS_PATIENT_PATH, TREATMENTS_SAMPLE_PATH, STUDY_VIEW_PROFILE_SAMPLE_COUNTS_PATH, CLINICAL_EVENT_TYPE_COUNT_FETCH_PATH + TREATMENTS_PATIENT_PATH, 
TREATMENTS_SAMPLE_PATH, STUDY_VIEW_PROFILE_SAMPLE_COUNTS_PATH, CLINICAL_EVENT_TYPE_COUNT_FETCH_PATH, + TREATMENTS_PATIENT_COUNT_FETCH_PATH ).contains(requestPathInfo)) { return extractAttributesFromStudyViewFilter(request); } else if (requestPathInfo.equals(CLINICAL_DATA_ENRICHMENT_FETCH_PATH)) { diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index 44d3cd5523c..2c2a7cd1dc4 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -122,6 +122,22 @@ + + + + SELECT patient_unique_id + FROM clinical_event_derived + + + event_type = 'Treatment' + AND key = 'AGENT' + AND value = '${patientTreatmentFilter.treatment}' + + + + @@ -144,9 +160,13 @@ - + + + + + diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index e4e226be8c2..fcfce0e9439 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -17,6 +17,7 @@ + ORDER BY sample_stable_id ASC; @@ -329,6 +330,56 @@ GROUP BY event_type; + + + + + + + + + + + + + + + + patient_unique_id in ( SELECT patient_unique_id @@ -377,10 +425,7 @@ ) - - + sample_unique_id IN ( ) diff --git a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java index 0a6279ae937..0045bb65ea4 100644 --- a/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java +++ b/src/test/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapperTest.java @@ -8,6 +8,9 @@ 
import org.cbioportal.web.parameter.DataFilter; import org.cbioportal.web.parameter.DataFilterValue; import org.cbioportal.web.parameter.StudyViewFilter; +import org.cbioportal.web.parameter.filter.AndedPatientTreatmentFilters; +import org.cbioportal.web.parameter.filter.OredPatientTreatmentFilters; +import org.cbioportal.web.parameter.filter.PatientTreatmentFilter; import org.junit.Test; import org.junit.runner.RunWith; import org.springframework.beans.factory.annotation.Autowired; @@ -129,7 +132,7 @@ public void getClinicalEventTypeCounts() { assertEquals(4, clinicalEventTypeCounts.size()); - var clinicalEventTypeCountOptional = clinicalEventTypeCounts.stream().filter(ce -> ce.getEventType().equals("treatment")) + var clinicalEventTypeCountOptional = clinicalEventTypeCounts.stream().filter(ce -> ce.getEventType().equals("Treatment")) .findFirst(); assertTrue(clinicalEventTypeCountOptional.isPresent()); @@ -137,7 +140,7 @@ public void getClinicalEventTypeCounts() { DataFilter dataFilter = new DataFilter(); DataFilterValue dataFilterValue = new DataFilterValue(); - dataFilterValue.setValue("treatment"); + dataFilterValue.setValue("Treatment"); dataFilter.setValues(List.of(dataFilterValue)); studyViewFilter.setClinicalEventFilters(List.of(dataFilter)); @@ -151,5 +154,41 @@ public void getClinicalEventTypeCounts() { assertFalse(clinicalEventTypeCountOptional.isPresent()); } + + @Test + public void getPatientTreatmentReportCounts() { + StudyViewFilter studyViewFilter = new StudyViewFilter(); + studyViewFilter.setStudyIds(List.of(STUDY_TCGA_PUB)); + + + var patientTreatmentCounts = studyViewMapper.getPatientTreatmentCounts(studyViewFilter, + CategorizedClinicalDataCountFilter.getBuilder().build(), false ); + + var patientTreatments = studyViewMapper.getPatientTreatments(studyViewFilter, + CategorizedClinicalDataCountFilter.getBuilder().build(), false ); + + assertEquals(1, patientTreatmentCounts.totalPatients()); + assertEquals("madeupanib", 
patientTreatments.get(0).treatment()); + + PatientTreatmentFilter filter = new PatientTreatmentFilter(); + filter.setTreatment("madeupanib"); + + OredPatientTreatmentFilters oredPatientTreatmentFilters = new OredPatientTreatmentFilters(); + oredPatientTreatmentFilters.setFilters(List.of(filter)); + + AndedPatientTreatmentFilters andedPatientTreatmentFilters = new AndedPatientTreatmentFilters(); + andedPatientTreatmentFilters.setFilters(List.of(oredPatientTreatmentFilters)); + studyViewFilter.setPatientTreatmentFilters(andedPatientTreatmentFilters); + + patientTreatmentCounts = studyViewMapper.getPatientTreatmentCounts(studyViewFilter, + CategorizedClinicalDataCountFilter.getBuilder().build(), true ); + + patientTreatments = studyViewMapper.getPatientTreatments(studyViewFilter, + CategorizedClinicalDataCountFilter.getBuilder().build(), true ); + + assertEquals(1, patientTreatmentCounts.totalPatients()); + assertEquals("madeupanib", patientTreatments.get(0).treatment()); + + } } \ No newline at end of file diff --git a/src/test/resources/clickhouse_data.sql b/src/test/resources/clickhouse_data.sql index 16193741624..810e1b742ad 100644 --- a/src/test/resources/clickhouse_data.sql +++ b/src/test/resources/clickhouse_data.sql @@ -456,7 +456,7 @@ insert into gistic_to_gene (gistic_roi_id,entrez_gene_id) values (3,208); insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (1,1,123,0,'status'); insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (2,1,233,345,'specimen'); -insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (3,2,213,445,'treatment'); +insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (3,2,213,445,'Treatment'); insert into clinical_event (clinical_event_id,patient_id,start_date,stop_date,event_type) values (4,2,211,441,'seqencing'); insert into clinical_event_data 
(clinical_event_id,key,value) values (1,'status','radiographic_progression'); @@ -464,7 +464,7 @@ insert into clinical_event_data (clinical_event_id,key,value) values (1,'sample_ insert into clinical_event_data (clinical_event_id,key,value) values (2,'surgery','oa ii initial'); insert into clinical_event_data (clinical_event_id,key,value) values (2,'sample_id','tcga-a1-a0sb-01'); insert into clinical_event_data (clinical_event_id,key,value) values (3,'event_type_detailed','aa iii recurrence1'); -insert into clinical_event_data (clinical_event_id,key,value) values (3,'agent','madeupanib'); +insert into clinical_event_data (clinical_event_id,key,value) values (3,'AGENT','madeupanib'); insert into clinical_event_data (clinical_event_id,key,value) values (3,'agent_target','directly to forehead, elbow'); insert into clinical_event_data (clinical_event_id,key,value) values (3,'sample_id','tcga-a1-a0sd-01'); insert into clinical_event_data (clinical_event_id,key,value) values (4,'sample_id','tcga-a1-a0sd-01');