From bb5456a6cd8427c4ecf0c2be7fc5a70752ba61a3 Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> Date: Thu, 20 Jun 2024 15:32:01 -0400 Subject: [PATCH 1/7] Rfc80/add cna genes endpoint (#10845) * :sparkles: Add CNA Gene Endpoint * :bug: Fix StudyViewFilterMapper.xml to allow ability to filter on gene and alteration * Fix merge conflict * Address comments * Fix unit tests * Fix sonar issues --- .../persistence/StudyViewRepository.java | 6 +- .../helper/AlterationFilterHelper.java | 17 +- .../mybatisclickhouse/StudyViewMapper.java | 5 + .../StudyViewMyBatisRepository.java | 14 ++ .../service/AlterationCountService.java | 1 + .../service/StudyViewColumnarService.java | 2 + .../impl/AlterationCountServiceImpl.java | 37 +++- .../impl/StudyViewColumnarServiceImpl.java | 13 +- .../StudyViewColumnStoreController.java | 29 +-- .../db-scripts/clickhouse/clickhouse.sql | 47 ++-- .../clickhouse/clickhouse_migration.sql | 207 ++++++++--------- .../db-scripts/clickhouse/derived_tables.sql | 8 +- .../clickhouse/materialized_views.sql | 208 ------------------ .../StudyViewAlterationFilterMapper.xml | 22 +- .../StudyViewFilterMapper.xml | 13 +- .../mybatisclickhouse/StudyViewMapper.xml | 62 ++++-- .../helper/AlterationFilterHelperTest.java | 2 +- 17 files changed, 294 insertions(+), 399 deletions(-) diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index dcedba98211..3ff72d01829 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -4,6 +4,7 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; import 
org.cbioportal.persistence.enums.ClinicalAttributeDataType; @@ -22,6 +23,8 @@ public interface StudyViewRepository { List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); List getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); @@ -37,5 +40,6 @@ public interface StudyViewRepository { int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); - + + int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); } diff --git a/src/main/java/org/cbioportal/persistence/helper/AlterationFilterHelper.java b/src/main/java/org/cbioportal/persistence/helper/AlterationFilterHelper.java index 75bd6e1cdbf..1d8f63a55f2 100644 --- a/src/main/java/org/cbioportal/persistence/helper/AlterationFilterHelper.java +++ b/src/main/java/org/cbioportal/persistence/helper/AlterationFilterHelper.java @@ -20,10 +20,13 @@ public static AlterationFilterHelper build(@Nullable AlterationFilter alteration private final AlterationFilter alterationFilter; private final Select mappedMutationTypes; + + private final Select mappedCnaTypes; private AlterationFilterHelper(@NonNull AlterationFilter alterationFilter){ this.alterationFilter = alterationFilter; this.mappedMutationTypes = buildMutationTypeList(); + 
this.mappedCnaTypes = buildCnaTypeList(); } private Select buildMutationTypeList() { @@ -42,8 +45,12 @@ private Select buildMutationTypeList() { public Select getMutationTypeList() { return mappedMutationTypes; } - + public Select getCnaTypeList() { + return mappedCnaTypes; + } + + public Select buildCnaTypeList() { if (alterationFilter.getCNAEventTypeSelect().hasNone()) { return Select.none(); } @@ -128,11 +135,17 @@ public boolean isSomeTierOptionsSelected() { return !isAllTierOptionsSelected() && !isNoTierOptionsSelected(); } - public boolean shouldApply() { + public boolean shouldApplyMutationAlterationFilter() { return isSomeDriverAnnotationsSelected() || isSomeMutationStatusSelected() || isSomeTierOptionsSelected() || mappedMutationTypes.hasNone() || (!mappedMutationTypes.hasNone() && !mappedMutationTypes.hasAll()); } + + public boolean shouldApplyCnaAlterationFilter() { + return isSomeDriverAnnotationsSelected() + || mappedCnaTypes.hasNone() + || (!mappedCnaTypes.hasNone() && !mappedCnaTypes.hasAll()); + } } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 9100cb29b1b..8aa0d71321e 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -5,6 +5,7 @@ import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; @@ -21,6 +22,9 @@ public interface StudyViewMapper { List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean 
applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); + List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); + List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); @@ -44,4 +48,5 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C @MapKey("hugoGeneSymbol") Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + int getTotalProfiledCountByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 5383d500900..309c26b087a 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -3,6 +3,7 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; import org.cbioportal.persistence.StudyViewRepository; import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; @@ -39,6 +40,13 @@ public List getMutatedGenes(StudyViewFilter studyViewFilt AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } + @Override + public List getCnaGenes(StudyViewFilter 
studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + return mapper.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); + } + @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { return mapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), @@ -100,4 +108,10 @@ public Map getMatchingGenePanelIds(StudyViewFilte shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); } + @Override + public int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { + return mapper.getTotalProfiledCountByAlterationType(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); + } + } \ No newline at end of file diff --git a/src/main/java/org/cbioportal/service/AlterationCountService.java b/src/main/java/org/cbioportal/service/AlterationCountService.java index 2ce5c649158..0b927e591e1 100644 --- a/src/main/java/org/cbioportal/service/AlterationCountService.java +++ b/src/main/java/org/cbioportal/service/AlterationCountService.java @@ -78,5 +78,6 @@ Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); } diff --git a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java 
index 7b2af099500..a8966d8fc2a 100644 --- a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java +++ b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java @@ -4,6 +4,7 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCountItem; import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; import org.cbioportal.web.parameter.StudyViewFilter; @@ -14,6 +15,7 @@ public interface StudyViewColumnarService { List getFilteredSamples(StudyViewFilter studyViewFilter); List getMutatedGenes(StudyViewFilter interceptedStudyViewFilter); + List getCnaGenes(StudyViewFilter interceptedStudyViewFilter); List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes); diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java index 6fc13d12294..7a57276013f 100644 --- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java @@ -1,5 +1,6 @@ package org.cbioportal.service.impl; +import org.apache.commons.lang3.SerializationUtils; import org.apache.commons.math3.util.Pair; import org.cbioportal.model.AlterationCountBase; import org.cbioportal.model.AlterationCountByGene; @@ -257,36 +258,48 @@ public Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { var alterationCountByGenes = studyViewRepository.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter); + return populateAlterationCounts(alterationCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.MUTATION_EXTENDED); + } + + public List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter 
categorizedClinicalDataCountFilter) { + var copyNumberCountByGenes = studyViewRepository.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); + return populateAlterationCounts(copyNumberCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.COPY_NUMBER_ALTERATION); + } + + private < T extends AlterationCountByGene> List populateAlterationCounts(@NonNull List alterationCounts, + @NonNull StudyViewFilter studyViewFilter, + @NonNull CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + @NonNull AlterationType alterationType) { + var updatedAlterationCounts = alterationCounts.stream().map(SerializationUtils::clone).toList(); var profiledCountsMap = studyViewRepository.getTotalProfiledCounts(studyViewFilter, categorizedClinicalDataCountFilter, - AlterationType.MUTATION_EXTENDED.toString()); - var profiledCountWithoutGenePanelData = studyViewRepository.getFilteredSamplesCount(studyViewFilter, categorizedClinicalDataCountFilter); + alterationType.toString()); + var profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, categorizedClinicalDataCountFilter, alterationType.toString()); var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.MUTATION_EXTENDED.toString()); - alterationCountByGenes.parallelStream() + updatedAlterationCounts .forEach(alterationCountByGene -> { String hugoGeneSymbol = alterationCountByGene.getHugoGeneSymbol(); - var matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? + var matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? 
matchingGenePanelIdsMap.get(hugoGeneSymbol).getMatchingGenePanelIds() : null; - + int totalProfiledCount = getTotalProfiledCount(alterationCountByGene.getHugoGeneSymbol(), profiledCountsMap, profiledCountWithoutGenePanelData, matchingGenePanelIds); - + alterationCountByGene.setNumberOfProfiledCases(totalProfiledCount); - + alterationCountByGene.setMatchingGenePanelIds(matchingGenePanelIds); - }); - - return alterationCountByGenes; + }); + return updatedAlterationCounts; } private int getTotalProfiledCount(@NonNull String hugoGeneSymbol, @NonNull Map profiledCountsMap, int profiledCountWithoutGenePanelData, @Nullable Set matchingGenePanelIds) { int totalProfiledCount = profiledCountWithoutGenePanelData; - if (hasGenePanelData(matchingGenePanelIds)) { - totalProfiledCount = profiledCountsMap.get(hugoGeneSymbol).getNumberOfProfiledCases(); + if (hasGenePanelData(matchingGenePanelIds) && profiledCountsMap.containsKey(hugoGeneSymbol)) { + totalProfiledCount = profiledCountsMap.get(hugoGeneSymbol).getNumberOfProfiledCases(); } return totalProfiledCount; } diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index 554cdca0705..42c59686090 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -1,6 +1,12 @@ package org.cbioportal.service.impl; -import org.cbioportal.model.*; +import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.ClinicalData; +import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.ClinicalDataCountItem; +import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.GenomicDataCount; +import org.cbioportal.model.Sample; import org.cbioportal.persistence.StudyViewRepository; import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; import 
org.cbioportal.persistence.enums.ClinicalAttributeDataType; @@ -50,6 +56,11 @@ public List getGenomicDataCounts(StudyViewFilter studyViewFilt return studyViewRepository.getGenomicDataCounts(studyViewFilter, categorizedClinicalDataCountFilter); } + public List getCnaGenes(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return alterationCountService.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); + } + @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index 1daa69843e8..cfba847c2c8 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -2,6 +2,7 @@ import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.Parameter; +import io.swagger.v3.oas.annotations.media.ArraySchema; import io.swagger.v3.oas.annotations.media.Content; import io.swagger.v3.oas.annotations.media.Schema; import io.swagger.v3.oas.annotations.responses.ApiResponse; @@ -11,23 +12,11 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataBin; import org.cbioportal.model.ClinicalDataCountItem; +import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.DensityPlotData; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; import org.cbioportal.service.ClinicalDataDensityPlotService; -import io.swagger.v3.oas.annotations.Operation; -import io.swagger.v3.oas.annotations.Parameter; -import 
io.swagger.v3.oas.annotations.media.ArraySchema; -import io.swagger.v3.oas.annotations.media.Content; -import io.swagger.v3.oas.annotations.media.Schema; -import io.swagger.v3.oas.annotations.responses.ApiResponse; -import jakarta.validation.Valid; -import org.apache.commons.collections4.CollectionUtils; -import org.cbioportal.model.AlterationCountByGene; -import org.cbioportal.model.AlterationFilter; -import org.cbioportal.model.ClinicalDataBin; -import org.cbioportal.model.ClinicalDataCountItem; -import org.cbioportal.model.Sample; import org.cbioportal.service.StudyViewColumnarService; import org.cbioportal.service.exception.StudyNotFoundException; import org.cbioportal.web.columnar.util.NewStudyViewFilterUtil; @@ -39,7 +28,6 @@ import org.cbioportal.web.parameter.StudyViewFilter; import org.cbioportal.web.util.DensityPlotParameters; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.cache.annotation.Cacheable; import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; @@ -131,6 +119,19 @@ public ResponseEntity> fetchMolecularProfileSampleCounts( , HttpStatus.OK); } + @PostMapping(value = "/column-store/cna-genes/fetch", + consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + public ResponseEntity> fetchCnaGenes( + @RequestBody(required = false) StudyViewFilter studyViewFilter, + @RequestAttribute(required = false, value = "involvedCancerStudies") Collection involvedCancerStudies, + @RequestAttribute(required = false, value = "interceptedStudyViewFilter") StudyViewFilter interceptedStudyViewFilter + ) { + return new ResponseEntity<>( + studyViewColumnarService.getCnaGenes(interceptedStudyViewFilter), + HttpStatus.OK + ); + } + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") @PostMapping(value = "/column-store/clinical-data-counts/fetch", 
consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index fb8f2c94885..a6c749d9e20 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -1,47 +1,38 @@ -DROP TABLE IF EXISTS genomic_event_mutation; -DROP TABLE IF EXISTS genomic_event; +DROP TABLE IF EXISTS genomic_event_derived; DROP TABLE IF EXISTS sample_to_gene_panel; DROP TABLE IF EXISTS gene_panel_to_gene; -CREATE TABLE IF NOT EXISTS genomic_event +CREATE TABLE IF NOT EXISTS genomic_event_derived ( sample_unique_id String, - variant String, - variant_type String, hugo_gene_symbol String, - gene_panel_stable_id String, - cancer_study_identifier String, - genetic_profile_stable_id String + gene_panel_stable_id LowCardinality(String), + cancer_study_identifier LowCardinality(String), + genetic_profile_stable_id LowCardinality(String), + variant_type LowCardinality(String), + mutation_variant String, + mutation_type LowCardinality(String), + mutation_status LowCardinality(String), + driver_filter LowCardinality(String), + driver_tiers_filter LowCardinality(String), + cna_alteration Nullable(Int8), + cna_cytoband String, + sv_event_info String ) ENGINE = MergeTree -ORDER BY ( variant_type, sample_unique_id, hugo_gene_symbol); - -CREATE TABLE IF NOT EXISTS genomic_event_mutation -( - sample_unique_id String, - variant String, - hugo_gene_symbol String, - gene_panel_stable_id String, - cancer_study_identifier String, - genetic_profile_stable_id String, - mutation_type String, - mutation_status String, - driver_filter String, - driver_tiers_filter String -) ENGINE = MergeTree -ORDER BY ( hugo_gene_symbol, genetic_profile_stable_id); +ORDER BY ( variant_type, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id); CREATE TABLE sample_to_gene_panel ( sample_unique_id 
String, - alteration_type String, - gene_panel_id String, - cancer_study_identifier String + alteration_type LowCardinality(String), + gene_panel_id LowCardinality(String), + cancer_study_identifier LowCardinality(String) ) ENGINE = MergeTree() ORDER BY (gene_panel_id, alteration_type, sample_unique_id); CREATE TABLE gene_panel_to_gene ( - gene_panel_id String, + gene_panel_id LowCardinality(String), gene String ) ENGINE = MergeTree() ORDER BY (gene_panel_id); \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql index 5595c729f8e..2a508ee1584 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql @@ -1,52 +1,70 @@ --- Genomic Event Mutation Data -Insert into genomic_event_mutation -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - me.protein_change as variant, - gene.hugo_gene_symbol as hugo_gene_symbol, - gp.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - g.stable_id as genetic_profile_stable_id, - me.mutation_type as mutation_type, - mutation.mutation_status as mutation_status, - 'NA' as driver_filter, - 'NA' as drivet_tiers_filter -FROM mutation - INNER JOIN mutation_event as me on mutation.mutation_event_id = me.mutation_event_id - INNER JOIN sample_profile sp - on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id - LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id - LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id - INNER JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id - INNER JOIN sample on mutation.sample_id = sample.internal_id - LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id; +INSERT INTO sample_to_gene_panel +SELECT + concat(cs.cancer_study_identifier, 
'_', sample.stable_id) AS sample_unique_id, + genetic_alteration_type AS alteration_type, + -- If a mutation is found in a gene that is not in a gene panel we assume Whole Exome Sequencing WES + ifnull(gene_panel.stable_id, 'WES') AS gene_panel_id, + cs.cancer_study_identifier AS cancer_study_identifier +FROM sample_profile sp + INNER JOIN genetic_profile gp ON sample_profile.genetic_profile_id = gp.genetic_profile_id + LEFT JOIN gene_panel ON sp.panel_id = gene_panel.internal_id + INNER JOIN sample ON sp.sample_id = sample.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id; + +INSERT INTO gene_panel_to_gene +SELECT + gp.stable_id AS gene_panel_id, + g.hugo_gene_symbol AS gene +FROM gene_panel gp + INNER JOIN gene_panel_list gpl ON gp.internal_id = gpl.internal_id + INNER JOIN gene g ON g.entrez_gene_id = gpl.gene_id +UNION ALL +SELECT + 'WES' AS gene_panel_id, + gene.hugo_gene_symbol AS gene +FROM gene +WHERE gene.entrez_gene_id > 0; --- Genomic Event Data -Insert into genomic_event -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - me.protein_change as variant, - 'mutation' as variant_type, - gene.hugo_gene_symbol as hugo_gene_symbol, - gp.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - g.stable_id as genetic_profile_stable_id +INSERT INTO genomic_event_derived +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + gp.stable_id AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + g.stable_id AS genetic_profile_stable_id, + 'mutation' AS variant_type, + me.protein_change AS mutation_variant, + me.mutation_type AS mutation_type, + mutation.mutation_status AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + NULL AS cna_alteration, + '' AS cna_cytoband, + '' AS sv_event_info FROM mutation - INNER JOIN mutation_event as 
me on mutation.mutation_event_id = me.mutation_event_id + INNER JOIN mutation_event AS me ON mutation.mutation_event_id = me.mutation_event_id INNER JOIN sample_profile sp - on mutation.sample_id = sp.sample_id and mutation.genetic_profile_id = sp.genetic_profile_id - LEFT JOIN gene_panel gp on sp.panel_id = gp.internal_id - LEFT JOIN genetic_profile g on sp.genetic_profile_id = g.genetic_profile_id - INNER JOIN cancer_study cs on g.cancer_study_id = cs.cancer_study_id - INNER JOIN sample on mutation.sample_id = sample.internal_id - LEFT JOIN gene on mutation.entrez_gene_id = gene.entrez_gene_id + ON mutation.sample_id = sp.sample_id AND mutation.genetic_profile_id = sp.genetic_profile_id + LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id + LEFT JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id + INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id + INNER JOIN sample ON mutation.sample_id = sample.internal_id + LEFT JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id UNION ALL -SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - toString(ce.alteration) as variant, - 'cna' as variant_type, - gene.hugo_gene_symbol as hugo_gene_symbol, - gp.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - g.stable_id as genetic_profile_stable_id +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + gp.stable_id AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + g.stable_id AS genetic_profile_stable_id, + 'cna' AS variant_type, + 'NA' AS mutation_variant, + 'NA' AS mutation_type, + 'NA' AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + ce.alteration AS cna_alteration, + rgg.cytoband AS cna_cytoband, + '' AS sv_event_info FROM cna_event ce INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id INNER 
JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id @@ -55,63 +73,48 @@ FROM cna_event ce INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id INNER JOIN sample ON sce.sample_id = sample.internal_id INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id + INNER JOIN reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id UNION ALL -SELECT - concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene2.hugo_gene_symbol as hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM - structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on sample_profile.panel_id = gene_panel.internal_id -UNION ALL -SELECT - concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - event_info as variant, - 'structural_variant' as variant_type, - gene1.hugo_gene_symbol as hugo_gene_symbol, - gene_panel.stable_id as gene_panel_stable_id, - cs.cancer_study_identifier as cancer_study_identifier, - gp.stable_id as genetic_profile_stable_id -FROM - structural_variant sv - INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id - INNER JOIN sample s ON sv.sample_id = s.internal_id - INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id - INNER JOIN sample_profile on s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel on 
sample_profile.panel_id = gene_panel.internal_id; - -INSERT INTO sample_to_gene_panel -select - concat(cs.cancer_study_identifier, '_', sample.stable_id) as sample_unique_id, - genetic_alteration_type as alteration_type, - ifnull(gene_panel.stable_id, 'WES') as gene_panel_id, - cs.cancer_study_identifier as cancer_study_identifier -from sample_profile sp - inner join genetic_profile gp on sample_profile.genetic_profile_id = gp.genetic_profile_id - left join gene_panel on sp.panel_id = gene_panel.internal_id - inner join sample on sp.sample_id = sample.internal_id - inner join cancer_study cs on gp.cancer_study_id = cs.cancer_study_id; - -INSERT INTO gene_panel_to_gene -select - gp.stable_id as gene_panel_id, - g.hugo_gene_symbol as gene -from gene_panel gp - inner join gene_panel_list gpl ON gp.internal_id = gpl.internal_id - inner join gene g ON g.entrez_gene_id = gpl.gene_id +SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, + gene2.hugo_gene_symbol AS hugo_gene_symbol, + gene_panel.stable_id AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + gp.stable_id AS genetic_profile_stable_id, + 'structural_variant' AS variant_type, + 'NA' AS mutation_variant, + 'NA' AS mutation_type, + 'NA' AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + NULL AS cna_alteration, + '' AS cna_cytoband, + event_info AS sv_event_info +FROM structural_variant sv + INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id + INNER JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id UNION ALL -select - 'WES' as gene_panel_id, - gene.hugo_gene_symbol as gene -from gene -where gene.entrez_gene_id > 0; \ 
No newline at end of file +SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, + gene1.hugo_gene_symbol AS hugo_gene_symbol, + gene_panel.stable_id AS gene_panel_stable_id, + cs.cancer_study_identifier AS cancer_study_identifier, + gp.stable_id AS genetic_profile_stable_id, + 'structural_variant' AS variant_type, + 'NA' AS mutation_variant, + 'NA' AS mutation_type, + 'NA' AS mutation_status, + 'NA' AS driver_filter, + 'NA' AS drivet_tiers_filter, + NULL AS cna_alteration, + '' AS cna_cytoband, + event_info AS sv_event_info +FROM structural_variant sv + INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id + INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id + INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id + INNER JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id; \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/derived_tables.sql b/src/main/resources/db-scripts/clickhouse/derived_tables.sql index 3b2ffbcd17c..73a35cd4f69 100644 --- a/src/main/resources/db-scripts/clickhouse/derived_tables.sql +++ b/src/main/resources/db-scripts/clickhouse/derived_tables.sql @@ -1,5 +1,5 @@ -- clinical_data_derived -DROP TABLE IF EXISTS clinical_data_derived +DROP TABLE IF EXISTS clinical_data_derived; CREATE TABLE IF NOT EXISTS clinical_data_derived ( sample_unique_id String, @@ -10,7 +10,7 @@ CREATE TABLE IF NOT EXISTS clinical_data_derived type String ) ENGINE=MergeTree - ORDER BY sample_unique_id + ORDER BY sample_unique_id; -- Insert sample attribute data INSERT INTO TABLE clinical_data_derived @@ -27,7 +27,7 @@ FROM sling_db_2024_05_23_original.sample_mv AS sm ON cs.cancer_study_id = cam.cancer_study_id FULL OUTER JOIN sling_db_2024_05_23_original.clinical_sample AS csamp ON (sm.internal_id = 
csamp.internal_id) AND (csamp.attr_id = cam.attr_id) -WHERE cam.patient_attribute = 0 +WHERE cam.patient_attribute = 0; -- INSERT patient attribute data INSERT INTO TABLE clinical_data_derived @@ -43,4 +43,4 @@ FROM sling_db_2024_05_23_original.patient AS p ON cs.cancer_study_id = cam.cancer_study_id FULL OUTER JOIN sling_db_2024_05_23_original.clinical_patient AS clinpat ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) -WHERE cam.patient_attribute = 1 +WHERE cam.patient_attribute = 1; diff --git a/src/main/resources/db-scripts/clickhouse/materialized_views.sql b/src/main/resources/db-scripts/clickhouse/materialized_views.sql index c5168ae4b14..8595d6edb4d 100644 --- a/src/main/resources/db-scripts/clickhouse/materialized_views.sql +++ b/src/main/resources/db-scripts/clickhouse/materialized_views.sql @@ -1,216 +1,8 @@ DROP TABLE IF EXISTS sample_columnstore; DROP TABLE IF EXISTS sample_list_columnstore; -DROP TABLE IF EXISTS genomic_event; DROP VIEW IF EXISTS sample_columnstore_mv; DROP VIEW IF EXISTS sample_list_columnstore_mv; -CREATE TABLE sample_clinical_attribute_numeric -( - sample_unique_id VARCHAR(45), - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value FLOAT, - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (sample_unique_id, patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO sample_clinical_attribute_numeric -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - clinical_sample.attr_id as attribute_name, - cast(clinical_sample.attr_value as float) as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id -WHERE 
match(clinical_sample.attr_value, '^[\d\.]+$'); - -CREATE MATERIALIZED VIEW sample_clinical_attribute_numeric_mv - TO sample_clinical_attribute_numeric AS -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - clinical_sample.attr_id as attribute_name, - cast(clinical_sample.attr_value as float) as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample ON s.internal_id = clinical_sample.internal_id -WHERE match(clinical_sample.attr_value, '^[\d\.]+$'); - -CREATE TABLE sample_clinical_attribute_categorical -( - sample_unique_id VARCHAR(45), - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value VARCHAR(45), - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (sample_unique_id, patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO sample_clinical_attribute_categorical -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cl.attr_id as attribute_name, - cl.attr_value as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id -WHERE NOT match(cl.attr_value, '^[\d\.]+$'); - -CREATE MATERIALIZED VIEW sample_clinical_attribute_categorical_mv - TO sample_clinical_attribute_categorical AS -SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) as sample_unique_id, - concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cl.attr_id as attribute_name, - cl.attr_value 
as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN sample s on p.internal_id = s.patient_id - INNER JOIN clinical_sample cl on s.internal_id = cl.internal_id -WHERE NOT match(cl.attr_value, '^[\d\.]+$'); - -CREATE TABLE patient_clinical_attribute_numeric -( - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value FLOAT, - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO patient_clinical_attribute_numeric -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cp.attr_id as attribute_name, - cast(cp.attr_value as float) as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id -WHERE match(cp.attr_value, '^[\d\.]+$'); - -CREATE MATERIALIZED VIEW patient_clinical_attribute_numeric_mv - TO patient_clinical_attribute_numeric AS -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) as patient_unique_id, - cp.attr_id as attribute_name, - cast(cp.attr_value as float) as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id -WHERE match(cp.attr_value, '^[\d\.]+$'); - -CREATE TABLE patient_clinical_attribute_categorical -( - patient_unique_id VARCHAR(45), - attribute_name VARCHAR(45), - attribute_value VARCHAR(45), - cancer_study_identifier VARCHAR(45) -) - ENGINE = MergeTree() - ORDER BY (patient_unique_id, attribute_name, cancer_study_identifier); - -INSERT INTO patient_clinical_attribute_categorical -SELECT concat(cs.cancer_study_identifier, '_', 
p.stable_id) as patient_unique_id, - cp.attr_id as attribute_name, - cp.attr_value as attribute_value, - cs.cancer_study_identifier as cancer_study_identifier -FROM cancer_study cs - INNER JOIN patient p on cs.cancer_study_id = p.cancer_study_id - INNER JOIN clinical_patient cp on p.internal_id = cp.internal_id -WHERE NOT match(cp.attr_value, '^[\d\.]+$'); - ---patient_clinical_attribute_categorical_mv -DROP VIEW IF EXISTS patient_clinical_attribute_categorical_mv; -CREATE MATERIALIZED VIEW patient_clinical_attribute_categorical_mv - ENGINE = MergeTree() - ORDER BY cancer_study_identifier - SETTINGS allow_nullable_key = 1 - POPULATE -AS -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, - cp.attr_id AS attribute_name, - cp.attr_value AS attribute_value, - cs.cancer_study_identifier AS cancer_study_identifier -FROM clinical_patient AS cp - INNER JOIN patient AS p ON cp.internal_id = p.internal_id - INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id - INNER JOIN clinical_attribute_meta AS cam - ON (cp.attr_id = cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id) -WHERE cam.datatype = 'STRING' - - ---patient_clinical_attribute_numeric_mv -DROP VIEW IF EXISTS patient_clinical_attribute_numeric_mv; -CREATE MATERIALIZED VIEW patient_clinical_attribute_numeric_mv - ENGINE = MergeTree() - ORDER BY cancer_study_identifier - SETTINGS allow_nullable_key = 1 - POPULATE -AS -SELECT concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, - cp.attr_id AS attribute_name, - cp.attr_value AS attribute_value, - cs.cancer_study_identifier AS cancer_study_identifier -FROM sling_db_2024_05_23_original.clinical_patient AS cp - INNER JOIN sling_db_2024_05_23_original.patient AS p ON cp.internal_id = p.internal_id - INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id - INNER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam - ON (cp.attr_id = 
cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id) -WHERE cam.datatype = 'NUMBER' - --- sample_clinical_attribute_categorical_mv -DROP VIEW IF EXISTS sample_clinical_attribute_categorical_mv; -CREATE MATERIALIZED VIEW sample_clinical_attribute_categorical_mv - ENGINE = MergeTree() - ORDER BY cancer_study_identifier - SETTINGS allow_nullable_key = 1 - POPULATE -AS -SELECT s.sample_unique_id, - s.patient_unique_id, - csamp.attr_id AS attribute_name, - csamp.attr_value AS attribute_value, - s.cancer_study_identifier AS cancer_study_identifier -FROM sling_db_2024_05_23_original.clinical_sample AS csamp - INNER JOIN sling_db_2024_05_23_original.sample_mv AS s ON csamp.internal_id = s.internal_id - INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs - ON s.cancer_study_identifier = cs.cancer_study_identifier - INNER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam - ON (csamp.attr_id = cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id) -WHERE cam.datatype = 'STRING' - --- sample_clinical_attribute_numeric_mv -DROP VIEW IF EXISTS sample_clinical_attribute_numeric_mv; -CREATE MATERIALIZED VIEW sample_clinical_attribute_numeric_mv - ENGINE = MergeTree() - ORDER BY cancer_study_identifier - SETTINGS allow_nullable_key = 1 - POPULATE -AS -SELECT s.sample_unique_id, - s.patient_unique_id, - csamp.attr_id AS attribute_name, - csamp.attr_value AS attribute_value, - s.cancer_study_identifier AS cancer_study_identifier -FROM clinical_sample AS csamp - INNER JOIN sample_mv AS s ON csamp.internal_id = s.internal_id - INNER JOIN cancer_study AS cs - ON s.cancer_study_identifier = cs.cancer_study_identifier - INNER JOIN clinical_attribute_meta AS cam - ON (csamp.attr_id = cam.attr_id) AND (cs.cancer_study_id = cam.cancer_study_id) -WHERE cam.datatype = 'NUMBER' - - - - -- sample_columnstore CREATE TABLE IF NOT EXISTS sample_columnstore ( diff --git 
a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml index 28e537b309b..a8a8a27283a 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewAlterationFilterMapper.xml @@ -7,7 +7,7 @@ NULL AND - lower(genomic_event_mutation.mutation_type) + lower(genomic_event_derived.mutation_type) NOT IN @@ -24,6 +24,20 @@ + + + + NULL + + cna_event.ALTERATION IN + + #{type} + + + + + @@ -31,15 +45,15 @@ OR - lower(genomic_event_mutation.mutation_status) LIKE '%germline%' + lower(genomic_event_derived.mutation_status) LIKE '%germline%' OR - lower(genomic_event_mutation.mutation_status) = 'somatic' + lower(genomic_event_derived.mutation_status) = 'somatic' OR - lower(genomic_event_mutation.mutation_status) != 'somatic' AND lower(genomic_event_mutation.mutation_status) NOT LIKE '%germline%' + lower(genomic_event_derived.mutation_status) != 'somatic' AND lower(genomic_event_derived.mutation_status) NOT LIKE '%germline%' diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index f85569a049c..a4e8ef679d8 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -53,19 +53,20 @@ SELECT sample_unique_id - FROM genomic_event_mutation + FROM genomic_event_derived genetic_profile_stable_id IN #{molecularProfileId} - AND hugo_gene_symbol IN ( - - #{geneFilterQuery.hugoGeneSymbol} + + + hugo_gene_symbol = #{geneFilterQuery.hugoGeneSymbol} + + cna_alteration = #{alteration.code} + - ) - diff --git 
a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 2c204a3c64c..73376a25f6d 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -29,14 +29,11 @@ 1 as entrezGeneId, COUNT(DISTINCT sample_unique_id) as numberOfAlteredCases, COUNT(*) as totalCount - FROM genomic_event_mutation + FROM genomic_event_derived - sample_unique_id IN ( - - INTERSECT - - ) - + variant_type = 'mutation' AND + + @@ -44,6 +41,27 @@ ORDER BY totalCount DESC; + + + + SELECT + COUNT(DISTINCT sample_unique_id) + FROM sample_to_gene_panel stgp + + stgp.alteration_type = '${alterationType}' + AND + + + + + @@ -288,10 +314,7 @@ from sample_to_gene_panel alteration_type = '${alterationType}' AND - sample_unique_id IN ( ) - - AND patient_unique_id IN () - + ) @@ -306,4 +329,11 @@ + + + sample_unique_id IN ( ) + + AND patient_unique_id IN () + + diff --git a/src/test/java/org/cbioportal/persistence/helper/AlterationFilterHelperTest.java b/src/test/java/org/cbioportal/persistence/helper/AlterationFilterHelperTest.java index d0cd5f244bc..c2884acab53 100644 --- a/src/test/java/org/cbioportal/persistence/helper/AlterationFilterHelperTest.java +++ b/src/test/java/org/cbioportal/persistence/helper/AlterationFilterHelperTest.java @@ -161,7 +161,7 @@ public void shouldApply() { alterationFilter.setIncludeDriver(true); alterationFilter.setIncludeVUS(false); alterationFilter.setIncludeUnknownOncogenicity(false); - assertTrue(AlterationFilterHelper.build(alterationFilter).shouldApply()); + assertTrue(AlterationFilterHelper.build(alterationFilter).shouldApplyMutationAlterationFilter()); } } \ No newline at end of file From 505847b19802785b582376a8210e07c0d6d3a549 Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> 
Date: Mon, 24 Jun 2024 12:36:45 -0400 Subject: [PATCH 2/7] :sparkles: Add StructuralVariant-genes endpoint (#10854) * :sparkles: Add StructuralVariant-genes endpoint * Fix sonar issues * Update MatchingGenePanel request to return list * Create and use sample_derive * Update where sample_derived is stored to fix unit test --- .../persistence/StudyViewRepository.java | 6 ++- .../mybatisclickhouse/StudyViewMapper.java | 6 ++- .../StudyViewMyBatisRepository.java | 17 +++++- .../typehandler/GenePanelIdsTypeHandler.java | 39 -------------- .../service/AlterationCountService.java | 2 + .../service/StudyViewColumnarService.java | 1 + .../impl/AlterationCountServiceImpl.java | 16 ++++-- .../impl/StudyViewColumnarServiceImpl.java | 6 +++ .../StudyViewColumnStoreController.java | 16 ++++++ .../db-scripts/clickhouse/clickhouse.sql | 16 +++++- .../clickhouse/clickhouse_migration.sql | 42 ++++++++++----- .../db-scripts/clickhouse/derived_tables.sql | 17 +++--- .../StudyViewFilterMapper.xml | 12 ++--- .../mybatisclickhouse/StudyViewMapper.xml | 52 ++++++++++--------- 14 files changed, 146 insertions(+), 102 deletions(-) delete mode 100644 src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 3ff72d01829..f866cf1442c 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -3,8 +3,8 @@ import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; -import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; import 
org.cbioportal.persistence.enums.ClinicalAttributeDataType; @@ -13,6 +13,7 @@ import java.util.List; import java.util.Map; +import java.util.Set; public interface StudyViewRepository { List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); @@ -24,6 +25,7 @@ public interface StudyViewRepository { List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); @@ -39,7 +41,7 @@ public interface StudyViewRepository { int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); - Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); + Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 8aa0d71321e..7a279943289 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -24,6 +24,9 @@ 
List getMutatedGenes(StudyViewFilter studyViewFilter, Cat List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); + + List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); @@ -45,8 +48,7 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); - @MapKey("hugoGeneSymbol") - Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + List getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); int getTotalProfiledCountByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 309c26b087a..6dbc760d097 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -2,6 +2,7 @@ 
import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.GenePanelToGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; @@ -16,6 +17,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; @Repository public class StudyViewMyBatisRepository implements StudyViewRepository { @@ -47,6 +50,13 @@ public List getCnaGenes(StudyViewFilter studyViewFilter, AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } + @Override + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + return mapper.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); + } + @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { return mapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), @@ -103,9 +113,12 @@ public int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedC } @Override - public Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { + public Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { return mapper.getMatchingGenePanelIds(studyViewFilter, categorizedClinicalDataCountFilter, - 
shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); + shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType) + .stream() + .collect(Collectors.groupingBy(GenePanelToGene::getHugoGeneSymbol, + Collectors.mapping(GenePanelToGene::getGenePanelId, Collectors.toSet()))); } @Override diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java deleted file mode 100644 index 397cdf01bd2..00000000000 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java +++ /dev/null @@ -1,39 +0,0 @@ -package org.cbioportal.persistence.mybatisclickhouse.typehandler; - -import org.apache.ibatis.type.BaseTypeHandler; -import org.apache.ibatis.type.JdbcType; - -import java.sql.CallableStatement; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.HashSet; -import java.util.Set; -import java.util.Arrays; - -public class GenePanelIdsTypeHandler extends BaseTypeHandler> { - - @Override - public void setNonNullParameter(PreparedStatement ps, int i, Set parameter, JdbcType jdbcType) throws SQLException { - // Convert Set to array for storage (if needed) - throw new UnsupportedOperationException("Storage of GenePanelIds not supported"); - } - - @Override - public Set getNullableResult(ResultSet rs, String columnName) throws SQLException { - String[] array = (String[]) rs.getArray(columnName).getArray(); - return new HashSet<>(Arrays.asList(array)); - } - - @Override - public Set getNullableResult(ResultSet rs, int columnIndex) throws SQLException { - String[] array = (String[]) rs.getArray(columnIndex).getArray(); - return new HashSet<>(Arrays.asList(array)); - } - - @Override - public Set getNullableResult(CallableStatement cs, int columnIndex) throws SQLException { - String[] 
array = (String[]) cs.getArray(columnIndex).getArray(); - return new HashSet<>(Arrays.asList(array)); - } -} diff --git a/src/main/java/org/cbioportal/service/AlterationCountService.java b/src/main/java/org/cbioportal/service/AlterationCountService.java index 0b927e591e1..2e02a4be518 100644 --- a/src/main/java/org/cbioportal/service/AlterationCountService.java +++ b/src/main/java/org/cbioportal/service/AlterationCountService.java @@ -80,4 +80,6 @@ Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + } diff --git a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java index a8966d8fc2a..91ed7b8c647 100644 --- a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java +++ b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java @@ -16,6 +16,7 @@ public interface StudyViewColumnarService { List getMutatedGenes(StudyViewFilter interceptedStudyViewFilter); List getCnaGenes(StudyViewFilter interceptedStudyViewFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter); List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes); diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java index 7a57276013f..c1770e41e18 100644 --- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java @@ -265,7 +265,13 @@ public List getCnaGenes(StudyViewFilter studyViewFilter, var 
copyNumberCountByGenes = studyViewRepository.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); return populateAlterationCounts(copyNumberCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.COPY_NUMBER_ALTERATION); } - + + @Override + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + var alterationCountByGenes = studyViewRepository.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter); + return populateAlterationCounts(alterationCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.STRUCTURAL_VARIANT); + } + private < T extends AlterationCountByGene> List populateAlterationCounts(@NonNull List alterationCounts, @NonNull StudyViewFilter studyViewFilter, @NonNull CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, @@ -276,13 +282,13 @@ private < T extends AlterationCountByGene> List populateAlterationCounts(@Non alterationType.toString()); var profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, categorizedClinicalDataCountFilter, alterationType.toString()); var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, - categorizedClinicalDataCountFilter, AlterationType.MUTATION_EXTENDED.toString()); + categorizedClinicalDataCountFilter, alterationType.toString()); - updatedAlterationCounts + updatedAlterationCounts.parallelStream() .forEach(alterationCountByGene -> { String hugoGeneSymbol = alterationCountByGene.getHugoGeneSymbol(); - var matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? - matchingGenePanelIdsMap.get(hugoGeneSymbol).getMatchingGenePanelIds() : null; + Set matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? 
+ matchingGenePanelIdsMap.get(hugoGeneSymbol) : Collections.emptySet(); int totalProfiledCount = getTotalProfiledCount(alterationCountByGene.getHugoGeneSymbol(), profiledCountsMap, profiledCountWithoutGenePanelData, matchingGenePanelIds); diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index 42c59686090..83916d0c369 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -61,6 +61,12 @@ public List getCnaGenes(StudyViewFilter studyViewFilter) return alterationCountService.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); } + @Override + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return alterationCountService.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter); + } + @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index cfba847c2c8..cf06ef4ae76 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -132,6 +132,23 @@ public ResponseEntity> fetchCnaGenes( ); } + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") + @PostMapping(value = "/column-store/structuralvariant-genes/fetch", + consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(description = "Fetch structural variant genes 
by study view filter") + @ApiResponse(responseCode = "200", description = "OK", + content = @Content(array = @ArraySchema(schema = @Schema(implementation = AlterationCountByGene.class)))) + public ResponseEntity> fetchStructuralVariantGenes( + @Parameter(required = true, description = "Study view filter") + @Valid @RequestBody(required = false) StudyViewFilter studyViewFilter, + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. This attribute is needed for the @PreAuthorize tag above. + @RequestAttribute(required = false, value = "involvedCancerStudies") Collection involvedCancerStudies, + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. + @Valid @RequestAttribute(required = false, value = "interceptedStudyViewFilter") StudyViewFilter interceptedStudyViewFilter + ) { + return new ResponseEntity<>(studyViewColumnarService.getStructuralVariantGenes(interceptedStudyViewFilter), HttpStatus.OK); + } + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") @PostMapping(value = "/column-store/clinical-data-counts/fetch", consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index a6c749d9e20..828c358d9f1 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -35,4 +35,18 @@ CREATE TABLE gene_panel_to_gene gene_panel_id LowCardinality(String), gene String ) ENGINE = MergeTree() -ORDER BY (gene_panel_id); \ No newline at end of file +ORDER BY (gene_panel_id); + +CREATE TABLE sample_derived +( + sample_unique_id String, + sample_unique_id_base64 String, + sample_stable_id String, + patient_unique_id String, + patient_unique_id_base64 String, + patient_stable_id String, + 
cancer_study_identifier LowCardinality(String), + internal_id Int +) + ENGINE = MergeTree + ORDER BY (cancer_study_identifier, sample_unique_id); \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql index 2a508ee1584..55c8e48d9b3 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql @@ -29,7 +29,7 @@ WHERE gene.entrez_gene_id > 0; INSERT INTO genomic_event_derived SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, gene.hugo_gene_symbol AS hugo_gene_symbol, - gp.stable_id AS gene_panel_stable_id, + ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, g.stable_id AS genetic_profile_stable_id, 'mutation' AS variant_type, @@ -53,7 +53,7 @@ FROM mutation UNION ALL SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, gene.hugo_gene_symbol AS hugo_gene_symbol, - gp.stable_id AS gene_panel_stable_id, + ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, g.stable_id AS genetic_profile_stable_id, 'cna' AS variant_type, @@ -68,7 +68,7 @@ SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_uniqu FROM cna_event ce INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id - INNER JOIN gene_panel gp ON sp.panel_id = gp.internal_id + LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id INNER JOIN sample ON sce.sample_id = sample.internal_id @@ -76,8 +76,8 @@ FROM cna_event ce INNER JOIN 
reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id UNION ALL SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, - gene2.hugo_gene_symbol AS hugo_gene_symbol, - gene_panel.stable_id AS gene_panel_stable_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, gp.stable_id AS genetic_profile_stable_id, 'structural_variant' AS variant_type, @@ -93,13 +93,13 @@ FROM structural_variant sv INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id INNER JOIN sample s ON sv.sample_id = s.internal_id INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id - INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id + INNER JOIN gene ON sv.site1_entrez_gene_id = gene.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id + LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id UNION ALL SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, - gene1.hugo_gene_symbol AS hugo_gene_symbol, - gene_panel.stable_id AS gene_panel_stable_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, gp.stable_id AS genetic_profile_stable_id, 'structural_variant' AS variant_type, @@ -115,6 +115,22 @@ FROM structural_variant sv INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id INNER JOIN sample s ON sv.sample_id = s.internal_id INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id - 
INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id; \ No newline at end of file + INNER JOIN gene ON sv.site2_entrez_gene_id = gene.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id + LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id +WHERE + sv.site2_entrez_gene_id != sv.site1_entrez_gene_id + OR sv.site1_entrez_gene_id IS NULL; + +INSERT INTO sample_derived +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + base64Encode(sample.stable_id) AS sample_unique_id_base64, + sample.stable_id AS sample_stable_id, + concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, + p.stable_id AS patient_stable_id, + base64Encode(p.stable_id) AS patient_unique_id_base64, + cs.cancer_study_identifier AS cancer_study_identifier, + sample.internal_id AS internal_id +FROM sample + INNER JOIN patient AS p ON sample.patient_id = p.internal_id + INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/derived_tables.sql b/src/main/resources/db-scripts/clickhouse/derived_tables.sql index 73a35cd4f69..0e982ed106f 100644 --- a/src/main/resources/db-scripts/clickhouse/derived_tables.sql +++ b/src/main/resources/db-scripts/clickhouse/derived_tables.sql @@ -20,12 +20,12 @@ SELECT sm.sample_unique_id AS sample_unique_id, csamp.attr_value AS attribute_value, cs.cancer_study_identifier AS cancer_study_identifier, 'sample' AS type -FROM sling_db_2024_05_23_original.sample_mv AS sm - INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs +FROM sample_mv AS sm + INNER JOIN cancer_study AS cs ON sm.cancer_study_identifier = cs.cancer_study_identifier - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam + FULL 
OUTER JOIN clinical_attribute_meta AS cam ON cs.cancer_study_id = cam.cancer_study_id - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_sample AS csamp + FULL OUTER JOIN clinical_sample AS csamp ON (sm.internal_id = csamp.internal_id) AND (csamp.attr_id = cam.attr_id) WHERE cam.patient_attribute = 0; @@ -37,10 +37,11 @@ SELECT NULL AS sample_unique_id, clinpat.attr_value AS attribute_value, cs.cancer_study_identifier AS cancer_study_identifier, 'patient' AS type -FROM sling_db_2024_05_23_original.patient AS p - INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam +FROM patient AS p + INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id + FULL OUTER JOIN clinical_attribute_meta AS cam ON cs.cancer_study_id = cam.cancer_study_id - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_patient AS clinpat + FULL OUTER JOIN clinical_patient AS clinpat ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) WHERE cam.patient_attribute = 1; + diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index a4e8ef679d8..58cbfe48b0d 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -5,9 +5,9 @@ - INTERSECT + INTERSECT SELECT sample_unique_id - FROM sample_mv + FROM sample_derived WHERE cancer_study_identifier IN #{studyId} @@ -19,13 +19,13 @@ INTERSECT SELECT * FROM ( - SELECT sample_mv.sample_unique_id + SELECT sample_derived.sample_unique_id FROM sample_profile JOIN genetic_profile gp ON sample_profile.genetic_profile_id = gp.genetic_profile_id JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - JOIN 
sample_mv on sample_profile.sample_id = sample_mv.internal_id + JOIN sample_derived on sample_profile.sample_id = sample_derived.internal_id - sample_mv.cancer_study_identifier IN + sample_derived.cancer_study_identifier IN #{studyId} @@ -43,7 +43,7 @@ INTERSECT SELECT sample_unique_id - FROM sample_mv + FROM sample_derived WHERE sample_unique_id IN '${sampleIdentifier.studyId}_${sampleIdentifier.sampleId}' diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 73376a25f6d..55b8b529857 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -11,7 +11,7 @@ cancer_study_identifier as cancerStudyIdentifier, sample_unique_id_base64 as uniqueSampleKey, patient_unique_id_base64 as uniquePatientKey - FROM sample_mv + FROM sample_derived sample_unique_id IN ( ) @@ -23,7 +23,7 @@ - SELECT hugo_gene_symbol as hugoGeneSymbol, 1 as entrezGeneId, @@ -37,8 +37,7 @@ - GROUP BY hugo_gene_symbol - ORDER BY totalCount DESC; + GROUP BY hugo_gene_symbol; @@ -58,8 +57,21 @@ - GROUP BY hugo_gene_symbol, alteration, cytoband - ORDER BY totalCount DESC; + GROUP BY hugo_gene_symbol, alteration, cytoband; + + + @@ -127,16 +139,16 @@ @@ -176,7 +188,7 @@ sample_unique_id IN ( INTERSECT - SELECT sample_unique_id from sample_mv WHERE sample_mv.patient_unique_id IN + SELECT sample_unique_id from sample_derived WHERE sample_derived.patient_unique_id IN () ) @@ -239,7 +251,7 @@ SELECT sample_unique_id - FROM sample_mv + FROM sample_derived patient_unique_id IN () @@ -247,7 +259,7 @@ SELECT patient_unique_id - FROM sample_mv + FROM sample_derived sample_unique_id IN () @@ -296,17 +308,17 @@ - SELECT gene as hugoGeneSymbol, - arrayFilter(x -> x != '', groupArray( distinct gene_panel_id)) as matchingGenePanelIds + gene_panel_id as 
genePanelId FROM gene_panel_to_gene gene_panel_id in ( @@ -318,17 +330,9 @@ ) - GROUP BY gene; + GROUP BY gene, gene_panel_id; - - - - - - - - sample_unique_id IN ( ) From 74b735d35f3ed7656c013ef672b73551beb495b1 Mon Sep 17 00:00:00 2001 From: alisman Date: Mon, 24 Jun 2024 13:46:14 -0400 Subject: [PATCH 3/7] Implement study view sample-lists-counts endpoint in Clickhouse (#10849) Co-authored-by: Bryan Lai --- .../persistence/StudyViewRepository.java | 4 ++++ .../mybatisclickhouse/StudyViewMapper.java | 6 ++++- .../StudyViewMyBatisRepository.java | 6 +++++ .../service/StudyViewColumnarService.java | 3 +++ .../impl/StudyViewColumnarServiceImpl.java | 9 ++++++++ .../web/columnar/ClinicalDataBinner.java | 2 ++ .../StudyViewColumnStoreController.java | 17 ++++++++++++++ .../StudyViewFilterMapper.xml | 23 ++++++++++++++++--- .../mybatisclickhouse/StudyViewMapper.xml | 15 ++++++++++++ 9 files changed, 81 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index f866cf1442c..51ff94c7a17 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -1,6 +1,7 @@ package org.cbioportal.persistence; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.CopyNumberCountByGene; @@ -35,6 +36,9 @@ public interface StudyViewRepository { List getGenomicDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + + List getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, 
ClinicalAttributeDataType dataType); Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 7a279943289..0c7e40d3524 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -1,11 +1,12 @@ package org.cbioportal.persistence.mybatisclickhouse; -import org.cbioportal.model.*; import org.apache.ibatis.annotations.MapKey; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; @@ -14,6 +15,7 @@ import java.util.List; import java.util.Map; + public interface StudyViewMapper { List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); @@ -36,6 +38,8 @@ List getSampleClinicalDataCounts(StudyViewFilter studyViewFil List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); + + List getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); List getClinicalAttributeNames(String tableName); diff --git 
a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 6dbc760d097..01bd7aa6bb3 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -1,5 +1,6 @@ package org.cbioportal.persistence.mybatisclickhouse; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.GenePanelToGene; @@ -82,6 +83,11 @@ public List getPatientClinicalDataCounts(StudyViewFilter stud filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES); } + @Override + public List getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + return mapper.getCaseListDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); + } + @Override public List getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType) { String tableName = clinicalAttributeDataSource.getValue().toLowerCase() + "_clinical_attribute_" + dataType.getValue().toLowerCase() + "_mv"; diff --git a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java index 91ed7b8c647..0298b061377 100644 --- a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java +++ b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java @@ -1,6 +1,7 @@ package org.cbioportal.service; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import 
org.cbioportal.model.ClinicalDataCountItem; import org.cbioportal.model.GenomicDataCount; @@ -20,6 +21,8 @@ public interface StudyViewColumnarService { List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes); + List getCaseListDataCounts(StudyViewFilter studyViewFilter); + List getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds); List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds); diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index 83916d0c369..6a48daf03ad 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -1,6 +1,7 @@ package org.cbioportal.service.impl; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.ClinicalDataCountItem; @@ -15,6 +16,7 @@ import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Service; import java.util.HashMap; @@ -38,6 +40,7 @@ public StudyViewColumnarServiceImpl(StudyViewRepository studyViewRepository, Alt this.alterationCountService = alterationCountService; } + @Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()") @Override public List getFilteredSamples(StudyViewFilter studyViewFilter) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); @@ -83,6 +86,12 @@ public List getClinicalDataCounts(StudyViewFilter 
studyVi }).collect(Collectors.toList()); } + @Override + public List getCaseListDataCounts(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return studyViewRepository.getCaseListDataCounts(studyViewFilter, categorizedClinicalDataCountFilter); + } + private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) { if (clinicalAttributeNameMap.isEmpty()) { buildClinicalAttributeNameMap(); diff --git a/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java b/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java index 77562cd44d6..cc91a840a3c 100644 --- a/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java +++ b/src/main/java/org/cbioportal/web/columnar/ClinicalDataBinner.java @@ -6,6 +6,7 @@ import org.cbioportal.web.parameter.*; import org.cbioportal.web.util.DataBinner; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Component; import java.util.*; @@ -26,6 +27,7 @@ public ClinicalDataBinner( this.dataBinner = dataBinner; } + @Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()") public List fetchClinicalDataBinCounts( DataBinMethod dataBinMethod, ClinicalDataBinCountFilter dataBinCountFilter, diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index cf06ef4ae76..ccb193cb9af 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -9,6 +9,7 @@ import jakarta.validation.Valid; import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.AlterationFilter; 
+import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataBin; import org.cbioportal.model.ClinicalDataCountItem; @@ -170,6 +171,22 @@ public ResponseEntity> fetchClinicalDataCounts( } + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") + @RequestMapping(value = "/column-store/sample-lists-counts/fetch", method = RequestMethod.POST, + consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(description = "Fetch case list sample counts by study view filter") + public List fetchCaseListCounts( + @Parameter(required = true, description = "Study view filter") + @Valid @RequestBody(required = false) StudyViewFilter studyViewFilter, + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface + @RequestAttribute(required = false, value = "involvedCancerStudies") Collection involvedCancerStudies, + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. this attribute is needed for the @PreAuthorize tag above. 
+ @Valid @RequestAttribute(required = false, value = "interceptedStudyViewFilter") StudyViewFilter interceptedStudyViewFilter) { + + return studyViewColumnarService.getCaseListDataCounts(interceptedStudyViewFilter); + + } + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") @RequestMapping(value = "/column-store/clinical-data-bin-counts/fetch", method = RequestMethod.POST, consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index 58cbfe48b0d..29d52a9e7df 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -13,8 +13,26 @@ #{studyId} - - + + + INTERSECT + -- case list filtering allows both UNION (OR) and INTERSECTION (AND) LOGIC + -- caseLists is an array of arrays wherein the top level is INTERSECTION + -- AND THE INTERNAL ARRAYS ARE UNION (OR) + SELECT * FROM ( + + SELECT sample_unique_id + FROM sample_list_list sll + LEFT JOIN sample_mv s ON sll.sample_id=s.internal_id + LEFT JOIN sample_list sl on sll.list_id=sl.list_id + WHERE + + sl.stable_id LIKE '%_${list}' + + + ) + + INTERSECT SELECT * FROM ( @@ -34,7 +52,6 @@ gp.stable_id LIKE '%_${genomicProfileId}' - ) diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 55b8b529857..9170b676872 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -151,6 +151,21 @@ GROUP BY genetic_profile.stable_id, 
genetic_profile.name, sample_derived.cancer_study_identifier; + + + SELECT From 110236b64b69c19447442ac08aab0f94a8effdfb Mon Sep 17 00:00:00 2001 From: alisman Date: Mon, 24 Jun 2024 14:34:28 -0400 Subject: [PATCH 4/7] Fix class import (#10856) --- .../persistence/mybatisclickhouse/StudyViewMapper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 0c7e40d3524..3407f40c145 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -6,6 +6,7 @@ import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.GenePanelToGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; import org.cbioportal.persistence.helper.AlterationFilterHelper; From a484e2ee5e6d719f5433c3016aa2c4a06d4d2e5e Mon Sep 17 00:00:00 2001 From: Onur Sumer Date: Mon, 24 Jun 2024 15:33:59 -0400 Subject: [PATCH 5/7] Move ClinicalCategoricalFilter to repository (#10847) * use clinical_data_derived instead of sample_clinical_attribute_numeric_mv and patient_clinical_attribute_numeric_mv * use clinical_attribute_meta instead of sample_clinical_attribute_numeric_mv and patient_clinical_attribute_numeric_mv * remove unused clinical data count methods and SQL * fix numericalClinicalDataCountFilter * Move CategoricalClinicalAttributeFilter to repository * remove unused columns * Add override to methods --------- Co-authored-by: haynescd --- .../persistence/StudyViewRepository.java | 37 +++--- .../mybatisclickhouse/StudyViewMapper.java | 3 +- .../StudyViewMyBatisRepository.java | 119 +++++++++++++----- .../service/AlterationCountService.java | 6 +- 
.../impl/AlterationCountServiceImpl.java | 26 ++-- .../impl/StudyViewColumnarServiceImpl.java | 78 ++---------- .../StudyViewFilterMapper.xml | 16 +-- .../mybatisclickhouse/StudyViewMapper.xml | 50 ++------ 8 files changed, 146 insertions(+), 189 deletions(-) diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 51ff94c7a17..98c1a17633e 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -1,15 +1,13 @@ package org.cbioportal.persistence; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.ClinicalAttribute; import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; -import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; -import org.cbioportal.persistence.enums.ClinicalAttributeDataType; -import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; import java.util.List; @@ -17,35 +15,30 @@ import java.util.Set; public interface StudyViewRepository { - List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getFilteredSamples(StudyViewFilter studyViewFilter); - List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds); - List getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List 
getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds); - List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getMutatedGenes(StudyViewFilter studyViewFilter); - List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); - List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter); + List getCnaGenes(StudyViewFilter studyViewFilter); - List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); + List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes); - List getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); + List getGenomicDataCounts(StudyViewFilter studyViewFilter); - List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); - - List getGenomicDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); - - - List getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getClinicalAttributes(); - List getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType); + List getCaseListDataCounts(StudyViewFilter studyViewFilter); - Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); + Map getTotalProfiledCounts(StudyViewFilter 
studyViewFilter, String alterationType); - int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + int getFilteredSamplesCount(StudyViewFilter studyViewFilter); - Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); + Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, String alterationType); - int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); + int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, String alterationType); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 3407f40c145..71d0e76a1ab 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -3,6 +3,7 @@ import org.apache.ibatis.annotations.MapKey; import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.CaseListDataCount; +import org.cbioportal.model.ClinicalAttribute; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.CopyNumberCountByGene; @@ -42,7 +43,7 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C List getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); - List getClinicalAttributeNames(String tableName); + List getClinicalAttributes(); List getSampleClinicalDataFromStudyViewFilter(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean 
applyPatientIdFilters, List attributeIds); diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 01bd7aa6bb3..ebe94444794 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -1,5 +1,6 @@ package org.cbioportal.persistence.mybatisclickhouse; import org.cbioportal.model.AlterationCountByGene; +import org.cbioportal.model.ClinicalAttribute; import org.cbioportal.model.CaseListDataCount; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; @@ -9,13 +10,13 @@ import org.cbioportal.model.Sample; import org.cbioportal.persistence.StudyViewRepository; import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; -import org.cbioportal.persistence.enums.ClinicalAttributeDataType; import org.cbioportal.persistence.helper.AlterationFilterHelper; import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -24,6 +25,9 @@ @Repository public class StudyViewMyBatisRepository implements StudyViewRepository { + private Map> clinicalAttributesMap = new HashMap<>(); + + private static final List FILTERED_CLINICAL_ATTR_VALUES = Collections.emptyList(); private final StudyViewMapper mapper; @@ -32,94 +36,94 @@ public StudyViewMyBatisRepository(StudyViewMapper mapper) { this.mapper = mapper; } @Override - public List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { - + public 
List getFilteredSamples(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); } @Override - public List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + public List getMutatedGenes(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } @Override - public List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + public List getCnaGenes(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } @Override - public List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), 
AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } @Override - public List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { + public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES ); } @Override - public List getGenomicDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + public List getGenomicDataCounts(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getGenomicDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); } - - @Override - public List getSampleClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { - return mapper.getSampleClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), - filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES ); - } - - @Override - public List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { - return mapper.getPatientClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), - filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES); + 
public List getClinicalAttributes() { + return mapper.getClinicalAttributes(); } @Override - public List getCaseListDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + public List getCaseListDataCounts(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getCaseListDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); } - @Override - public List getClinicalDataAttributeNames(ClinicalAttributeDataSource clinicalAttributeDataSource, ClinicalAttributeDataType dataType) { - String tableName = clinicalAttributeDataSource.getValue().toLowerCase() + "_clinical_attribute_" + dataType.getValue().toLowerCase() + "_mv"; - return mapper.getClinicalAttributeNames(tableName); - } private boolean shouldApplyPatientIdFilters(CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { return categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientCategoricalClinicalDataFilters().isEmpty() || categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters() != null && !categorizedClinicalDataCountFilter.getPatientNumericalClinicalDataFilters().isEmpty(); } - public List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + @Override + public List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getSampleClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds); } - 
public List getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + @Override + public List getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getPatientClinicalDataFromStudyViewFilter(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), attributeIds); } - public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { + @Override + public Map getTotalProfiledCounts(StudyViewFilter studyViewFilter, String alterationType) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getTotalProfiledCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); } @Override - public int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + public int getFilteredSamplesCount(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getFilteredSamplesCount(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter)); } @Override - public Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { + public Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, String alterationType) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter 
= extractClinicalDataCountFilters(studyViewFilter); return mapper.getMatchingGenePanelIds(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType) .stream() @@ -128,9 +132,62 @@ public Map> getMatchingGenePanelIds(StudyViewFilter studyVie } @Override - public int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { + public int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, String alterationType) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); return mapper.getTotalProfiledCountByAlterationType(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); } + private void buildClinicalAttributeNameMap() { + clinicalAttributesMap = this.getClinicalAttributes() + .stream() + .collect(Collectors.groupingBy(ca -> ca.getPatientAttribute() ? 
ClinicalAttributeDataSource.PATIENT : ClinicalAttributeDataSource.SAMPLE)); + } + + private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) { + if (clinicalAttributesMap.isEmpty()) { + buildClinicalAttributeNameMap(); + } + + if (studyViewFilter.getClinicalDataFilters() == null) { + return CategorizedClinicalDataCountFilter.getBuilder().build(); + } + + List patientCategoricalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.PATIENT) + .stream().filter(ca -> ca.getDatatype().equals("STRING")) + .map(ClinicalAttribute::getAttrId) + .toList(); + + List patientNumericalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.PATIENT) + .stream().filter(ca -> ca.getDatatype().equals("NUMBER")) + .map(ClinicalAttribute::getAttrId) + .toList(); + + List sampleCategoricalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.SAMPLE) + .stream().filter(ca -> ca.getDatatype().equals("STRING")) + .map(ClinicalAttribute::getAttrId) + .toList(); + + List sampleNumericalAttributes = clinicalAttributesMap.get(ClinicalAttributeDataSource.SAMPLE) + .stream().filter(ca -> ca.getDatatype().equals("NUMBER")) + .map(ClinicalAttribute::getAttrId) + .toList(); + + return CategorizedClinicalDataCountFilter.getBuilder() + .setPatientCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters() + .stream().filter(clinicalDataFilter -> patientCategoricalAttributes.contains(clinicalDataFilter.getAttributeId())) + .collect(Collectors.toList())) + .setPatientNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> patientNumericalAttributes.contains(clinicalDataFilter.getAttributeId())) + .collect(Collectors.toList())) + .setSampleCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> sampleCategoricalAttributes.contains(clinicalDataFilter.getAttributeId())) + 
.collect(Collectors.toList())) + .setSampleNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() + .filter(clinicalDataFilter -> sampleNumericalAttributes.contains(clinicalDataFilter.getAttributeId())) + .collect(Collectors.toList())) + .build(); + } + + } \ No newline at end of file diff --git a/src/main/java/org/cbioportal/service/AlterationCountService.java b/src/main/java/org/cbioportal/service/AlterationCountService.java index 2e02a4be518..289806033ba 100644 --- a/src/main/java/org/cbioportal/service/AlterationCountService.java +++ b/src/main/java/org/cbioportal/service/AlterationCountService.java @@ -77,9 +77,9 @@ Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); - List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getMutatedGenes(StudyViewFilter studyViewFilter); + List getCnaGenes(StudyViewFilter studyViewFilter); - List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter); } diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java index c1770e41e18..67079dc5b63 100644 --- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java @@ -16,7 +16,6 @@ import org.cbioportal.persistence.StudyViewRepository; import org.cbioportal.service.AlterationCountService; import org.cbioportal.service.util.AlterationEnrichmentUtil; -import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; 
import org.springframework.lang.NonNull; @@ -256,33 +255,30 @@ public Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { - var alterationCountByGenes = studyViewRepository.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter); - return populateAlterationCounts(alterationCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.MUTATION_EXTENDED); + public List getMutatedGenes(StudyViewFilter studyViewFilter) { + var alterationCountByGenes = studyViewRepository.getMutatedGenes(studyViewFilter); + return populateAlterationCounts(alterationCountByGenes, studyViewFilter, AlterationType.MUTATION_EXTENDED); } - public List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { - var copyNumberCountByGenes = studyViewRepository.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); - return populateAlterationCounts(copyNumberCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.COPY_NUMBER_ALTERATION); + public List getCnaGenes(StudyViewFilter studyViewFilter) { + var copyNumberCountByGenes = studyViewRepository.getCnaGenes(studyViewFilter); + return populateAlterationCounts(copyNumberCountByGenes, studyViewFilter, AlterationType.COPY_NUMBER_ALTERATION); } @Override - public List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { - var alterationCountByGenes = studyViewRepository.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter); - return populateAlterationCounts(alterationCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.STRUCTURAL_VARIANT); + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter) { + var alterationCountByGenes = 
studyViewRepository.getStructuralVariantGenes(studyViewFilter); + return populateAlterationCounts(alterationCountByGenes, studyViewFilter, AlterationType.STRUCTURAL_VARIANT); } private < T extends AlterationCountByGene> List populateAlterationCounts(@NonNull List alterationCounts, @NonNull StudyViewFilter studyViewFilter, - @NonNull CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, @NonNull AlterationType alterationType) { var updatedAlterationCounts = alterationCounts.stream().map(SerializationUtils::clone).toList(); var profiledCountsMap = studyViewRepository.getTotalProfiledCounts(studyViewFilter, - categorizedClinicalDataCountFilter, alterationType.toString()); - var profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, categorizedClinicalDataCountFilter, alterationType.toString()); - var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, - categorizedClinicalDataCountFilter, alterationType.toString()); + var profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, alterationType.toString()); + var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, alterationType.toString()); updatedAlterationCounts.parallelStream() .forEach(alterationCountByGene -> { diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index 6a48daf03ad..5202b2589c6 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -9,26 +9,19 @@ import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; import org.cbioportal.persistence.StudyViewRepository; -import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; -import 
org.cbioportal.persistence.enums.ClinicalAttributeDataType; import org.cbioportal.service.AlterationCountService; import org.cbioportal.service.StudyViewColumnarService; -import org.cbioportal.web.parameter.CategorizedClinicalDataCountFilter; import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.cache.annotation.Cacheable; import org.springframework.stereotype.Service; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.stream.Collectors; @Service public class StudyViewColumnarServiceImpl implements StudyViewColumnarService { - private final Map> clinicalAttributeNameMap = new HashMap<>(); - private final StudyViewRepository studyViewRepository; @@ -43,40 +36,31 @@ public StudyViewColumnarServiceImpl(StudyViewRepository studyViewRepository, Alt @Cacheable(cacheResolver = "generalRepositoryCacheResolver", condition = "@cacheEnabledConfig.getEnabled()") @Override public List getFilteredSamples(StudyViewFilter studyViewFilter) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return studyViewRepository.getFilteredSamples(studyViewFilter, categorizedClinicalDataCountFilter); + return studyViewRepository.getFilteredSamples(studyViewFilter); } @Override public List getMutatedGenes(StudyViewFilter studyViewFilter) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return alterationCountService.getMutatedGenes(studyViewFilter, categorizedClinicalDataCountFilter); + return alterationCountService.getMutatedGenes(studyViewFilter); } @Override public List getGenomicDataCounts(StudyViewFilter studyViewFilter) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return studyViewRepository.getGenomicDataCounts(studyViewFilter, 
categorizedClinicalDataCountFilter); + return studyViewRepository.getGenomicDataCounts(studyViewFilter); } public List getCnaGenes(StudyViewFilter studyViewFilter) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return alterationCountService.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); + return alterationCountService.getCnaGenes(studyViewFilter); } @Override public List getStructuralVariantGenes(StudyViewFilter studyViewFilter) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return alterationCountService.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter); + return alterationCountService.getStructuralVariantGenes(studyViewFilter); } @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - - - - return studyViewRepository.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, filteredAttributes) + return studyViewRepository.getClinicalDataCounts(studyViewFilter, filteredAttributes) .stream().collect(Collectors.groupingBy(ClinicalDataCount::getAttributeId)) .entrySet().parallelStream().map(e -> { ClinicalDataCountItem item = new ClinicalDataCountItem(); @@ -88,60 +72,18 @@ public List getClinicalDataCounts(StudyViewFilter studyVi @Override public List getCaseListDataCounts(StudyViewFilter studyViewFilter) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return studyViewRepository.getCaseListDataCounts(studyViewFilter, categorizedClinicalDataCountFilter); - } - - private CategorizedClinicalDataCountFilter extractClinicalDataCountFilters(final StudyViewFilter studyViewFilter) { - if 
(clinicalAttributeNameMap.isEmpty()) { - buildClinicalAttributeNameMap(); - } - - if (studyViewFilter.getClinicalDataFilters() == null) { - return CategorizedClinicalDataCountFilter.getBuilder().build(); - } - - final String patientCategoricalKey = ClinicalAttributeDataSource.PATIENT.getValue() + ClinicalAttributeDataType.CATEGORICAL.getValue(); - final String patientNumericKey = ClinicalAttributeDataSource.PATIENT.getValue() + ClinicalAttributeDataType.NUMERIC.getValue(); - final String sampleCategoricalKey = ClinicalAttributeDataSource.SAMPLE.getValue() + ClinicalAttributeDataType.CATEGORICAL.getValue(); - final String sampleNumericKey = ClinicalAttributeDataSource.SAMPLE.getValue() + ClinicalAttributeDataType.NUMERIC.getValue(); - - return CategorizedClinicalDataCountFilter.getBuilder() - .setPatientCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters() - .stream().filter(clinicalDataFilter -> clinicalAttributeNameMap.get(patientCategoricalKey).contains(clinicalDataFilter.getAttributeId())) - .collect(Collectors.toList())) - .setPatientNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() - .filter(clinicalDataFilter -> clinicalAttributeNameMap.get(patientNumericKey).contains(clinicalDataFilter.getAttributeId())) - .collect(Collectors.toList())) - .setSampleCategoricalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() - .filter(clinicalDataFilter -> clinicalAttributeNameMap.get(sampleCategoricalKey).contains(clinicalDataFilter.getAttributeId())) - .collect(Collectors.toList())) - .setSampleNumericalClinicalDataFilters(studyViewFilter.getClinicalDataFilters().stream() - .filter(clinicalDataFilter -> clinicalAttributeNameMap.get(sampleNumericKey).contains(clinicalDataFilter.getAttributeId())) - .collect(Collectors.toList())) - .build(); - } - - private void buildClinicalAttributeNameMap() { - List clinicalAttributeDataSources = List.of(ClinicalAttributeDataSource.values()); - for 
(ClinicalAttributeDataSource clinicalAttributeDataSource : clinicalAttributeDataSources) { - String categoricalKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.CATEGORICAL; - String numericKey = clinicalAttributeDataSource.getValue() + ClinicalAttributeDataType.NUMERIC; - clinicalAttributeNameMap.put(categoricalKey, studyViewRepository.getClinicalDataAttributeNames(clinicalAttributeDataSource, ClinicalAttributeDataType.CATEGORICAL)); - clinicalAttributeNameMap.put(numericKey, studyViewRepository.getClinicalDataAttributeNames(clinicalAttributeDataSource, ClinicalAttributeDataType.NUMERIC)); - } + return studyViewRepository.getCaseListDataCounts(studyViewFilter); } + @Override public List getPatientClinicalData(StudyViewFilter studyViewFilter, List attributeIds) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return studyViewRepository.getPatientClinicalData(studyViewFilter, attributeIds, categorizedClinicalDataCountFilter); + return studyViewRepository.getPatientClinicalData(studyViewFilter, attributeIds); } @Override public List getSampleClinicalData(StudyViewFilter studyViewFilter, List attributeIds) { - CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); - return studyViewRepository.getSampleClinicalData(studyViewFilter, attributeIds, categorizedClinicalDataCountFilter); + return studyViewRepository.getSampleClinicalData(studyViewFilter, attributeIds); } diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index 29d52a9e7df..7932a1841d5 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -91,7 +91,7 @@ - + 
@@ -115,7 +115,7 @@ - + @@ -135,22 +135,24 @@ SELECT ${unique_id} FROM ${table_name} - WHERE attribute_name = '${clinicalDataFilter.attributeId}' + WHERE attribute_name = '${clinicalDataFilter.attributeId}' AND + type='${type}' AND + (attribute_value = '' OR match(attribute_value, '^[\d\.]+$')) - AND attribute_value = -1000000 + AND attribute_value = '' - AND abs(minus(attribute_value, ${dataFilterValue.start})) < exp(-11) + AND abs(minus(cast(attribute_value as float), ${dataFilterValue.start})) < exp(-11) - AND attribute_value > ${dataFilterValue.start} + AND cast(attribute_value as float) > ${dataFilterValue.start} - AND attribute_value <= ${dataFilterValue.end} + AND cast(attribute_value as float) <= ${dataFilterValue.end} diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 9170b676872..b7d785e4991 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -167,31 +167,7 @@ GROUP BY s.cancer_study_identifier, sl.stable_id, sl.name; - - SELECT - attribute_name as attributeId, - CASE WHEN attribute_value = 'NULL' THEN 'NA' ELSE attribute_value END AS value, - Count(*) as count - FROM ${table_name_prefix}_clinical_attribute_categorical_view - - patient_unique_id IN ( - - INTERSECT - - ) - AND UPPER(attribute_value) NOT IN - - #{filteredAttributeValue} - - AND attribute_name IN - - #{attributeId} - - - GROUP BY attribute_name, - attribute_value - - + SELECT attribute_name as attributeId, @@ -250,20 +226,6 @@ value - - - - - - SELECT sample_unique_id FROM sample_derived @@ -280,10 +242,14 @@ - SELECT - DISTINCT(attribute_name) - FROM ${tableName}; + attr_id as attrId, + datatype as dataType, + patient_attribute as patientAttribute, + cs.cancer_study_identifier cancerStudyIdentifier + FROM 
clinical_attribute_meta cam + JOIN cancer_study cs on cs.cancer_study_id = cam.cancer_study_id From 14b7105f041e4dbc8c54d9c5d296ce038021dfe0 Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:59:08 -0400 Subject: [PATCH 6/7] Rfc80/fix patient level filtering for alteration counts endpoints (#10857) * Add patient_id column to genomic_event_derived * Update sql to convert list of patients to list of samples --- .../db-scripts/clickhouse/clickhouse.sql | 3 ++- .../clickhouse/clickhouse_migration.sql | 16 ++++++++++++---- .../mybatisclickhouse/StudyViewMapper.xml | 15 ++++++++++++--- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index 828c358d9f1..4e9897566bd 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -17,7 +17,8 @@ CREATE TABLE IF NOT EXISTS genomic_event_derived driver_tiers_filter LowCardinality(String), cna_alteration Nullable(Int8), cna_cytoband String, - sv_event_info String + sv_event_info String, + patient_unique_id String ) ENGINE = MergeTree ORDER BY ( variant_type, hugo_gene_symbol, genetic_profile_stable_id, sample_unique_id); diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql index 55c8e48d9b3..76223de462d 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql @@ -40,7 +40,8 @@ SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_uniqu 'NA' AS drivet_tiers_filter, NULL AS cna_alteration, '' AS cna_cytoband, - '' AS sv_event_info + '' AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id FROM mutation INNER JOIN 
mutation_event AS me ON mutation.mutation_event_id = me.mutation_event_id INNER JOIN sample_profile sp @@ -49,6 +50,7 @@ FROM mutation LEFT JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id INNER JOIN sample ON mutation.sample_id = sample.internal_id + INNER JOIN patient on sample.patient_id = patient.internal_id LEFT JOIN gene ON mutation.entrez_gene_id = gene.entrez_gene_id UNION ALL SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, @@ -64,7 +66,8 @@ SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_uniqu 'NA' AS drivet_tiers_filter, ce.alteration AS cna_alteration, rgg.cytoband AS cna_cytoband, - '' AS sv_event_info + '' AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id FROM cna_event ce INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id @@ -72,6 +75,7 @@ FROM cna_event ce INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id INNER JOIN sample ON sce.sample_id = sample.internal_id + INNER JOIN patient on sample.patient_id = patient.internal_id INNER JOIN gene ON ce.entrez_gene_id = gene.entrez_gene_id INNER JOIN reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id UNION ALL @@ -88,10 +92,12 @@ SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, 'NA' AS drivet_tiers_filter, NULL AS cna_alteration, '' AS cna_cytoband, - event_info AS sv_event_info + event_info AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id FROM structural_variant sv INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id INNER JOIN sample s ON sv.sample_id = 
s.internal_id + INNER JOIN patient on s.patient_id = patient.internal_id INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id INNER JOIN gene ON sv.site1_entrez_gene_id = gene.entrez_gene_id INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id @@ -110,10 +116,12 @@ SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, 'NA' AS drivet_tiers_filter, NULL AS cna_alteration, '' AS cna_cytoband, - event_info AS sv_event_info + event_info AS sv_event_info, + concat(cs.cancer_study_identifier, '_', patient.stable_id) AS patient_unique_id FROM structural_variant sv INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id INNER JOIN sample s ON sv.sample_id = s.internal_id + INNER JOIN patient on s.patient_id = patient.internal_id INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id INNER JOIN gene ON sv.site2_entrez_gene_id = gene.entrez_gene_id INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index b7d785e4991..078afabea47 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -281,7 +281,11 @@ stgp.alteration_type = '${alterationType}' AND - + sample_unique_id IN ( + + INTERSECT + + ) @@ -306,8 +310,13 @@ select distinct gene_panel_id from sample_to_gene_panel - alteration_type = '${alterationType}' AND - + alteration_type = '${alterationType}' + AND + sample_unique_id IN ( + + INTERSECT + + ) ) From 2b87a263cbd4781da87041be84b0c6d89a41efc2 Mon Sep 17 00:00:00 2001 From: Charles Haynes 
<33608920+haynescd@users.noreply.github.com> Date: Tue, 25 Jun 2024 09:59:52 -0400 Subject: [PATCH 7/7] :bug: Fix totalCount calculation when WES not available (#10858) --- .../service/impl/AlterationCountServiceImpl.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java index 67079dc5b63..29f94f0fd0f 100644 --- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java @@ -19,7 +19,6 @@ import org.cbioportal.web.parameter.StudyViewFilter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.lang.NonNull; -import org.springframework.lang.Nullable; import org.springframework.stereotype.Service; import java.util.ArrayList; @@ -286,7 +285,7 @@ private < T extends AlterationCountByGene> List populateAlterationCounts(@Non Set matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? 
matchingGenePanelIdsMap.get(hugoGeneSymbol) : Collections.emptySet(); - int totalProfiledCount = getTotalProfiledCount(alterationCountByGene.getHugoGeneSymbol(), + int totalProfiledCount = getTotalProfiledCount(hugoGeneSymbol, profiledCountsMap, profiledCountWithoutGenePanelData, matchingGenePanelIds); alterationCountByGene.setNumberOfProfiledCases(totalProfiledCount); @@ -297,7 +296,7 @@ private < T extends AlterationCountByGene> List populateAlterationCounts(@Non } private int getTotalProfiledCount(@NonNull String hugoGeneSymbol, @NonNull Map profiledCountsMap, - int profiledCountWithoutGenePanelData, @Nullable Set matchingGenePanelIds) { + int profiledCountWithoutGenePanelData, @NonNull Set matchingGenePanelIds) { int totalProfiledCount = profiledCountWithoutGenePanelData; if (hasGenePanelData(matchingGenePanelIds) && profiledCountsMap.containsKey(hugoGeneSymbol)) { @@ -306,9 +305,9 @@ private int getTotalProfiledCount(@NonNull String hugoGeneSymbol, @NonNull Map matchingGenePanelIds) { - return matchingGenePanelIds != null && matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING) - && matchingGenePanelIds.size() > 1; + private boolean hasGenePanelData(@NonNull Set matchingGenePanelIds) { + return matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING) + && matchingGenePanelIds.size() > 1 || !matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING) && !matchingGenePanelIds.isEmpty(); } private Pair, Long> getAlterationGeneCounts(