From 505847b19802785b582376a8210e07c0d6d3a549 Mon Sep 17 00:00:00 2001 From: Charles Haynes <33608920+haynescd@users.noreply.github.com> Date: Mon, 24 Jun 2024 12:36:45 -0400 Subject: [PATCH] :sparkles: Add StructuralVariant-genes endpoint (#10854) * :sparkles: Add StructuralVariant-genes endpoint * Fix sonar issues * Update MatchingGenePanel request to return list * Create and use sample_derive * Update where sample_derived is stored to fix unit test --- .../persistence/StudyViewRepository.java | 6 ++- .../mybatisclickhouse/StudyViewMapper.java | 6 ++- .../StudyViewMyBatisRepository.java | 17 +++++- .../typehandler/GenePanelIdsTypeHandler.java | 39 -------------- .../service/AlterationCountService.java | 2 + .../service/StudyViewColumnarService.java | 1 + .../impl/AlterationCountServiceImpl.java | 16 ++++-- .../impl/StudyViewColumnarServiceImpl.java | 6 +++ .../StudyViewColumnStoreController.java | 16 ++++++ .../db-scripts/clickhouse/clickhouse.sql | 16 +++++- .../clickhouse/clickhouse_migration.sql | 42 ++++++++++----- .../db-scripts/clickhouse/derived_tables.sql | 17 +++--- .../StudyViewFilterMapper.xml | 12 ++--- .../mybatisclickhouse/StudyViewMapper.xml | 52 ++++++++++--------- 14 files changed, 146 insertions(+), 102 deletions(-) delete mode 100644 src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 3ff72d01829..f866cf1442c 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -3,8 +3,8 @@ import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; -import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; +import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.Sample; import org.cbioportal.persistence.enums.ClinicalAttributeDataSource; import org.cbioportal.persistence.enums.ClinicalAttributeDataType; @@ -13,6 +13,7 @@ import java.util.List; import java.util.Map; +import java.util.Set; public interface StudyViewRepository { List getFilteredSamples(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); @@ -24,6 +25,7 @@ public interface StudyViewRepository { List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes); @@ -39,7 +41,7 @@ public interface StudyViewRepository { int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); - Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); + Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); int getTotalProfiledCountsByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 8aa0d71321e..7a279943289 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -24,6 +24,9 @@ List getMutatedGenes(StudyViewFilter studyViewFilter, Cat List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); + + List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, + boolean applyPatientIdFilters, AlterationFilterHelper alterationFilterHelper); List getPatientClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, List attributeIds, List filteredAttributeValues); @@ -45,8 +48,7 @@ List getClinicalDataCounts(StudyViewFilter studyViewFilter, C int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters); - @MapKey("hugoGeneSymbol") - Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); + List getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); int getTotalProfiledCountByAlterationType(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, boolean applyPatientIdFilters, String alterationType); } diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 309c26b087a..6dbc760d097 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -2,6 +2,7 @@ import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.GenePanelToGene; import org.cbioportal.model.GenomicDataCount; import org.cbioportal.model.CopyNumberCountByGene; import org.cbioportal.model.Sample; @@ -16,6 +17,8 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; @Repository public class StudyViewMyBatisRepository implements StudyViewRepository { @@ -47,6 +50,13 @@ public List getCnaGenes(StudyViewFilter studyViewFilter, AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); } + @Override + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + return mapper.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter, + shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), + AlterationFilterHelper.build(studyViewFilter.getAlterationFilter())); + } + @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, List filteredAttributes) { return mapper.getClinicalDataCounts(studyViewFilter, categorizedClinicalDataCountFilter, shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), @@ -103,9 +113,12 @@ public int getFilteredSamplesCount(StudyViewFilter studyViewFilter, CategorizedC } @Override - public Map getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { + public Map> getMatchingGenePanelIds(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, String alterationType) { return mapper.getMatchingGenePanelIds(studyViewFilter, categorizedClinicalDataCountFilter, - shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType); + shouldApplyPatientIdFilters(categorizedClinicalDataCountFilter), alterationType) + .stream() + .collect(Collectors.groupingBy(GenePanelToGene::getHugoGeneSymbol, + Collectors.mapping(GenePanelToGene::getGenePanelId, Collectors.toSet()))); } @Override diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java deleted file mode 100644 index 397cdf01bd2..00000000000 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/typehandler/GenePanelIdsTypeHandler.java +++ /dev/null @@ -1,39 +0,0 @@ -package org.cbioportal.persistence.mybatisclickhouse.typehandler; - -import org.apache.ibatis.type.BaseTypeHandler; -import org.apache.ibatis.type.JdbcType; - -import java.sql.CallableStatement; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.HashSet; -import java.util.Set; -import java.util.Arrays; - -public class GenePanelIdsTypeHandler extends BaseTypeHandler> { - - @Override - public void setNonNullParameter(PreparedStatement ps, int i, Set parameter, JdbcType jdbcType) throws SQLException { - // Convert Set to array for storage (if needed) - throw new UnsupportedOperationException("Storage of GenePanelIds not supported"); - } - - @Override - public Set getNullableResult(ResultSet rs, String columnName) throws SQLException { - String[] array = (String[]) rs.getArray(columnName).getArray(); - return new HashSet<>(Arrays.asList(array)); - } - - @Override - public Set getNullableResult(ResultSet rs, int columnIndex) throws SQLException { - String[] array = (String[]) rs.getArray(columnIndex).getArray(); - return new HashSet<>(Arrays.asList(array)); - } - - @Override - public Set getNullableResult(CallableStatement cs, int columnIndex) throws SQLException { - String[] array = (String[]) cs.getArray(columnIndex).getArray(); - return new HashSet<>(Arrays.asList(array)); - } -} diff --git a/src/main/java/org/cbioportal/service/AlterationCountService.java b/src/main/java/org/cbioportal/service/AlterationCountService.java index 0b927e591e1..2e02a4be518 100644 --- a/src/main/java/org/cbioportal/service/AlterationCountService.java +++ b/src/main/java/org/cbioportal/service/AlterationCountService.java @@ -80,4 +80,6 @@ Pair, Long> getPatientCnaGeneCounts(List getMutatedGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); List getCnaGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter); + } diff --git a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java index a8966d8fc2a..91ed7b8c647 100644 --- a/src/main/java/org/cbioportal/service/StudyViewColumnarService.java +++ b/src/main/java/org/cbioportal/service/StudyViewColumnarService.java @@ -16,6 +16,7 @@ public interface StudyViewColumnarService { List getMutatedGenes(StudyViewFilter interceptedStudyViewFilter); List getCnaGenes(StudyViewFilter interceptedStudyViewFilter); + List getStructuralVariantGenes(StudyViewFilter studyViewFilter); List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes); diff --git a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java index 7a57276013f..c1770e41e18 100644 --- a/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/AlterationCountServiceImpl.java @@ -265,7 +265,13 @@ public List getCnaGenes(StudyViewFilter studyViewFilter, var copyNumberCountByGenes = studyViewRepository.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); return populateAlterationCounts(copyNumberCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.COPY_NUMBER_ALTERATION); } - + + @Override + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter, CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter) { + var alterationCountByGenes = studyViewRepository.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter); + return populateAlterationCounts(alterationCountByGenes, studyViewFilter, categorizedClinicalDataCountFilter, AlterationType.STRUCTURAL_VARIANT); + } + private < T extends AlterationCountByGene> List populateAlterationCounts(@NonNull List alterationCounts, @NonNull StudyViewFilter studyViewFilter, @NonNull CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter, @@ -276,13 +282,13 @@ private < T extends AlterationCountByGene> List populateAlterationCounts(@Non alterationType.toString()); var profiledCountWithoutGenePanelData = studyViewRepository.getTotalProfiledCountsByAlterationType(studyViewFilter, categorizedClinicalDataCountFilter, alterationType.toString()); var matchingGenePanelIdsMap = studyViewRepository.getMatchingGenePanelIds(studyViewFilter, - categorizedClinicalDataCountFilter, AlterationType.MUTATION_EXTENDED.toString()); + categorizedClinicalDataCountFilter, alterationType.toString()); - updatedAlterationCounts + updatedAlterationCounts.parallelStream() .forEach(alterationCountByGene -> { String hugoGeneSymbol = alterationCountByGene.getHugoGeneSymbol(); - var matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? - matchingGenePanelIdsMap.get(hugoGeneSymbol).getMatchingGenePanelIds() : null; + Set matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ? + matchingGenePanelIdsMap.get(hugoGeneSymbol) : Collections.emptySet(); int totalProfiledCount = getTotalProfiledCount(alterationCountByGene.getHugoGeneSymbol(), profiledCountsMap, profiledCountWithoutGenePanelData, matchingGenePanelIds); diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index 42c59686090..83916d0c369 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -61,6 +61,12 @@ public List getCnaGenes(StudyViewFilter studyViewFilter) return alterationCountService.getCnaGenes(studyViewFilter, categorizedClinicalDataCountFilter); } + @Override + public List getStructuralVariantGenes(StudyViewFilter studyViewFilter) { + CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); + return alterationCountService.getStructuralVariantGenes(studyViewFilter, categorizedClinicalDataCountFilter); + } + @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { CategorizedClinicalDataCountFilter categorizedClinicalDataCountFilter = extractClinicalDataCountFilters(studyViewFilter); diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index cfba847c2c8..cf06ef4ae76 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -132,6 +132,22 @@ public ResponseEntity> fetchCnaGenes( ); } + @PostMapping(value = "/column-store/structuralvariant-genes/fetch", + consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(description = "Fetch structural variant genes by study view filter") + @ApiResponse(responseCode = "200", description = "OK", + content = @Content(array = @ArraySchema(schema = @Schema(implementation = AlterationCountByGene.class)))) + public ResponseEntity> fetchStructuralVariantGenes( + @Parameter(required = true, description = "Study view filter") + @Valid @RequestBody(required = false) StudyViewFilter studyViewFilter, + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. This attribute is needed for the @PreAuthorize tag above. + @RequestAttribute(required = false, value = "involvedCancerStudies") Collection involvedCancerStudies, + @Parameter(hidden = true) // prevent reference to this attribute in the swagger-ui interface. + @Valid @RequestAttribute(required = false, value = "interceptedStudyViewFilter") StudyViewFilter interceptedStudyViewFilter + ) { + return new ResponseEntity<>(studyViewColumnarService.getStructuralVariantGenes(interceptedStudyViewFilter), HttpStatus.OK); + } + @PreAuthorize("hasPermission(#involvedCancerStudies, 'Collection', T(org.cbioportal.utils.security.AccessLevel).READ)") @PostMapping(value = "/column-store/clinical-data-counts/fetch", consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse.sql b/src/main/resources/db-scripts/clickhouse/clickhouse.sql index a6c749d9e20..828c358d9f1 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse.sql @@ -35,4 +35,18 @@ CREATE TABLE gene_panel_to_gene gene_panel_id LowCardinality(String), gene String ) ENGINE = MergeTree() -ORDER BY (gene_panel_id); \ No newline at end of file +ORDER BY (gene_panel_id); + +CREATE TABLE sample_derived +( + sample_unique_id String, + sample_unique_id_base64 String, + sample_stable_id String, + patient_unique_id String, + patient_unique_id_base64 String, + patient_stable_id String, + cancer_study_identifier LowCardinality(String), + internal_id Int +) + ENGINE = MergeTree + ORDER BY (cancer_study_identifier, sample_unique_id); \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql index 2a508ee1584..55c8e48d9b3 100644 --- a/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql +++ b/src/main/resources/db-scripts/clickhouse/clickhouse_migration.sql @@ -29,7 +29,7 @@ WHERE gene.entrez_gene_id > 0; INSERT INTO genomic_event_derived SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, gene.hugo_gene_symbol AS hugo_gene_symbol, - gp.stable_id AS gene_panel_stable_id, + ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, g.stable_id AS genetic_profile_stable_id, 'mutation' AS variant_type, @@ -53,7 +53,7 @@ FROM mutation UNION ALL SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, gene.hugo_gene_symbol AS hugo_gene_symbol, - gp.stable_id AS gene_panel_stable_id, + ifNull(gp.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, g.stable_id AS genetic_profile_stable_id, 'cna' AS variant_type, @@ -68,7 +68,7 @@ SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_uniqu FROM cna_event ce INNER JOIN sample_cna_event sce ON ce.cna_event_id = sce.cna_event_id INNER JOIN sample_profile sp ON sce.sample_id = sp.sample_id AND sce.genetic_profile_id = sp.genetic_profile_id - INNER JOIN gene_panel gp ON sp.panel_id = gp.internal_id + LEFT JOIN gene_panel gp ON sp.panel_id = gp.internal_id INNER JOIN genetic_profile g ON sp.genetic_profile_id = g.genetic_profile_id INNER JOIN cancer_study cs ON g.cancer_study_id = cs.cancer_study_id INNER JOIN sample ON sce.sample_id = sample.internal_id @@ -76,8 +76,8 @@ FROM cna_event ce INNER JOIN reference_genome_gene rgg ON rgg.entrez_gene_id = ce.entrez_gene_id UNION ALL SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, - gene2.hugo_gene_symbol AS hugo_gene_symbol, - gene_panel.stable_id AS gene_panel_stable_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, gp.stable_id AS genetic_profile_stable_id, 'structural_variant' AS variant_type, @@ -93,13 +93,13 @@ FROM structural_variant sv INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id INNER JOIN sample s ON sv.sample_id = s.internal_id INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene2 ON sv.site2_entrez_gene_id = gene2.entrez_gene_id - INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id + INNER JOIN gene ON sv.site1_entrez_gene_id = gene.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id + LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id UNION ALL SELECT concat(cs.cancer_study_identifier, '_', s.stable_id) AS sample_unique_id, - gene1.hugo_gene_symbol AS hugo_gene_symbol, - gene_panel.stable_id AS gene_panel_stable_id, + gene.hugo_gene_symbol AS hugo_gene_symbol, + ifNull(gene_panel.stable_id, 'WES') AS gene_panel_stable_id, cs.cancer_study_identifier AS cancer_study_identifier, gp.stable_id AS genetic_profile_stable_id, 'structural_variant' AS variant_type, @@ -115,6 +115,22 @@ FROM structural_variant sv INNER JOIN genetic_profile gp ON sv.genetic_profile_id = gp.genetic_profile_id INNER JOIN sample s ON sv.sample_id = s.internal_id INNER JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - INNER JOIN gene gene1 ON sv.site1_entrez_gene_id = gene1.entrez_gene_id - INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id - INNER JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id; \ No newline at end of file + INNER JOIN gene ON sv.site2_entrez_gene_id = gene.entrez_gene_id + INNER JOIN sample_profile ON s.internal_id = sample_profile.sample_id AND sample_profile.genetic_profile_id = sv.genetic_profile_id + LEFT JOIN gene_panel ON sample_profile.panel_id = gene_panel.internal_id +WHERE + sv.site2_entrez_gene_id != sv.site1_entrez_gene_id + OR sv.site1_entrez_gene_id IS NULL; + +INSERT INTO sample_derived +SELECT concat(cs.cancer_study_identifier, '_', sample.stable_id) AS sample_unique_id, + base64Encode(sample.stable_id) AS sample_unique_id_base64, + sample.stable_id AS sample_stable_id, + concat(cs.cancer_study_identifier, '_', p.stable_id) AS patient_unique_id, + p.stable_id AS patient_stable_id, + base64Encode(p.stable_id) AS patient_unique_id_base64, + cs.cancer_study_identifier AS cancer_study_identifier, + sample.internal_id AS internal_id +FROM sample + INNER JOIN patient AS p ON sample.patient_id = p.internal_id + INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id \ No newline at end of file diff --git a/src/main/resources/db-scripts/clickhouse/derived_tables.sql b/src/main/resources/db-scripts/clickhouse/derived_tables.sql index 73a35cd4f69..0e982ed106f 100644 --- a/src/main/resources/db-scripts/clickhouse/derived_tables.sql +++ b/src/main/resources/db-scripts/clickhouse/derived_tables.sql @@ -20,12 +20,12 @@ SELECT sm.sample_unique_id AS sample_unique_id, csamp.attr_value AS attribute_value, cs.cancer_study_identifier AS cancer_study_identifier, 'sample' AS type -FROM sling_db_2024_05_23_original.sample_mv AS sm - INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs +FROM sample_mv AS sm + INNER JOIN cancer_study AS cs ON sm.cancer_study_identifier = cs.cancer_study_identifier - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam + FULL OUTER JOIN clinical_attribute_meta AS cam ON cs.cancer_study_id = cam.cancer_study_id - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_sample AS csamp + FULL OUTER JOIN clinical_sample AS csamp ON (sm.internal_id = csamp.internal_id) AND (csamp.attr_id = cam.attr_id) WHERE cam.patient_attribute = 0; @@ -37,10 +37,11 @@ SELECT NULL AS sample_unique_id, clinpat.attr_value AS attribute_value, cs.cancer_study_identifier AS cancer_study_identifier, 'patient' AS type -FROM sling_db_2024_05_23_original.patient AS p - INNER JOIN sling_db_2024_05_23_original.cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_attribute_meta AS cam +FROM patient AS p + INNER JOIN cancer_study AS cs ON p.cancer_study_id = cs.cancer_study_id + FULL OUTER JOIN clinical_attribute_meta AS cam ON cs.cancer_study_id = cam.cancer_study_id - FULL OUTER JOIN sling_db_2024_05_23_original.clinical_patient AS clinpat + FULL OUTER JOIN clinical_patient AS clinpat ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id) WHERE cam.patient_attribute = 1; + diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml index a4e8ef679d8..58cbfe48b0d 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewFilterMapper.xml @@ -5,9 +5,9 @@ - INTERSECT + INTERSECT SELECT sample_unique_id - FROM sample_mv + FROM sample_derived WHERE cancer_study_identifier IN #{studyId} @@ -19,13 +19,13 @@ INTERSECT SELECT * FROM ( - SELECT sample_mv.sample_unique_id + SELECT sample_derived.sample_unique_id FROM sample_profile JOIN genetic_profile gp ON sample_profile.genetic_profile_id = gp.genetic_profile_id JOIN cancer_study cs ON gp.cancer_study_id = cs.cancer_study_id - JOIN sample_mv on sample_profile.sample_id = sample_mv.internal_id + JOIN sample_derived on sample_profile.sample_id = sample_derived.internal_id - sample_mv.cancer_study_identifier IN + sample_derived.cancer_study_identifier IN #{studyId} @@ -43,7 +43,7 @@ INTERSECT SELECT sample_unique_id - FROM sample_mv + FROM sample_derived WHERE sample_unique_id IN '${sampleIdentifier.studyId}_${sampleIdentifier.sampleId}' diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index 73376a25f6d..55b8b529857 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -11,7 +11,7 @@ cancer_study_identifier as cancerStudyIdentifier, sample_unique_id_base64 as uniqueSampleKey, patient_unique_id_base64 as uniquePatientKey - FROM sample_mv + FROM sample_derived sample_unique_id IN ( ) @@ -23,7 +23,7 @@ - SELECT hugo_gene_symbol as hugoGeneSymbol, 1 as entrezGeneId, @@ -37,8 +37,7 @@ - GROUP BY hugo_gene_symbol - ORDER BY totalCount DESC; + GROUP BY hugo_gene_symbol; @@ -58,8 +57,21 @@ - GROUP BY hugo_gene_symbol, alteration, cytoband - ORDER BY totalCount DESC; + GROUP BY hugo_gene_symbol, alteration, cytoband; + + + @@ -127,16 +139,16 @@ @@ -176,7 +188,7 @@ sample_unique_id IN ( INTERSECT - SELECT sample_unique_id from sample_mv WHERE sample_mv.patient_unique_id IN + SELECT sample_unique_id from sample_derived WHERE sample_derived.patient_unique_id IN () ) @@ -239,7 +251,7 @@ SELECT sample_unique_id - FROM sample_mv + FROM sample_derived patient_unique_id IN () @@ -247,7 +259,7 @@ SELECT patient_unique_id - FROM sample_mv + FROM sample_derived sample_unique_id IN () @@ -296,17 +308,17 @@ - SELECT gene as hugoGeneSymbol, - arrayFilter(x -> x != '', groupArray( distinct gene_panel_id)) as matchingGenePanelIds + gene_panel_id as genePanelId FROM gene_panel_to_gene gene_panel_id in ( @@ -318,17 +330,9 @@ ) - GROUP BY gene; + GROUP BY gene, gene_panel_id; - - - - - - - - sample_unique_id IN ( )