Skip to content

Commit

Permalink
Complete CNA table
Browse files Browse the repository at this point in the history
  • Loading branch information
fuzhaoyuan committed Jul 29, 2024
1 parent ac2c28a commit 7429503
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 19 deletions.
10 changes: 5 additions & 5 deletions src/main/java/org/cbioportal/web/StudyViewController.java
Original file line number Diff line number Diff line change
Expand Up @@ -959,15 +959,15 @@ public ResponseEntity<List<GenomicDataCountItem>> fetchGenomicDataCounts(
@Parameter(required = true, description = "Intercepted Genomic Data Count Filter")
@Valid @RequestAttribute(required = false, value = "interceptedGenomicDataCountFilter") GenomicDataCountFilter interceptedGenomicDataCountFilter
) throws StudyNotFoundException {
List<GenomicDataFilter> gdFilters = interceptedGenomicDataCountFilter.getGenomicDataFilters();
List<GenomicDataFilter> genomicDataFilters = interceptedGenomicDataCountFilter.getGenomicDataFilters();
StudyViewFilter studyViewFilter = interceptedGenomicDataCountFilter.getStudyViewFilter();
// When there is only one filter, the study view is performing a single-chart filter operation.
// In that case, remove the filter from studyViewFilter so that all data counts are returned.
// This ensures that after a chart is filtered, the user can still see the unselected portion of the chart.
if (gdFilters.size() == 1) {
if (genomicDataFilters.size() == 1) {
studyViewFilterUtil.removeSelfFromGenomicDataFilter(
gdFilters.get(0).getHugoGeneSymbol(),
gdFilters.get(0).getProfileType(),
genomicDataFilters.get(0).getHugoGeneSymbol(),
genomicDataFilters.get(0).getProfileType(),
studyViewFilter);
}
List<SampleIdentifier> filteredSampleIdentifiers = studyViewFilterApplier.apply(studyViewFilter);
Expand All @@ -983,7 +983,7 @@ public ResponseEntity<List<GenomicDataCountItem>> fetchGenomicDataCounts(
List<GenomicDataCountItem> result = studyViewService.getCNAAlterationCountsByGeneSpecific(
studyIds,
sampleIds,
gdFilters.stream().map(gdFilter -> new Pair<>(gdFilter.getHugoGeneSymbol(), gdFilter.getProfileType())).collect(Collectors.toList()));
genomicDataFilters.stream().map(gdFilter -> new Pair<>(gdFilter.getHugoGeneSymbol(), gdFilter.getProfileType())).collect(Collectors.toList()));

return new ResponseEntity<>(result, HttpStatus.OK);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ public ResponseEntity<List<GenomicDataCountItem>> fetchGenomicDataCounts(
studyViewFilter);
}

// This endpoint is CNA-specific. The name "genomic data" does not imply that it supports other genomic data types.
List<GenomicDataCountItem> result = studyViewColumnarService.getCNACountsByGeneSpecific(studyViewFilter, genomicDataFilters);

return new ResponseEntity<>(result, HttpStatus.OK);
Expand Down
19 changes: 12 additions & 7 deletions src/main/resources/db-scripts/clickhouse/clickhouse.sql
Original file line number Diff line number Diff line change
Expand Up @@ -249,33 +249,38 @@ FROM patient AS p
ON (p.internal_id = clinpat.internal_id) AND (clinpat.attr_id = cam.attr_id)
WHERE cam.patient_attribute = 1;

CREATE TABLE IF NOT EXISTS genetic_alteration_derived
CREATE TABLE IF NOT EXISTS genetic_alteration_derived_cna
(
sample_unique_id String,
hugo_gene_symbol String,
cna_value Int8
profile_type String,
alteration_value Float32
)
ENGINE = MergeTree()
ORDER BY (sample_unique_id, hugo_gene_symbol);
ORDER BY (sample_unique_id, hugo_gene_symbol, profile_type);

INSERT INTO TABLE genetic_alteration_derived
INSERT INTO TABLE genetic_alteration_derived_cna
SELECT
sample_unique_id,
hugo_gene_symbol,
multiIf(value = '2', 2, value = '1', 1, value = '0', 0, value = '-1', -1, value = '-2', -2, 99) as cna_value
profile_type,
toFloat32(cna_value) as alteration_value
FROM
(SELECT
sample_id,
hugo_gene_symbol,
profile_type,
cna_value,
cancer_study_id
FROM
(SELECT
gp.cancer_study_id AS cancer_study_id,
g.hugo_gene_symbol AS hugo_gene_symbol,
arrayElement(splitByString('_', assumeNotNull(gp.stable_id)), -1) AS profile_type,
arrayMap(x -> (x = '' ? NULL : x), splitByString(',', assumeNotNull(trim(trailing ',' from ga.values)))) AS cna_value,
arrayMap(x -> (x = '' ? NULL : toInt64(x)), splitByString(',', assumeNotNull(trim(trailing ',' from gps.ordered_sample_list)))) AS sample_id
FROM genetic_profile gp
FROM
genetic_profile gp
JOIN genetic_profile_samples gps ON gp.genetic_profile_id = gps.genetic_profile_id
JOIN genetic_alteration ga ON gp.genetic_profile_id = ga.genetic_profile_id
JOIN gene g ON ga.genetic_entity_id = g.genetic_entity_id
Expand All @@ -292,4 +297,4 @@ OPTIMIZE TABLE gene_panel_to_gene_derived;
OPTIMIZE TABLE sample_derived;
OPTIMIZE TABLE genomic_event_derived;
OPTIMIZE TABLE clinical_data_derived;
OPTIMIZE TABLE genetic_alteration_derived;
OPTIMIZE TABLE genetic_alteration_derived_cna;
Original file line number Diff line number Diff line change
Expand Up @@ -235,17 +235,19 @@

<!-- for /genomic-data-counts/fetch - (returns GenomicDataCountItem objects) -->
<select id="getCNACounts" resultMap="GenomicDataCountItemResultMap">
<bind name="profileType" value="genomicDataFilters[0].profileType" />
WITH cna_query as (
SELECT
hugo_gene_symbol as hugoGeneSymbol,
'cna' as profileType,
#{profileType} as profileType,
multiIf(cna_value = 2, 'Amplified', cna_value = 1, 'Gained', cna_value = 0, 'Diploid', cna_value = -1,
'Heterozygously deleted', cna_value = -2, 'Homozygously deleted', 'NA') as label,
toString(cna_value) as value,
cast(count(*) as INTEGER) as count
FROM genetic_alteration_derived_test
FROM genetic_alteration_derived_cna
<where>
sample_unique_id IN (<include refid="sampleUniqueIdsFromStudyViewFilter"/>)
AND profile_type = #{profileType}
<foreach item="genomicDataFilter" collection="genomicDataFilters" open=" AND (" separator=" OR " close=")">
hugo_gene_symbol = #{genomicDataFilter.hugoGeneSymbol}
</foreach>
Expand All @@ -265,7 +267,7 @@
UNION ALL
SELECT
hugoGeneSymbol,
'cna' as profileType,
#{profileType},
'NA' as label,
'NA' as value,
cast(((SELECT * FROM (<include refid="getTotalSampleCount"/>)) - cna_count) as INTEGER) as count
Expand All @@ -275,9 +277,9 @@
<!-- for /mutation-data-counts/fetch (returns GenomicDataCountItem objects) mutation counts pie chart part -->
<select id="getMutationCounts">
WITH profiled_count as (
SELECT count(distinct stgp.sample_unique_id)
FROM sample_to_gene_panel stgp
JOIN gene_panel_to_gene gpg ON stgp.gene_panel_id = gpg.gene_panel_id
SELECT count(distinct sgp.sample_unique_id)
FROM sample_to_gene_panel_derived sgp
JOIN gene_panel_to_gene_derived gpg ON sgp.gene_panel_id = gpg.gene_panel_id
WHERE
sample_unique_id IN (<include refid="sampleUniqueIdsFromStudyViewFilter"/>)
AND gpg.gene = #{genomicDataFilter.hugoGeneSymbol}
Expand Down Expand Up @@ -310,7 +312,7 @@
sample_unique_id IN (<include refid="sampleUniqueIdsFromStudyViewFilter"/>)
AND variant_type = 'mutation'
<foreach item="genomicDataFilter" collection="genomicDataFilters" open=" AND (" separator=" OR " close=")">
hugo_gene_symbol = #{genomicDataFilter.hugoGeneSymbol}
hugo_gene_symbol = #{genomicDataFilter.hugoGeneSymbol}
</foreach>
</where>
GROUP BY
Expand Down

0 comments on commit 7429503

Please sign in to comment.