From 21593b20ca12ae3d96016c42dc8883b8a376973a Mon Sep 17 00:00:00 2001 From: Celine Pelletier Date: Tue, 29 Oct 2024 13:01:49 -0400 Subject: [PATCH] feat: SJIP-1059 add endpoint to check genes in transcriptomics data --- src/app.test.ts | 97 ++++++++++++++++++++- src/app.ts | 16 +++- src/endpoints/transcriptomics/index.test.ts | 78 ++++++++++++++++- src/endpoints/transcriptomics/index.ts | 49 ++++++++++- src/endpoints/transcriptomics/types.ts | 16 ++++ 5 files changed, 247 insertions(+), 9 deletions(-) diff --git a/src/app.test.ts b/src/app.test.ts index c853ce8..0e80d97 100644 --- a/src/app.test.ts +++ b/src/app.test.ts @@ -16,7 +16,13 @@ import { } from './endpoints/sets/setsFeature'; import { Set, UpdateSetContentBody, UpdateSetTagBody } from './endpoints/sets/setsTypes'; import { getStatistics, getStudiesStatistics, Statistics } from './endpoints/statistics'; -import { checkSampleIdsAndGene, fetchDiffGeneExp, fetchFacets, fetchSampleGeneExp } from './endpoints/transcriptomics'; +import { + checkGenesExist, + checkSampleIdsAndGene, + fetchDiffGeneExp, + fetchFacets, + fetchSampleGeneExp, +} from './endpoints/transcriptomics'; import { DiffGeneExpVolcano, Facets as TranscriptomicsFacets, @@ -745,8 +751,8 @@ describe('Express app (without Arranger)', () => { .expect(403)); it('should return 200 if Authorization header contains valid token and no error occurs', async () => { - const sample_ids = ['bs-aa000aaa', 'bs-bbbb11b1']; - (checkSampleIdsAndGene as jest.Mock).mockImplementation(() => sample_ids); + const sampleIds = ['bs-aa000aaa', 'bs-bbbb11b1']; + (checkSampleIdsAndGene as jest.Mock).mockImplementation(() => sampleIds); const token = getToken(); @@ -755,7 +761,7 @@ describe('Express app (without Arranger)', () => { .set('Content-type', 'application/json') .set({ Authorization: `Bearer ${token}` }) .send(requestBody) - .expect(200, sample_ids); + .expect(200, sampleIds); expect((checkSampleIdsAndGene as jest.Mock).mock.calls.length).toEqual(1); 
expect((checkSampleIdsAndGene as jest.Mock).mock.calls[0][0]).toEqual(requestBody.sample_ids); expect((checkSampleIdsAndGene as jest.Mock).mock.calls[0][1]).toEqual(requestBody.ensembl_gene_id); @@ -780,4 +786,87 @@ describe('Express app (without Arranger)', () => { expect((checkSampleIdsAndGene as jest.Mock).mock.calls[0][1]).toEqual(requestBody.ensembl_gene_id); }); }); + + describe('POST /transcriptomics/checkGenesExist', () => { + beforeEach(() => { + (checkGenesExist as jest.Mock).mockReset(); + }); + + const requestBody = { + genes: + 'CYB5R1,TBCA,TOMM5,NRXN2,ENSG00000163462.18,ENSG00000211592,ENSG000002137410,FUT7,AL139424,ENSG00000204882.4', + }; + + it('should return 403 if no Authorization header', () => + request(app) + .post('/transcriptomics/checkGenesExist') + .set('Content-type', 'application/json') + .send(requestBody) + .expect(403)); + + it('should return 200 if Authorization header contains valid token and no error occurs', async () => { + const matchedGenes = [ + { + gene_symbol: 'GPR20', + ensembl_gene_id: 'ENSG00000204882.4', + }, + { + gene_symbol: 'TRIM46', + ensembl_gene_id: 'ENSG00000163462.18', + }, + ]; + (checkGenesExist as jest.Mock).mockImplementation(() => matchedGenes); + + const token = getToken(); + + await request(app) + .post('/transcriptomics/checkGenesExist') + .set('Content-type', 'application/json') + .set({ Authorization: `Bearer ${token}` }) + .send(requestBody) + .expect(200, matchedGenes); + expect((checkGenesExist as jest.Mock).mock.calls.length).toEqual(1); + expect((checkGenesExist as jest.Mock).mock.calls[0][0]).toEqual([ + 'CYB5R1', + 'TBCA', + 'TOMM5', + 'NRXN2', + 'ENSG00000163462.18', + 'ENSG00000211592', + 'ENSG000002137410', + 'FUT7', + 'AL139424', + 'ENSG00000204882.4', + ]); + }); + + it('should return 500 if Authorization header contains valid token but an error occurs', async () => { + const expectedError = new Error('OOPS'); + (checkGenesExist as jest.Mock).mockImplementation(() => { + throw expectedError; 
+ }); + + const token = getToken(); + + await request(app) + .post('/transcriptomics/checkGenesExist') + .set('Content-type', 'application/json') + .set({ Authorization: `Bearer ${token}` }) + .send(requestBody) + .expect(500, { error: 'Internal Server Error' }); + expect((checkGenesExist as jest.Mock).mock.calls.length).toEqual(1); + expect((checkGenesExist as jest.Mock).mock.calls[0][0]).toEqual([ + 'CYB5R1', + 'TBCA', + 'TOMM5', + 'NRXN2', + 'ENSG00000163462.18', + 'ENSG00000211592', + 'ENSG000002137410', + 'FUT7', + 'AL139424', + 'ENSG00000204882.4', + ]); + }); + }); }); diff --git a/src/app.ts b/src/app.ts index ef7e091..33394a2 100644 --- a/src/app.ts +++ b/src/app.ts @@ -19,7 +19,13 @@ import { } from './endpoints/sets/setsFeature'; import { CreateSetBody, Set, SetSqon, UpdateSetContentBody, UpdateSetTagBody } from './endpoints/sets/setsTypes'; import { getStatistics, getStudiesStatistics } from './endpoints/statistics'; -import { checkSampleIdsAndGene, fetchDiffGeneExp, fetchFacets, fetchSampleGeneExp } from './endpoints/transcriptomics'; +import { + checkGenesExist, + checkSampleIdsAndGene, + fetchDiffGeneExp, + fetchFacets, + fetchSampleGeneExp, +} from './endpoints/transcriptomics'; import { cacheTTL, esHost, keycloakURL, userApiURL } from './env'; import { globalErrorHandler, globalErrorLogger } from './errors'; import { @@ -187,6 +193,14 @@ export default (keycloak: Keycloak, getProject: (projectId: string) => ArrangerP res.json(data); }); + app.postAsync('/transcriptomics/checkGenesExist', keycloak.protect(), async (req, res) => { + const genes: string = req.body.genes; + + const data = await checkGenesExist(genes.split(',')); + + res.json(data); + }); + app.postAsync('/authorized-studies', keycloak.protect(), computeAuthorizedStudiesForAllFences); app.use(globalErrorLogger, globalErrorHandler); diff --git a/src/endpoints/transcriptomics/index.test.ts b/src/endpoints/transcriptomics/index.test.ts index 65ba1ea..8200c58 100644 --- 
a/src/endpoints/transcriptomics/index.test.ts +++ b/src/endpoints/transcriptomics/index.test.ts @@ -1,6 +1,6 @@ import EsInstance from '../../ElasticSearchClientInstance'; -import { checkSampleIdsAndGene, fetchDiffGeneExp, fetchFacets, fetchSampleGeneExp } from '.'; -import { DiffGeneExpVolcano, Facets, SampleGeneExpVolcano } from './types'; +import { checkGenesExist, checkSampleIdsAndGene, fetchDiffGeneExp, fetchFacets, fetchSampleGeneExp } from '.'; +import { DiffGeneExpVolcano, Facets, MatchedGene, SampleGeneExpVolcano } from './types'; jest.mock('../../ElasticSearchClientInstance'); @@ -493,4 +493,78 @@ describe('Transcriptomics', () => { expect(result).toEqual(expectedResponse); }); }); + + describe('checkGenesExist', () => { + beforeEach(() => { + (EsInstance.getInstance as jest.Mock).mockReset(); + }); + + it('should return gene_symbol and ensembl_gene_id for the list of gene_symbol and or ensembl_gene_id received in param', async () => { + const mockEsResponseBody = { + took: 10, + timed_out: false, + _shards: { + total: 5, + successful: 5, + skipped: 0, + failed: 0, + }, + hits: [], + aggregations: { + distinct_genes: { + after_key: { + gene_symbol: 'TRIM46', + ensembl_gene_id: 'ENSG00000163462.18', + }, + buckets: [ + { + key: { + gene_symbol: 'GPR20', + ensembl_gene_id: 'ENSG00000204882.4', + }, + doc_count: 400, + }, + { + key: { + gene_symbol: 'TRIM46', + ensembl_gene_id: 'ENSG00000163462.18', + }, + doc_count: 400, + }, + ], + }, + }, + }; + + const expectedResponse: MatchedGene[] = [ + { + gene_symbol: 'GPR20', + ensembl_gene_id: 'ENSG00000204882.4', + }, + { + gene_symbol: 'TRIM46', + ensembl_gene_id: 'ENSG00000163462.18', + }, + ]; + + (EsInstance.getInstance as jest.Mock).mockImplementation(() => ({ + search: async () => ({ body: mockEsResponseBody }), + })); + + const result = await checkGenesExist([ + 'CYB5R1', + 'TBCA', + 'TOMM5', + 'NRXN2', + 'ENSG00000163462.18', + 'ENSG00000211592', + 'ENSG000002137410', + 'FUT7', + 'AL139424', + 
'ENSG00000204882.4', + ]); + + expect(result).toEqual(expectedResponse); + }); + }); }); diff --git a/src/endpoints/transcriptomics/index.ts b/src/endpoints/transcriptomics/index.ts index a70e59c..bb29f38 100644 --- a/src/endpoints/transcriptomics/index.ts +++ b/src/endpoints/transcriptomics/index.ts @@ -1,5 +1,3 @@ -import { max, min } from 'lodash'; - import EsInstance from '../../ElasticSearchClientInstance'; import { ES_CHROMOSOME_AGG_SIZE, @@ -13,7 +11,9 @@ import { DiffGeneExpVolcano, Facets, FetchDiffGeneExpResponse, + FetchDistinctGenesBySymbolOrEnsemblId, FetchSampleGeneExpBySampleIdResponse, + MatchedGene, SampleGeneExpPoint, SampleGeneExpVolcano, } from './types'; @@ -207,3 +207,48 @@ export const checkSampleIdsAndGene = async (sample_ids: string[], ensembl_gene_i return sampleGeneExpBySample.by_sample.buckets.map(b => b.key); }; + +export const checkGenesExist = async (genes: string[]): Promise => { + const client = EsInstance.getInstance(); + + const { body } = await client.search({ + index: esSampleGeneExpIndex, + body: { + query: { + bool: { + should: [ + { + terms: { + symbol: genes, + }, + }, + { + terms: { + ensembl_gene_id: genes, + }, + }, + ], + }, + }, + size: 0, + aggs: { + distinct_genes: { + composite: { + sources: [ + { gene_symbol: { terms: { field: 'gene_symbol' } } }, + { ensembl_gene_id: { terms: { field: 'ensembl_gene_id' } } }, + ], + size: genes.length, + }, + }, + }, + }, + }); + + const distinctGenesBySymbolOrEnsemblId: FetchDistinctGenesBySymbolOrEnsemblId = body?.aggregations; + + return distinctGenesBySymbolOrEnsemblId.distinct_genes.buckets.map(bucket => ({ + gene_symbol: bucket.key.gene_symbol, + ensembl_gene_id: bucket.key.ensembl_gene_id, + })); +}; diff --git a/src/endpoints/transcriptomics/types.ts b/src/endpoints/transcriptomics/types.ts index 6ed6283..d990b8a 100644 --- a/src/endpoints/transcriptomics/types.ts +++ b/src/endpoints/transcriptomics/types.ts @@ -107,3 +107,19 @@ export type 
FetchSampleGeneExpBySampleIdResponse = { }[]; }; }; + +export type MatchedGene = { + gene_symbol: string; + ensembl_gene_id: string; +}; + +export type FetchDistinctGenesBySymbolOrEnsemblId = { + distinct_genes: { + buckets: { + key: { + gene_symbol: string; + ensembl_gene_id: string; + }; + }[]; + }; +};