diff --git a/beacon/__main__.py b/beacon/__main__.py index ce79ac14..5d28cb42 100644 --- a/beacon/__main__.py +++ b/beacon/__main__.py @@ -66,7 +66,7 @@ async def main(path=None): #) beacon = web.Application( - middlewares=[web.normalize_path_middleware(), middlewares.error_middleware, cors_middleware(origins=["https://beacon-network-test.ega-archive.org", "https://beacon-network-test2.ega-archive.org", "https://beacon-network-demo.ega-archive.org","https://beacon-network-demo2.ega-archive.org", "http://localhost:3000", "http://localhost:3010", "https://beacon-network-cineca-demo.ega-archive.org", "https://cancer-beacon-demo.ega-archive.org"])] + middlewares=[web.normalize_path_middleware(), middlewares.error_middleware, cors_middleware(origins=["https://beacon-network-test.ega-archive.org", "https://beacon-network-test2.ega-archive.org", "https://beacon-network-demo.ega-archive.org","https://beacon-network-demo2.ega-archive.org", "http://localhost:3000", "http://localhost:3010", "https://beacon-network-cineca-demo.ega-archive.org", "https://beacon.ega-archive.org", "https://cancer-beacon-demo.ega-archive.org"])] ) @@ -116,6 +116,11 @@ async def main(path=None): expose_headers="*", allow_methods=("POST", "PATCH", "GET", "OPTIONS"), allow_headers=DEFAULT_ALLOW_HEADERS), + "https://beacon.ega-archive.org": + aiohttp_cors.ResourceOptions(allow_credentials=True, + expose_headers="*", + allow_methods=("POST", "PATCH", "GET", "OPTIONS"), + allow_headers=DEFAULT_ALLOW_HEADERS), "http://localhost:3010": aiohttp_cors.ResourceOptions(allow_credentials=True, expose_headers="*", diff --git a/beacon/api_version.yml b/beacon/api_version.yml index 00afe353..74329f85 100644 --- a/beacon/api_version.yml +++ b/beacon/api_version.yml @@ -1 +1 @@ -api_version: v2.0-b8fa53a +api_version: v2.0-b50b1ac diff --git a/beacon/conf.py b/beacon/conf.py index 06132668..07234592 100644 --- a/beacon/conf.py +++ b/beacon/conf.py @@ -126,6 +126,6 @@ # ontologies_folder = "ontologies" -alphanumeric_terms = ['libraryStrategy', 'molecularAttributes.geneIds', 'diseases.ageOfOnset.iso8601duration'] +alphanumeric_terms = ['libraryStrategy', 'molecularAttributes.geneIds', 'diseases.ageOfOnset.iso8601duration', 'molecularAttributes.aminoacidChanges'] ontology_files={"NCIT": "http://purl.obolibrary.org/obo/NCIT.obo"} \ No newline at end of file diff --git a/beacon/db/analyses.py b/beacon/db/analyses.py index d4cb8df0..4e06fd11 100644 --- a/beacon/db/analyses.py +++ b/beacon/db/analyses.py @@ -45,7 +45,7 @@ def get_analyses(entry_id: Optional[str], qparams: RequestParams, dataset: str): limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_analysis_with_id(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'analyses' @@ -63,7 +63,7 @@ def get_analysis_with_id(entry_id: Optional[str], qparams: RequestParams, datase if limit > 100 or limit == 0: limit = 100 count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_variants_of_analysis(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'analyses' @@ -86,7 +86,7 @@ def get_variants_of_analysis(entry_id: Optional[str], qparams: RequestParams, da limit = 100 idq="caseLevelData.biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_filtering_terms_of_analyse(entry_id: Optional[str], qparams: RequestParams): query = {'scopes': 'analysis'} diff --git a/beacon/db/biosamples.py b/beacon/db/biosamples.py index 3361b921..574690fe 100644 --- a/beacon/db/biosamples.py +++ b/beacon/db/biosamples.py @@ -45,7 +45,7 @@ def get_biosamples(entry_id: Optional[str], qparams: RequestParams, dataset: str limit = 100 idq="id" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_biosample_with_id(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -64,7 +64,7 @@ def get_biosample_with_id(entry_id: Optional[str], qparams: RequestParams, datas limit = 100 idq="id" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_variants_of_biosample(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'g_variants' @@ -82,7 +82,7 @@ def get_variants_of_biosample(entry_id: Optional[str], qparams: RequestParams, d limit = 100 idq="caseLevelData.biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_analyses_of_biosample(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -102,7 +102,7 @@ def get_analyses_of_biosample(entry_id: Optional[str], qparams: RequestParams, d limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_runs_of_biosample(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'biosamples' @@ -120,7 +120,7 @@ def get_runs_of_biosample(entry_id: Optional[str], qparams: RequestParams, datas limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_filtering_terms_of_biosample(entry_id: Optional[str], qparams: RequestParams): query = {'scopes': 'biosample'} diff --git a/beacon/db/g_variants.py b/beacon/db/g_variants.py index 5cfd6380..49236e85 100644 --- a/beacon/db/g_variants.py +++ b/beacon/db/g_variants.py @@ -6,6 +6,7 @@ from beacon.request.model import AlphanumericFilter, Operator, RequestParams from beacon.db import client import yaml +import time from aiohttp import web @@ -238,6 +239,7 @@ def apply_request_parameters(query: Dict[str, List[dict]], qparams: RequestParam def get_variants(entry_id: Optional[str], qparams: RequestParams, dataset: str): + LOG.debug(time.time()) collection = 'g_variants' mongo_collection = client.beacon.genomicVariations parameters_as_filters=False @@ -267,7 +269,8 @@ def get_variants(entry_id: Optional[str], qparams: RequestParams, dataset: str): datasets_dict = yaml.safe_load(datasets_file) #LOG.debug(query) count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + LOG.debug(time.time()) + return schema, count, dataset_count, docs, dataset def get_variant_with_id(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -287,7 +290,7 @@ def get_variant_with_id(entry_id: Optional[str], qparams: RequestParams, dataset limit = 100 idq="caseLevelData.biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_biosamples_of_variant(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -323,7 +326,7 @@ def get_biosamples_of_variant(entry_id: Optional[str], qparams: RequestParams, d limit = 100 idq="id" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_runs_of_variant(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'g_variants' @@ -358,7 +361,7 @@ def get_runs_of_variant(entry_id: Optional[str], qparams: RequestParams, dataset limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_analyses_of_variant(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -394,7 +397,7 @@ def get_analyses_of_variant(entry_id: Optional[str], qparams: RequestParams, dat limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_filtering_terms_of_genomicvariation(entry_id: Optional[str], qparams: RequestParams): query = {'scopes': 'genomicVariation'} @@ -447,4 +450,4 @@ def get_individuals_of_variant(entry_id: Optional[str], qparams: RequestParams, limit = 100 idq="id" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs \ No newline at end of file + return schema, count, dataset_count, docs, dataset \ No newline at end of file diff --git a/beacon/db/individuals.py b/beacon/db/individuals.py index c847b347..0178058a 100644 --- a/beacon/db/individuals.py +++ b/beacon/db/individuals.py @@ -8,6 +8,7 @@ from beacon.db.utils import query_id, get_count, get_documents from beacon.request.model import RequestParams from beacon.db.g_variants import apply_request_parameters +import time import yaml from aiohttp import web @@ -18,6 +19,7 @@ def include_resultset_responses(query: Dict[str, List[dict]], qparams: RequestPa return query def get_individuals(entry_id: Optional[str], qparams: RequestParams, dataset: str): + LOG.debug(time.time()) collection = 'individuals' mongo_collection = client.beacon.individuals parameters_as_filters=False @@ -45,7 +47,7 @@ def get_individuals(entry_id: Optional[str], qparams: RequestParams, dataset: st limit = 100 idq="id" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_individual_with_id(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -65,7 +67,7 @@ def get_individual_with_id(entry_id: Optional[str], qparams: RequestParams, data if limit > 100 or limit == 0: limit = 100 count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_variants_of_individual(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -89,7 +91,7 @@ def get_variants_of_individual(entry_id: Optional[str], qparams: RequestParams, limit = 100 idq="caseLevelData.biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_biosamples_of_individual(entry_id: Optional[str], qparams: RequestParams, dataset: str): @@ -109,7 +111,7 @@ def get_biosamples_of_individual(entry_id: Optional[str], qparams: RequestParams limit = 100 idq="id" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_filtering_terms_of_individual(entry_id: Optional[str], qparams: RequestParams): @@ -144,7 +146,7 @@ def get_runs_of_individual(entry_id: Optional[str], qparams: RequestParams, data limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_analyses_of_individual(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'individuals' @@ -163,4 +165,4 @@ def get_analyses_of_individual(entry_id: Optional[str], qparams: RequestParams, limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset diff --git a/beacon/db/runs.py b/beacon/db/runs.py index 4ab9743c..5f1ea663 100644 --- a/beacon/db/runs.py +++ b/beacon/db/runs.py @@ -41,7 +41,7 @@ def get_runs(entry_id: Optional[str], qparams: RequestParams, dataset: str): limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_run_with_id(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'runs' @@ -59,7 +59,7 @@ def get_run_with_id(entry_id: Optional[str], qparams: RequestParams, dataset: st limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset @@ -83,7 +83,7 @@ def get_variants_of_run(entry_id: Optional[str], qparams: RequestParams, dataset limit = 100 idq="caseLevelData.biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_analyses_of_run(entry_id: Optional[str], qparams: RequestParams, dataset: str): collection = 'runs' @@ -102,7 +102,7 @@ def get_analyses_of_run(entry_id: Optional[str], qparams: RequestParams, dataset limit = 100 idq="biosampleId" count, dataset_count, docs = get_docs_by_response_type(include, query, datasets_dict, dataset, limit, skip, mongo_collection, idq) - return schema, count, dataset_count, docs + return schema, count, dataset_count, docs, dataset def get_filtering_terms_of_run(entry_id: Optional[str], qparams: RequestParams): query = {'scopes': 'run'} diff --git a/beacon/request/handlers.py b/beacon/request/handlers.py index f59356c0..54e4c778 100644 --- a/beacon/request/handlers.py +++ b/beacon/request/handlers.py @@ -8,6 +8,7 @@ import yaml import jwt import requests +import concurrent.futures from concurrent.futures import ThreadPoolExecutor from beacon.request import ontologies from beacon.request.model import Granularity, RequestParams @@ -59,8 +60,6 @@ async def wrapper(request: Request): [r for r in records] if records else [] ) LOG.debug(entity_schema) - LOG.debug(response_converted) - LOG.debug(type(response_converted)) response = build_beacon_collection_response( response_converted, count, qparams, lambda x, y: x, entity_schema ) @@ -221,11 +220,12 @@ async def wrapper(request: Request): #LOG.debug(response_datasets) new_count=0 loop = asyncio.get_running_loop() - for dataset in response_datasets: - with ThreadPoolExecutor() as pool: - entity_schema, count, dataset_count, records = await loop.run_in_executor(pool, db_fn, entry_id, qparams, dataset) - #LOG.debug(dataset) - + with ThreadPoolExecutor() as pool: + done, pending = await asyncio.wait(fs=[loop.run_in_executor(pool, db_fn, entry_id, qparams, dataset) for dataset in response_datasets], + return_when=asyncio.ALL_COMPLETED + ) + for task in done: + entity_schema, count, dataset_count, records, dataset = task.result() if dataset_count != -1: new_count+=dataset_count datasets_docs[dataset]=records diff --git a/deploy/conf.py b/deploy/conf.py index d061e5be..2813bb5c 100644 --- a/deploy/conf.py +++ b/deploy/conf.py @@ -126,6 +126,6 @@ # ontologies_folder = "ontologies" -alphanumeric_terms = ['libraryStrategy', 'molecularAttributes.geneIds', 'diseases.ageOfOnset.iso8601duration'] +alphanumeric_terms = ['libraryStrategy', 'molecularAttributes.geneIds', 'diseases.ageOfOnset.iso8601duration', 'molecularAttributes.aminoacidChanges'] ontology_files={"NCIT": "http://purl.obolibrary.org/obo/NCIT.obo"} \ No newline at end of file diff --git a/permissions/public_datasets.yml b/permissions/public_datasets.yml index d4714d80..a6ca5924 100644 --- a/permissions/public_datasets.yml +++ b/permissions/public_datasets.yml @@ -3,5 +3,4 @@ public_datasets: - CINECA_dataset - AV_Dataset - rd-connect_dataset -- coadread_tcga_pan_can_atlas_2018 -- B1MG-COADREAD \ No newline at end of file +- coadread_tcga_pan_can_atlas_2018 \ No newline at end of file