Skip to content

Commit

Permalink
Merge pull request #61 from bento-platform/fixes/variants
Browse files Browse the repository at this point in the history
Fixes/variants
  • Loading branch information
gsfk authored Oct 19, 2023
2 parents f8d97c2 + 8ade0a5 commit d14dfe1
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 52 deletions.
77 changes: 28 additions & 49 deletions bento_beacon/endpoints/variants.py
Original file line number Diff line number Diff line change
@@ -1,84 +1,63 @@
from flask import Blueprint
from ..authz.middleware import authz_middleware
from ..utils.beacon_request import query_parameters_from_request
from ..utils.beacon_response import beacon_response, add_info_to_response
from ..utils.beacon_response import beacon_response, add_info_to_response, zero_count_response
from ..utils.gohan_utils import query_gohan, gohan_total_variants_count, gohan_totals_by_sample_id
from ..utils.katsu_utils import katsu_filters_query
from ..utils.exceptions import NotImplemented
from ..utils.search import biosample_id_search

variants = Blueprint("variants", __name__)


# returns counts only
# Handles GET and POST on /g_variants and answers with a variant count only,
# wrapped by beacon_response({"count": ...}).
# NOTE(review): this span looks like a rendered diff with the +/- markers and
# indentation stripped: near-duplicate neighbouring statements (the two
# unpackings, the two "if not (...)" guards, the paired "if ... filters:" /
# "if has_filters:" lines) are presumably old/new versions of the same line.
# Confirm against the repository before treating this as one runnable body.
@variants.route("/g_variants", methods=['GET', 'POST'])
@authz_middleware.deco_public_endpoint # TODO: for now. eventually, return more depending on permissions
def get_variants():
# three-value unpacking; the statement after it unpacks a fourth value, config_filters
variants_query, phenopacket_filters, experiment_filters = query_parameters_from_request()
variants_query, phenopacket_filters, experiment_filters, config_filters = query_parameters_from_request()
# truthy when any of the three filter sets is non-empty
has_filters = phenopacket_filters or experiment_filters or config_filters

# if no query, return total count of variants
if not (variants_query or phenopacket_filters or experiment_filters):
if not (variants_query or has_filters):
add_info_to_response("no query found, returning total count")
total_count = gohan_total_variants_count()
return beacon_response({"count": total_count})

# ask katsu for the biosample ids matching the phenopacket filters;
# no matches short-circuits to an empty, zero-count response
if phenopacket_filters:
phenopacket_sample_ids = katsu_filters_query(phenopacket_filters, "phenopacket", get_biosample_ids=True)
if not phenopacket_sample_ids:
return beacon_response({"count": 0, "results": []})

# same lookup for the experiment filters, with the same early return
if experiment_filters:
experiment_sample_ids = katsu_filters_query(experiment_filters, "experiment", get_biosample_ids=True)
if not experiment_sample_ids:
return beacon_response({"count": 0, "results": []})

# compute cases for results
# some redundant bools for clarity
# collect biosample ids from all filters
sample_ids = []
# both filter kinds present: keep only ids returned by both lookups
if phenopacket_filters and experiment_filters:
sample_ids = list(set(phenopacket_sample_ids) & set(experiment_sample_ids))
# another early return if empty?
if phenopacket_filters and not experiment_filters:
sample_ids = phenopacket_sample_ids
if experiment_filters and not phenopacket_filters:
sample_ids = experiment_sample_ids

# workaround for casing issues in gohan
sample_ids = [id.upper() for id in sample_ids]
# single search call that receives all three filter sets; an empty result
# short-circuits to zero_count_response()
if has_filters:
sample_ids = biosample_id_search(phenopacket_filters=phenopacket_filters,
experiment_filters=experiment_filters, config_filters=config_filters)
if not sample_ids:
return zero_count_response()

# finally, find relevant variants, depending on whether a variants query was made
if variants_query:
# gohan search returns uppercase only
sample_ids = [id.upper() for id in sample_ids]

variant_results = query_gohan(variants_query, "record", ids_only=False)
if phenopacket_filters or experiment_filters:
if has_filters:
# keep only records whose "sample_id" is among the uppercased filtered ids
variant_results = list(filter(lambda v: v.get("sample_id") in sample_ids, variant_results))
gohan_count = len(variant_results)
else:
# gohan overview returns lowercase only
sample_ids = [id.lower() for id in sample_ids]

variant_totals = gohan_totals_by_sample_id()
if phenopacket_filters or experiment_filters:
# assuming variant_totals is a dict (TODO confirm): .get(id) yields None for
# an id it does not contain, which sum() cannot add
gohan_count = sum(variant_totals.get(id) for id in sample_ids)
if has_filters:
# guarded form: ids absent from variant_totals are skipped
gohan_count = sum(variant_totals.get(id) for id in sample_ids if id in variant_totals)
else:
gohan_count = sum(variant_totals.values())

return beacon_response({"count": gohan_count})


# -------------------------------------------------------
# "by id" endpoints
# -------------------------------------------------------
# endpoints in beacon model not yet implemented:
#
# These aren't useful for a counts-only beacon (you will never know any ids)

@variants.route("/g_variants/<id>", methods=['GET', 'POST'])
def variant_by_id(id):  # TODO: authz
    """Placeholder for fetching one variant by its (internal) id.

    The ``id`` parameter name is bound to the ``<id>`` segment of the route
    rule, so it is kept even though it shadows the builtin.

    Raises:
        NotImplemented: always. This is the exception imported from
            ``..utils.exceptions``, not the builtin ``NotImplemented``
            constant.
    """
    raise NotImplemented()


@variants.route("/g_variants/<id>/biosamples", methods=['GET', 'POST'])
def biosamples_by_variant(id):  # TODO: authz
    """Placeholder for listing all biosamples for a particular variant.

    The ``id`` parameter name is bound to the ``<id>`` segment of the route
    rule, so it is kept even though it shadows the builtin.

    Raises:
        NotImplemented: always. This is the exception imported from
            ``..utils.exceptions``, not the builtin ``NotImplemented``
            constant.
    """
    raise NotImplemented()


@variants.route("/g_variants/<id>/individuals", methods=['GET', 'POST'])
def individuals_by_variant(id):  # TODO: authz
    """Placeholder for listing all individuals for a particular variant.

    The ``id`` parameter name is bound to the ``<id>`` segment of the route
    rule, so it is kept even though it shadows the builtin.

    Raises:
        NotImplemented: always. This is the exception imported from
            ``..utils.exceptions``, not the builtin ``NotImplemented``
            constant.
    """
    raise NotImplemented()
# /g_variants/<id>
# /g_variants/<id>/biosamples
# /g_variants/<id>/individuals
#
# ... "id" here appears to be a unique id for each entry, not a variant identifier like a dbSNP entry
# -------------------------------------------------------
9 changes: 8 additions & 1 deletion bento_beacon/utils/katsu_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def katsu_filters_query(beacon_filters, datatype, get_biosample_ids=False):
if value.get("data_type") == datatype:
match_list = match_list + value.get("matches")

return match_list
return list(set(match_list))


def katsu_filters_and_sample_ids_query(beacon_filters, datatype, sample_ids):
Expand Down Expand Up @@ -274,6 +274,13 @@ def phenopackets_for_ids(ids):
return katsu_network_call(payload, endpoint)


def biosample_ids_for_individuals(individual_ids):
    """Look up biosample ids for a collection of individual ids.

    Args:
        individual_ids: ids matched against the phenopacket ``subject.id``
            field with the ``#in`` operator.

    Returns:
        An empty list when ``individual_ids`` is empty or ``None`` (no query
        is issued); otherwise whatever ``katsu_filters_query`` returns for
        the phenopacket filter with ``get_biosample_ids=True``.
    """
    # Nothing to match: skip the katsu query entirely.
    if not individual_ids:
        return []

    subject_filter = {"id": "subject.id", "operator": "#in", "value": individual_ids}
    return katsu_filters_query([subject_filter], "phenopacket", get_biosample_ids=True)


def search_summary_statistics(ids):
endpoint = current_app.config["KATSU_SEARCH_OVERVIEW"]
payload = {"id": ids}
Expand Down
12 changes: 10 additions & 2 deletions bento_beacon/utils/search.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from functools import reduce
from .gohan_utils import query_gohan
from .katsu_utils import katsu_filters_query
from .katsu_utils import katsu_filters_query, search_from_config, biosample_ids_for_individuals


# TODO: search by linked field set elements instead of hardcoding
Expand All @@ -22,10 +22,18 @@ def biosample_id_search(variants_query=None, phenopacket_filters=None, experimen
return []
results_biosample_ids["experiment_sample_ids"] = experiment_sample_ids

# next two return *all* biosample ids for matching individuals

if phenopacket_filters:
phenopacket_sample_ids = katsu_filters_query(phenopacket_filters, "phenopacket", get_biosample_ids=True)
if not phenopacket_sample_ids:
return []
results_biosample_ids["phenopacket_sample_ids"] = phenopacket_sample_ids


if config_filters:
config_individuals = search_from_config(config_filters)
if not config_individuals:
return []
results_biosample_ids["config_sample_ids"] = biosample_ids_for_individuals(config_individuals)

return list(reduce(set.intersection, (set(ids) for ids in results_biosample_ids.values())))

0 comments on commit d14dfe1

Please sign in to comment.