diff --git a/flask/app/analyses.py b/flask/app/analyses.py
index fc4911e01..cc98ec240 100644
--- a/flask/app/analyses.py
+++ b/flask/app/analyses.py
@@ -2,8 +2,9 @@
 from datetime import datetime
 
 from flask_login import login_required
-from sqlalchemy import distinct, func, or_
+from sqlalchemy import distinct, func, or_, and_
 from sqlalchemy.orm import aliased, selectinload
+
 from flask import Blueprint, Response, abort, current_app as app, jsonify, request
 from sqlalchemy.sql.expression import cast
 
@@ -25,6 +26,8 @@
     paginated_response,
     transaction_or_abort,
     validate_json,
+    create_genotype_obj,
+    create_variant_obj,
 )
 
 analyses_blueprint = Blueprint(
@@ -694,3 +697,147 @@ def update_analysis(id: int):
             "updated_by_id": analysis.updated_by_id and analysis.updated_by.username,
         }
     )
+
+
+@analyses_blueprint.route(
+    "/api/analyses/<int:analysis_id>/datasets/<int:dataset_id>/variants",
+    methods=["POST"],
+)
+@login_required
+@validate_json
+def insert_participant_report(analysis_id: int, dataset_id: int):
+    """
+    Insert the variants and genotypes of a participant report.
+
+    The JSON body must hold `participant`, an underscore-delimited string
+    whose second segment is the participant codename, and `variant`, a list
+    of variant rows that each carry a `genotype` object.
+
+    Responds 400 for a malformed body, 404 when the analysis, the dataset,
+    the participant-dataset match or the dataset-analysis link is missing,
+    and 201 with the request body echoed back on success.
+    """
+
+    dat = request.json
+
+    # reject malformed payloads with a 400 instead of an unhandled exception
+    if not isinstance(dat, dict):
+        abort(400, "Request body must be a JSON object")
+
+    vt_dat = dat.get("variant")
+    if not isinstance(vt_dat, list) or not all(
+        isinstance(row, dict) and isinstance(row.get("genotype"), dict)
+        for row in vt_dat
+    ):
+        abort(400, "'variant' must be a list of objects with a 'genotype'")
+
+    participant = dat.get("participant")
+    codename_parts = participant.split("_") if isinstance(participant, str) else []
+    # an empty codename would turn the alias LIKE below into a match-all
+    if len(codename_parts) < 2 or not codename_parts[1]:
+        abort(400, "'participant' must hold a codename as its second '_' segment")
+    ptp_codename = codename_parts[1]
+
+    for k in ["analysis", "participant", "family"]:
+        app.logger.debug("{} : {}".format(k, dat.get(k)))
+
+    # check analysis and datasets exists at all
+    valid_analysis = models.Analysis.query.filter(
+        models.Analysis.analysis_id == analysis_id
+    ).one_or_none()
+
+    if not valid_analysis:
+        abort(404, "Analysis ID not found")
+
+    valid_dataset = models.Dataset.query.filter(
+        models.Dataset.dataset_id == dataset_id
+    ).one_or_none()
+
+    if not valid_dataset:
+        abort(404, "Dataset ID not found")
+
+    # check ptp belongs to dataset otherwise one could insert any variant and genotype as long as dataset and analysis are valid
+    # explicit joins are needed here: joinedload() joins anonymous aliases, so
+    # Participant criteria next to it would match any row of the participant table
+    valid_participant_dataset = (
+        db.session.query(models.Dataset)
+        .join(models.Dataset.tissue_sample)
+        .join(models.TissueSample.participant)
+        .filter(
+            models.Dataset.dataset_id == dataset_id,
+            or_(
+                models.Participant.participant_codename == ptp_codename,
+                models.Participant.participant_aliases.contains(
+                    ptp_codename, autoescape=True
+                ),
+            ),
+        )
+        .first()
+    )
+
+    if not valid_participant_dataset:
+        abort(
+            404,
+            "Dataset ID and/or participant codename does not match up - please check both are properly formatted.",
+        )
+
+    # check dataset id and analysis id are associated
+    valid_dataset_analysis = (
+        db.session.query(models.datasets_analyses_table)
+        .filter(
+            and_(
+                models.datasets_analyses_table.c.dataset_id == dataset_id,
+                models.datasets_analyses_table.c.analysis_id == analysis_id,
+            )
+        )
+        .one_or_none()
+    )
+    if not valid_dataset_analysis:
+        abort(
+            404,
+            "Dataset and analysis IDs are not linked.",
+        )
+
+    app.logger.debug(valid_dataset_analysis)
+
+    # how should this be handled?
+    existing_vt_count = (
+        db.session.query(models.Variant)
+        .filter(models.Variant.analysis_id == analysis_id)
+        .count()
+    )
+
+    if existing_vt_count:
+        app.logger.warning(
+            "{} Variants already exist for dataset id: {} and analysis id: {}".format(
+                existing_vt_count, dataset_id, analysis_id
+            )
+        )
+
+    for vt_row in vt_dat:
+
+        vt_obj = create_variant_obj(vt_row, analysis_id)
+
+        db.session.add(vt_obj)
+
+        # flush before reading variant_id (presumably database-generated)
+        transaction_or_abort(db.session.flush)
+
+        variant_id = vt_obj.variant_id
+
+        gt_obj = create_genotype_obj(
+            vt_row["genotype"],
+            analysis_id=analysis_id,
+            dataset_id=dataset_id,
+            variant_id=variant_id,
+        )
+        db.session.add(gt_obj)
+
+        transaction_or_abort(db.session.flush)
+
+    transaction_or_abort(db.session.commit)
+
+    return (
+        jsonify(request.json),
+        201,
+    )
diff --git a/flask/app/utils.py b/flask/app/utils.py
index 66bdf7531..6406750b1 100644
--- a/flask/app/utils.py
+++ b/flask/app/utils.py
@@ -27,7 +27,70 @@
 
 from .extensions import db
 from .madmin import MinioAdmin
-from .models import User, Group, Dataset
+from .models import User, Group, Dataset, Variant, Genotype
+
+
+def try_int(value):
+    """
+    Convert `value` to an int, returning None when it cannot be converted.
+
+    None is passed through unchanged.
+    """
+    if value is None:
+        return None
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        # TypeError covers non-scalar JSON values such as lists and objects
+        return None
+
+
+def create_variant_obj(row: Dict[str, object], analysis_id: int) -> Variant:
+    """
+    Build a Variant from `row`, keyed by the model's column names.
+
+    Columns absent from `row` are set to None, `number_of_callers` is
+    coerced with try_int, and `analysis_id` always overrides the row value.
+    """
+
+    cols_list = list(inspect(Variant).mapper.columns)
+
+    potential_int_cols = ["number_of_callers"]
+    vt = Variant(**{col.name: row.get(col.name) for col in cols_list})
+
+    for col in potential_int_cols:
+        # .get() keeps an absent column from raising KeyError
+        setattr(vt, col, try_int(row.get(col)))
+
+    vt.analysis_id = analysis_id
+
+    return vt
+
+
+def create_genotype_obj(
+    row: Dict[str, object], analysis_id: int, dataset_id: int, variant_id: int
+) -> Genotype:
+    """
+    Build a Genotype from `row`, keyed by the model's column names.
+
+    `burden`, `alt_depths` and `coverage` are coerced with try_int, and the
+    three id arguments always override the row values.
+    """
+
+    cols_list = list(inspect(Genotype).mapper.columns)
+    potential_int_cols = ["burden", "alt_depths", "coverage"]
+
+    gt = Genotype(**{col.name: row.get(col.name) for col in cols_list})
+
+    for col in potential_int_cols:
+        # .get() keeps an absent column from raising KeyError
+        setattr(gt, col, try_int(row.get(col)))
+
+    gt.analysis_id = analysis_id
+    gt.dataset_id = dataset_id
+    gt.variant_id = variant_id
+
+    return gt
 
 
 def str_to_bool(param: str) -> bool: