diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 90f0dc6..be81644 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -5,9 +5,9 @@ name: Python application on: push: - branches: [ main ] + branches: [ main, project_data ] pull_request: - branches: [ main ] + branches: [ main, project_data ] jobs: build: @@ -15,11 +15,11 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.7 - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - name: Set up Python 3.8 + uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.8" - name: Install dependencies run: | python -m pip install --upgrade pip @@ -33,4 +33,4 @@ jobs: flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - pytest + pytest --cov=app --log-level=ERROR tests diff --git a/.gitignore b/.gitignore index 4a07f90..d15afc5 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,11 @@ nginx.conf app/static/bclconvert_report_v0.02.html app/static/MultiQC_lane1.html app/static/MultiQC_lane1.html:Zone.Identifier +app/static/* +.coverage +tests/1__init__.py +aaa +facebook.ico +migration_docker_db.sh +static/* +celery_tmp/* diff --git a/Dockerfile b/Dockerfile index f4aa145..399764c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ -FROM python:3.7.12-slim -LABEL version="v0.01" +FROM python:3.8.16-slim +LABEL version="v0.02" LABEL description="Docker image for running IGFPortal server" COPY requirements.txt /tmp/requirements.txt RUN apt-get -y update && \ @@ -13,4 +13,4 @@ RUN apt-get -y update && \ USER nobody WORKDIR /tmp ENTRYPOINT ["bash","-c"] -CMD ["flask", "run"] \ No newline at end of file +CMD ["flask", "run"] diff --git a/app/__init__.py b/app/__init__.py index ecd504c..900f677 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,9 +1,11 @@ +import os import logging - from flask import Flask, 
request from flask_appbuilder import AppBuilder, SQLA from .index import CustomIndexView from celery import Celery +from flask_caching import Cache +from flask_migrate import Migrate """ Logging configuration @@ -15,6 +17,7 @@ app = Flask(__name__) app.config.from_object("config") db = SQLA(app) +migrate = Migrate(app, db) appbuilder = AppBuilder(app, db.session, indexview=CustomIndexView) @@ -38,6 +41,28 @@ def set_sqlite_pragma(dbapi_connection, connection_record): result_backend=app.config['CELERY_RESULT_BACKEND']) celery.conf.update(app.config) +## CACHING +cache_config = { + "CACHE_TYPE": "RedisCache", + "CACHE_DEFAULT_TIMEOUT": 300, + "CACHE_REDIS_URL": app.config['CACHE_REDIS_URL'] +} + +test_cache_config = { + "CACHE_TYPE": "SimpleCache", + "CACHE_DEFAULT_TIMEOUT": 300, +} + +env_name = os.environ.get('ENV_NAME') +if 'TESTING' in app.config and \ + app.config.get('TESTING') is not None and \ + app.config.get('TESTING'): + app.config.from_mapping(test_cache_config) + cache = Cache(app) +else: + app.config.from_mapping(cache_config) + cache = Cache(app) + ## GDPR @app.context_processor def inject_template_scope(): diff --git a/app/admin_home_api.py b/app/admin_home_api.py index 6feae64..73fc917 100644 --- a/app/admin_home_api.py +++ b/app/admin_home_api.py @@ -9,6 +9,8 @@ from . 
import app, db, celery from .admin_home.admin_home_utils import parse_and_add_new_admin_view_data +log = logging.getLogger(__name__) + @celery.task(bind=True) def async_parse_and_add_new_admin_view_data( self, json_file: str) -> dict: @@ -16,7 +18,7 @@ def async_parse_and_add_new_admin_view_data( parse_and_add_new_admin_view_data(json_file) return {"message": "success"} except Exception as e: - logging.error( + log.error( "Failed to run celery job, error: {0}".\ format(e)) @@ -43,9 +45,9 @@ def update_admin_view_data(self): prefix='admin_view_',) with open(json_file, 'w') as fp: json.dump(json_data, fp) - _ = \ + msg = \ async_parse_and_add_new_admin_view_data.\ apply_async(args=[json_file]) return self.response(200, message='loaded new data') except Exception as e: - logging.error(e) \ No newline at end of file + log.error(e) \ No newline at end of file diff --git a/app/airflow/airflow_api_utils.py b/app/airflow/airflow_api_utils.py index d88e2fa..9eca832 100644 --- a/app/airflow/airflow_api_utils.py +++ b/app/airflow/airflow_api_utils.py @@ -1,13 +1,26 @@ import json import requests from urllib.parse import urljoin +from typing import Union + +def get_airflow_dag_id(airflow_conf_file: str, dag_tag: str) -> Union[str, None]: + try: + with open(airflow_conf_file, "r") as fp: + airflow_conf = json.load(fp) + dag_id = airflow_conf.get(dag_tag) + return dag_id + except Exception as e: + raise ValueError( + f"Failed to get dag id for tag {dag_tag} in config file {airflow_conf_file}, error: {e}") + def post_to_airflow_api( airflow_conf_file: str, url_suffix: str, data: dict, headers: dict = {"Content-Type": "application/json"}, - verify: bool = False): + verify: bool = False, + dry_run: bool = False): try: with open(airflow_conf_file, "r") as fp: airflow_conf = json.load(fp) @@ -19,13 +32,14 @@ def post_to_airflow_api( url = \ urljoin(airflow_conf['url'], url_suffix) res = \ - requests.post( - url=url, - data=data, - headers=headers, - auth=(airflow_conf["username"], 
airflow_conf["password"]), - verify=verify) - if res.status_code != 200: + requests.post( + url=url, + data=data, + headers=headers, + auth=(airflow_conf["username"], airflow_conf["password"]), + verify=verify) + if res.status_code != 200 and \ + not dry_run: raise ValueError( f"Failed post request, got status: {res.status_code}") return res @@ -36,16 +50,20 @@ def post_to_airflow_api( def trigger_airflow_pipeline( dag_id: str, conf_data: dict, - airflow_conf_file: str): + airflow_conf_file: str, + verify: bool = False, + dry_run: bool = False): try: url_suffix = \ f'dags/{dag_id}/dagRuns' data = {"conf": conf_data} res = \ - post_to_airflow_api( - airflow_conf_file=airflow_conf_file, - url_suffix=url_suffix, - data=data) + post_to_airflow_api( + airflow_conf_file=airflow_conf_file, + url_suffix=url_suffix, + data=data, + verify=verify, + dry_run=dry_run) return res except Exception as e: raise ValueError( diff --git a/app/analysis_view.py b/app/analysis_view.py new file mode 100644 index 0000000..8b0c8ef --- /dev/null +++ b/app/analysis_view.py @@ -0,0 +1,121 @@ +import logging +import os +from app import db +from .models import Analysis +from .models import Pipeline_seed +from .models import Pipeline +from .airflow.airflow_api_utils import trigger_airflow_pipeline +from flask_appbuilder import ModelView +from flask import redirect, flash, url_for, send_file +from flask_appbuilder.actions import action +from flask_appbuilder.models.sqla.interface import SQLAInterface +from . 
import celery + +log = logging.getLogger(__name__) + +def get_analysis_pipeline_seed_status(analysis_id: int) -> str: + try: + result = \ + db.session.\ + query( + Analysis.analysis_name, + Pipeline.pipeline_name, + Pipeline_seed.status).\ + join(Pipeline, Pipeline.pipeline_name==Analysis.analysis_type).\ + join(Pipeline_seed, Pipeline_seed.seed_id==Analysis.analysis_id).\ + filter(Pipeline_seed.pipeline_id==Pipeline.pipeline_id).\ + filter(Pipeline_seed.seed_table=='analysis').\ + filter(Pipeline_seed.status=='SEEDED').\ + filter(Pipeline.pipeline_type=='AIRFLOW').\ + filter(Analysis.analysis_id==analysis_id).\ + one_or_none() + if result is None: + return 'INVALID' + else: + return 'VALID' + except Exception as e: + log.error(e) + raise ValueError( + f"Failed to get analysis pipeline seed status, error: {e}") + + +@celery.task(bind=True) +def async_submit_analysis_pipeline(self, id_list): + try: + results = list() + for analysis_id in id_list: + ## get dag id + dag_name = \ + db.session.\ + query(Analysis.analysis_type).\ + filter(Analysis.analysis_id==analysis_id).\ + one_or_none() + if dag_name is not None: + dag_name = dag_name[0] + res = \ + trigger_airflow_pipeline( + dag_id=dag_name, + conf_data={"analysis_id": analysis_id}, + airflow_conf_file=os.environ['AIRFLOW_CONF_FILE']) + results.append(res.status_code) + return dict(zip(id_list, results)) + except Exception as e: + log.error( + f"Failed to run celery job, error: {e}") + + +class AnalysisView(ModelView): + datamodel = \ + SQLAInterface(Analysis) + list_columns = [ + "analysis_name", + "analysis_type", + "project.project_igf_id"] + base_permissions = [ + "can_list", + "can_show"] + base_order = ("analysis_id", "desc") + + @action("trigger_analysis_pipeline", "Trigger analysis pipeline", confirmation="confirm pipeline run?", icon="fa-rocket") + def trigger_analysis_pipeline(self, item): + try: + id_list = list() + analysis_list = list() + if isinstance(item, list): + id_list = [i.analysis_id for i in 
item] + analysis_list = [i.analysis_name for i in item] + else: + id_list = [item.analysis_id] + analysis_list = [item.analysis_name] + analysis_dict = \ + dict(zip(id_list, analysis_list)) + invalid_id_list = list() + valid_id_list = list() + invalid_name_list = list() + valid_name_list = list() + for analysis_id in id_list: + status = \ + get_analysis_pipeline_seed_status( + analysis_id=analysis_id) + if status == 'VALID': + valid_id_list.\ + append(analysis_id) + valid_name_list.\ + append(analysis_dict.get(analysis_id)) + if status == 'INVALID': + invalid_id_list.\ + append(analysis_id) + invalid_name_list.\ + append(analysis_dict.get(analysis_id)) + if len(valid_name_list) > 0: + _ = \ + async_submit_analysis_pipeline.\ + apply_async(args=[valid_id_list]) + flash(f"Submitted jobs for {', '.join(valid_name_list)}", "info") + if len(invalid_name_list) > 0: + flash(f"Skipped old analysis {', '.join(invalid_name_list)}", "danger") + self.update_redirect() + return redirect(url_for('AnalysisView.list')) + except: + flash('Failed to submit analysis', 'danger') + return redirect(url_for('AnalysisView.list')) \ No newline at end of file diff --git a/app/apis.py b/app/apis.py index 5d48355..35b18e1 100644 --- a/app/apis.py +++ b/app/apis.py @@ -5,6 +5,7 @@ from .raw_metadata_api import RawMetadataDataApi from .admin_home_api import AdminHomeApi from .raw_seqrun_api import RawSeqrunApi +from .raw_analysis_api import RawAnalysisApi """ @@ -15,4 +16,5 @@ appbuilder.add_api(MetadataLoadApi) appbuilder.add_api(RawMetadataDataApi) appbuilder.add_api(AdminHomeApi) -appbuilder.add_api(RawSeqrunApi) \ No newline at end of file +appbuilder.add_api(RawSeqrunApi) +appbuilder.add_api(RawAnalysisApi) \ No newline at end of file diff --git a/app/forms.py b/app/forms.py index 4808109..32ee9d5 100644 --- a/app/forms.py +++ b/app/forms.py @@ -2,7 +2,7 @@ from flask_appbuilder.forms import DynamicForm from wtforms.fields import StringField,SubmitField,IntegerField,RadioField,DecimalField 
from wtforms.validators import DataRequired,InputRequired,NumberRange -from wtforms.ext.sqlalchemy.fields import QuerySelectField +from wtforms_sqlalchemy.fields import QuerySelectField from . import appbuilder, db from .models import IlluminaInteropData diff --git a/app/home_view.py b/app/home_view.py index a0e8f7f..b3f360b 100644 --- a/app/home_view.py +++ b/app/home_view.py @@ -2,6 +2,7 @@ from flask_appbuilder.baseviews import BaseView, expose from flask_appbuilder.security.decorators import protect, has_access from . import db +from app import cache from .models import AdminHomeData log = logging.getLogger(__name__) @@ -41,11 +42,13 @@ class HomeView(BaseView): @expose('/user_home') @has_access + @cache.cached(timeout=600) def general(self): return self.render_template('user_index.html') @expose('/admin_home') @has_access + @cache.cached(timeout=600) def admin_home(self): try: (finished_seqrun, finished_analysis, diff --git a/app/iframe_view.py b/app/iframe_view.py new file mode 100644 index 0000000..d2f3dce --- /dev/null +++ b/app/iframe_view.py @@ -0,0 +1,154 @@ +import json +import base64 +import logging +from flask import url_for +from app import cache +from flask_appbuilder.baseviews import BaseView, expose +from flask_appbuilder.security.decorators import protect, has_access +from app import db +from .models import ( + Project_analysis_info_file, + Project_seqrun_info_file, + Project_seqrun_info_data, + Project_analysis_info_data, + PreDeMultiplexingData, + IlluminaInteropData) + + +log = logging.getLogger(__name__) + +def get_path_for_project_seqrun_info_file(id): + try: + record = \ + db.session.\ + query( + Project_seqrun_info_file.file_path, + Project_seqrun_info_data.project_id).\ + join(Project_seqrun_info_data, Project_seqrun_info_data.project_seqrun_info_data_id==Project_seqrun_info_file.project_seqrun_info_data_id).\ + filter(Project_seqrun_info_file.project_seqrun_info_file_id==id).\ + one_or_none() + if record is None: + log.warning(f"Missing 
data for id {id}") + return '', '' + (file_path, project_id) = record + return file_path, project_id + except Exception as e: + log.error(e) + +def get_path_for_project_analysis_info_file(id): + try: + record = \ + db.session.\ + query( + Project_analysis_info_file.file_path, + Project_analysis_info_data.project_id).\ + join(Project_analysis_info_data, Project_analysis_info_data.project_analysis_info_data_id==Project_analysis_info_file.project_analysis_info_data_id).\ + filter(Project_analysis_info_file.project_analysis_info_file_id==id).\ + one_or_none() + if record is None: + log.warning(f"Missing data for id {id}") + return '', '' + (file_path, project_id) = record + return file_path, project_id + except Exception as e: + log.error(e) + + +def get_path_for_predemult_report(id): + try: + record = \ + db.session.\ + query(PreDeMultiplexingData.file_path).\ + filter(PreDeMultiplexingData.demult_id==id).\ + one_or_none() + if record is None: + log.warning( + f"Missing pre-demult data for id {id}") + return '' + (file_path,) = \ + record + return file_path + except Exception as e: + raise ValueError( + f"Failed to get report for predemult entry {id}, error: {e}") + +def get_path_for_interop_report(id): + try: + record = \ + db.session.\ + query(IlluminaInteropData.file_path).\ + filter(IlluminaInteropData.report_id==id).\ + one_or_none() + if record is None: + log.warning( + f"Missing Interop data for id {id}") + return '' + (file_path,) = \ + record + return file_path + except Exception as e: + raise ValueError( + f"Failed to get report for interop report entry {id}, error: {e}") + +class IFrameView(BaseView): + route_base = "/" + + @expose("/static/rawdata/") + @has_access + @cache.cached(timeout=600) + def view_seqrun_report(self, id): + file_path, project_id = \ + get_path_for_project_seqrun_info_file(id=id) + project_url = \ + url_for('ProjectView.get_project_data', id=project_id) + # return self.render_template("iframe.html", url=file_path, 
project_url=project_url) + with open(file_path, 'r') as fp: + html_data = fp.read() + return self.render_template("iframe.html", html_data=html_data, url_link=project_url) + + @expose("/static/analysis/") + @has_access + @cache.cached(timeout=600) + def view_analysis_report(self, id): + file_path, project_id = \ + get_path_for_project_analysis_info_file(id=id) + project_url = \ + url_for('ProjectView.get_project_data', id=project_id) + # return self.render_template("iframe.html", url=file_path, project_url=project_url) + with open(file_path, 'rb') as fp: + html_data = fp.read() + return self.render_template("iframe.html", html_data=html_data, url_link=project_url) + + @expose("/static/predemult/") + @has_access + @cache.cached(timeout=1200) + def view_predemult_report(self, id): + file_path = \ + get_path_for_predemult_report(id=id) + url_link = \ + url_for('PreDeMultiplexingDataView.list') + if file_path.endswith('.html'): + with open(file_path, 'r') as fp: + html_data = fp.read() + return self.render_template("iframe.html", html_data=html_data, url_link=url_link) + # elif file_path.endswith('.pdf'): + # with open(file_path, 'rb') as fp: + # pdf_data = fp.read() + # return self.render_template("iframe_pdf.html", pdf_data=pdf_data, url_link=url_link) + else: + return self.response(500) + + @expose("/static/interop/") + @has_access + @cache.cached(timeout=1200) + def view_interop_report(self, id): + file_path = \ + get_path_for_interop_report(id=id) + url_link = \ + url_for('IlluminaInteropDataView.list') + if file_path.endswith('.html'): + with open(file_path, 'r') as fp: + html_data = fp.read() + return self.render_template("iframe.html", html_data=html_data, url_link=url_link) + else: + return self.response(500) diff --git a/app/index_table_view.py b/app/index_table_view.py index c4a203b..743eef9 100644 --- a/app/index_table_view.py +++ b/app/index_table_view.py @@ -7,7 +7,7 @@ from datetime import datetime from io import BytesIO, StringIO from 
flask_appbuilder.security.decorators import protect, has_access -from wtforms.ext.sqlalchemy.fields import QuerySelectField +from wtforms_sqlalchemy.fields import QuerySelectField from wtforms.fields import SelectField from flask_appbuilder.fieldwidgets import Select2Widget from flask_appbuilder import ModelView @@ -72,7 +72,7 @@ def download_sample_index_csv(self, item): df.to_csv(output, index=False) output.seek(0) self.update_redirect() - return send_file(output, attachment_filename=file_name, as_attachment=True) + return send_file(output, download_name=file_name, as_attachment=True) except Exception as e: log.error(e) flash('Failed to create csv', 'danger') diff --git a/app/interop_data_api.py b/app/interop_data_api.py index f2cabef..7110bba 100644 --- a/app/interop_data_api.py +++ b/app/interop_data_api.py @@ -1,45 +1,65 @@ -import json, logging, typing +import os, json, logging, gzip, tempfile, hashlib, shutil from typing import Any +from datetime import datetime from flask_appbuilder import ModelRestApi from flask import request from flask_appbuilder.api import expose, rison from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_appbuilder.security.decorators import protect -from . import db +from . 
import app, db, celery from .models import IlluminaInteropData """ InterOp data Api """ -def search_interop_for_run(run_name: str) -> Any: - try: - result = \ - db.session.\ - query(IlluminaInteropData).\ - filter(IlluminaInteropData.run_name==run_name).one_or_none() - return result - except Exception as e: - raise ValueError("Failed lookup for interop data, error: {0}".format(e)) +log = logging.getLogger(__name__) -def add_interop_data(run_data: Any) -> None: +def load_interop_report( + run_name: str, + tag: str, + file_path: str, + base_path: str): try: - if isinstance(run_data, str): - run_data = json.loads(run_data) - if isinstance(run_data, bytes): - run_data = json.loads(run_data.decode()) - interop_entry = \ - IlluminaInteropData( - run_name = run_data.get('run_name'), - table_data = run_data.get('table_data'), - flowcell_data = run_data.get('flowcell_data'), - intensity_data = run_data.get('intensity_data'), - cluster_count_data = run_data.get('cluster_count_data'), - density_data = run_data.get('density_data'), - qscore_bins_data = run_data.get('qscore_bins_data'), - qscore_cycles_data = run_data.get('qscore_cycles_data'), - occupied_pass_filter = run_data.get('occupied_pass_filter')) + ## get date stamp + datestamp = datetime.now() + datetime_str = \ + datestamp.strftime("%Y%m%d_%H%M%S") + ## get file name + file_name = \ + os.path.basename(file_path) + ## calculate new disk path + hash_string = \ + f"{run_name}{tag}{file_name}{datetime_str}" + hash_md5 = \ + hashlib.\ + md5(hash_string.encode('utf-8')).\ + hexdigest() + ## create dir and copy report file + target_dir = \ + os.path.join( + base_path, + run_name, + hash_md5) + target_file_path = \ + os.path.join( + target_dir, + file_name) + os.makedirs( + target_dir, + exist_ok=True) + shutil.copyfile( + file_path, + target_file_path) + ## update db record try: + interop_entry = \ + IlluminaInteropData( + run_name=run_name, + tag=tag, + file_path=target_file_path, + date_stamp=datestamp + ) 
db.session.add(interop_entry) db.session.flush() db.session.commit() @@ -47,109 +67,227 @@ def add_interop_data(run_data: Any) -> None: db.session.rollback() raise except Exception as e: - raise ValueError("Failed adding interop data, error: {0}".format(e)) - -def edit_interop_data(run_data: Any) -> None: - try: - if isinstance(run_data, str): - run_data = json.loads(run_data) - if isinstance(run_data, bytes): - run_data = json.loads(run_data.decode()) - if "run_name" not in run_data: - raise ValueError("Missing run name") - try: - db.session.\ - query(IlluminaInteropData).\ - filter(IlluminaInteropData.run_name==run_data.get("run_name")).\ - update(run_data) - db.session.commit() - except: - db.session.rollback() - raise - except Exception as e: - raise ValueError("Failed to update interop data, error: {0}".format(e)) + raise ValueError( + f"Failed to load interop report to db, error: {e}") -def add_or_edit_interop_data(run_data: Any) -> None: +@celery.task(bind=True) +def async_load_interop_report( + self, + run_name: str, + tag: str, + file_path: str, + base_path: str) -> dict: try: - if isinstance(run_data, str): - run_data = json.loads(run_data) - if isinstance(run_data, bytes): - run_data = json.loads(run_data.decode()) - if "run_name" not in run_data: - raise ValueError("Missing run name") - result = \ - search_interop_for_run( - run_name=run_data.get('run_name')) - if result is None: - add_interop_data(run_data=run_data) - else: - edit_interop_data(run_data=run_data) + load_interop_report( + run_name=run_name, + tag=tag, + file_path=file_path, + base_path=base_path) + return {"message": "success"} except Exception as e: - raise ValueError("Failed to add or edit interop data, error: {0}".format(e)) + log.error( + "Failed to run celery job, error: {0}".\ + format(e)) class SeqrunInteropApi(ModelRestApi): resource_name = "interop_data" datamodel = SQLAInterface(IlluminaInteropData) - @expose('/search_run') - @rison() - def search_run(self, **kwargs): - try: - 
if "run_name" in kwargs['rison']: - message = 'EXIST' - result = \ - search_interop_for_run( - run_name=kwargs['rison']['run_name']) - if result is None: - message = 'NOT EXIST' - return self.response(200, message=message) - return self.response_400(message="Please send run_name") - except Exception as e: - logging.error(e) - - @expose('/add_run', methods=['POST']) + @expose('/add_report', methods=['POST']) @protect() - def add_run(self): + def add_report(self): try: + log.warn('received_res') + log.warn(f"Files: {request.files}") + log.warn(f"Data: {request.data}") + log.warn(f"Form: {request.form}") if not request.files: return self.response_400('No files') + json_data = request.form + run_name = json_data.get('run_name') + tag = json_data.get('tag') + if run_name is None or \ + tag is None: + return self.response_400('Missing run_name or tag') + ## get report file from request file_objs = request.files.getlist('file') file_obj = file_objs[0] + file_name = file_obj.filename file_obj.seek(0) - run_data = file_obj.read() - add_interop_data(run_data=run_data) - return self.response(200, message='added run data') + file_data = file_obj.read() + ## report file can be gzipped + if file_name.endswith('.gz'): + file_data = gzip.decompress(file_data).decode('utf-8') + ## get report file and dump it to tmp dir + report_dir = \ + tempfile.mkdtemp( + dir=app.config['CELERY_WORK_DIR'], + prefix='report_',) + report_file = \ + os.path.join(report_dir, file_name) + with open(report_file, 'wb') as fp: + fp.write(file_data) + ## send job to celery worker + base_dir = \ + os.path.join( + app.config['REPORT_UPLOAD_PATH'], + 'interop_reports') + _ = \ + async_load_interop_report.\ + apply_async(args=[ + run_name, + tag, + report_file, + base_dir]) + return self.response( + 200, + message=\ + f'successfully submitted interop report loading job for {os.path.basename(report_file)}') except Exception as e: - logging.error(e) + log.error(e) + return self.response_500('failed to load 
file') - @expose('/edit_run', methods=['POST']) - @protect() - def edit_run(self): - try: - if not request.files: - return self.response_400('No files') - file_objs = request.files.getlist('file') - file_obj = file_objs[0] - file_obj.seek(0) - run_data = file_obj.read() - edit_interop_data(run_data=run_data) - return self.response(200, message='updated run data') - except Exception as e: - logging.error(e) +# def search_interop_for_run(run_name: str) -> Any: +# try: +# result = \ +# db.session.\ +# query(IlluminaInteropData).\ +# filter(IlluminaInteropData.run_name==run_name).one_or_none() +# return result +# except Exception as e: +# raise ValueError("Failed lookup for interop data, error: {0}".format(e)) - @expose('/add_or_edit_run', methods=['POST']) - @protect() - def add_or_edit_run(self): - try: - if not request.files: - return self.response_400('No files') - file_objs = request.files.getlist('file') - file_obj = file_objs[0] - file_obj.seek(0) - run_data = file_obj.read() - add_or_edit_interop_data(run_data) - return self.response(200, message='successfully added or updated run data') - except Exception as e: - logging.error(e) \ No newline at end of file + +# def add_interop_data(run_data: Any) -> None: +# try: +# if isinstance(run_data, str): +# run_data = json.loads(run_data) +# if isinstance(run_data, bytes): +# run_data = json.loads(run_data.decode()) +# interop_entry = \ +# IlluminaInteropData( +# run_name = run_data.get('run_name'), +# table_data = run_data.get('table_data'), +# flowcell_data = run_data.get('flowcell_data'), +# intensity_data = run_data.get('intensity_data'), +# cluster_count_data = run_data.get('cluster_count_data'), +# density_data = run_data.get('density_data'), +# qscore_bins_data = run_data.get('qscore_bins_data'), +# qscore_cycles_data = run_data.get('qscore_cycles_data'), +# occupied_pass_filter = run_data.get('occupied_pass_filter')) +# try: +# db.session.add(interop_entry) +# db.session.flush() +# db.session.commit() +# 
except: +# db.session.rollback() +# raise +# except Exception as e: +# raise ValueError("Failed adding interop data, error: {0}".format(e)) + +# def edit_interop_data(run_data: Any) -> None: +# try: +# if isinstance(run_data, str): +# run_data = json.loads(run_data) +# if isinstance(run_data, bytes): +# run_data = json.loads(run_data.decode()) +# if "run_name" not in run_data: +# raise ValueError("Missing run name") +# try: +# db.session.\ +# query(IlluminaInteropData).\ +# filter(IlluminaInteropData.run_name==run_data.get("run_name")).\ +# update(run_data) +# db.session.commit() +# except: +# db.session.rollback() +# raise +# except Exception as e: +# raise ValueError("Failed to update interop data, error: {0}".format(e)) + + +# def add_or_edit_interop_data(run_data: Any) -> None: +# try: +# if isinstance(run_data, str): +# run_data = json.loads(run_data) +# if isinstance(run_data, bytes): +# run_data = json.loads(run_data.decode()) +# if "run_name" not in run_data: +# raise ValueError("Missing run name") +# result = \ +# search_interop_for_run( +# run_name=run_data.get('run_name')) +# if result is None: +# add_interop_data(run_data=run_data) +# else: +# edit_interop_data(run_data=run_data) +# except Exception as e: +# raise ValueError("Failed to add or edit interop data, error: {0}".format(e)) + + +# class SeqrunInteropApi(ModelRestApi): +# resource_name = "interop_data" +# datamodel = SQLAInterface(IlluminaInteropData) + +# @expose('/search_run') +# @rison() +# def search_run(self, **kwargs): +# try: +# if "run_name" in kwargs['rison']: +# message = 'EXIST' +# result = \ +# search_interop_for_run( +# run_name=kwargs['rison']['run_name']) +# if result is None: +# message = 'NOT EXIST' +# return self.response(200, message=message) +# return self.response_400(message="Please send run_name") +# except Exception as e: +# logging.error(e) + +# @expose('/add_run', methods=['POST']) +# @protect() +# def add_run(self): +# try: +# if not request.files: +# return 
self.response_400('No files') +# file_objs = request.files.getlist('file') +# file_obj = file_objs[0] +# file_obj.seek(0) +# run_data = file_obj.read() +# add_interop_data(run_data=run_data) +# return self.response(200, message='added run data') +# except Exception as e: +# logging.error(e) + +# @expose('/edit_run', methods=['POST']) +# @protect() +# def edit_run(self): +# try: +# if not request.files: +# return self.response_400('No files') +# file_objs = request.files.getlist('file') +# file_obj = file_objs[0] +# file_obj.seek(0) +# run_data = file_obj.read() +# edit_interop_data(run_data=run_data) +# return self.response(200, message='updated run data') +# except Exception as e: +# logging.error(e) + +# @expose('/add_or_edit_run', methods=['POST']) +# @protect() +# def add_or_edit_run(self): +# try: +# if not request.files: +# return self.response_400('No files') +# file_objs = request.files.getlist('file') +# file_obj = file_objs[0] +# file_obj.seek(0) +# run_data = file_obj.read() +# add_or_edit_interop_data(run_data) +# return self.response(200, message='successfully added or updated run data') +# except Exception as e: +# logging.error(e) \ No newline at end of file diff --git a/app/interop_view.py b/app/interop_view.py index 92266c0..9296d71 100644 --- a/app/interop_view.py +++ b/app/interop_view.py @@ -1,5 +1,6 @@ import json import logging +from app import cache from flask import abort from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_appbuilder import ModelView, SimpleFormView @@ -9,136 +10,153 @@ from .models import IlluminaInteropData from .forms import SeqrunInteropForm +log = logging.getLogger(__name__) + """ InterOp data view """ -def fetch_interop_data_by_id(run_id): - try: - results = \ - db.session.query(IlluminaInteropData).\ - filter(IlluminaInteropData.run_id==run_id).one_or_none() - if results is None: - abort(404) - run_name = results.run_name - intensity_data = results.intensity_data - table_data = 
results.table_data - flowcell_data = results.flowcell_data - cluster_count_data = results.cluster_count_data - density_data = results.density_data - qscore_bins_data = results.qscore_bins_data - qscore_cycles_data = results.qscore_cycles_data - occupied_pass_filter = results.occupied_pass_filter - date_stamp = results.date_stamp - if intensity_data is not None or \ - intensity_data != "": - intensity_data = json.loads(intensity_data) - return run_name, intensity_data, table_data, flowcell_data, \ - cluster_count_data, density_data, qscore_bins_data, \ - qscore_cycles_data, occupied_pass_filter, date_stamp - except Exception as e: - logging.error(e) - - class IlluminaInteropDataView(ModelView): datamodel = SQLAInterface(IlluminaInteropData) - label_columns = {'seqrun':'Sequencing run', 'date_stamp': 'Updated on'} - list_columns = ['seqrun', 'date_stamp'] - base_permissions = ['can_list', 'can_get_seqrun'] - base_order = ("date_stamp" , "desc") + label_columns = { + 'run_name':'Sequencing run', + 'tag':'Tag', + 'date_stamp': 'Updated on', + 'report': 'Report'} + list_columns = [ + 'run_name', + 'tag', + 'date_stamp', + 'report'] + base_permissions = ['can_list'] + base_order = ("date_stamp", "desc") - @expose('/interop/') - @has_access - def get_seqrun(self, id): - (run_name, intensity_data, table_data, flowcell_data, - cluster_count_data, density_data, qscore_bins_data, - qscore_cycles_data, occupied_pass_filter, date_stamp) = \ - fetch_interop_data_by_id(run_id=id) - chart_data = intensity_data.get("chart_data") - labels = intensity_data.get("labels") - flowcell_data = json.loads(flowcell_data) - surface1_data = flowcell_data.get("surface1") - surface2_data = flowcell_data.get("surface2") - cluster_count_data = json.loads(cluster_count_data) - density_data = json.loads(density_data) - qscore_bins_data = json.loads(qscore_bins_data) - qscore_cycles_data = json.loads(qscore_cycles_data) - if occupied_pass_filter is None: - occupied_pass_filter='' - if 
occupied_pass_filter != '': - occupied_pass_filter = json.loads(occupied_pass_filter) - return \ - self.render_template( - 'interop.html', - run_name=run_name, - date_stamp=date_stamp, - labels=labels, - surface1=surface1_data, - surface2=surface2_data, - table_data=table_data, - cluster_count_data=cluster_count_data, - density_data=density_data, - qscore_bins_data = qscore_bins_data, - qscore_cycles_data=qscore_cycles_data, - occupied_pass_filter=occupied_pass_filter, - chart_data=chart_data) +# def fetch_interop_data_by_id(run_id): +# try: +# results = \ +# db.session.query(IlluminaInteropData).\ +# filter(IlluminaInteropData.run_id==run_id).one_or_none() +# if results is None: +# abort(404) +# run_name = results.run_name +# intensity_data = results.intensity_data +# table_data = results.table_data +# flowcell_data = results.flowcell_data +# cluster_count_data = results.cluster_count_data +# density_data = results.density_data +# qscore_bins_data = results.qscore_bins_data +# qscore_cycles_data = results.qscore_cycles_data +# occupied_pass_filter = results.occupied_pass_filter +# date_stamp = results.date_stamp +# if intensity_data is not None or \ +# intensity_data != "": +# intensity_data = json.loads(intensity_data) +# return run_name, intensity_data, table_data, flowcell_data, \ +# cluster_count_data, density_data, qscore_bins_data, \ +# qscore_cycles_data, occupied_pass_filter, date_stamp +# except: +# raise -""" -def fetch_interop_data(run_name): - results = \ - db.session.query(IlluminaInteropData).\ - filter(IlluminaInteropData.run_name==run_name).one_or_none() - if results is None: - abort(404) - intensity_data = results.intensity_data - table_data = results.table_data - flowcell_data = results.flowcell_data - cluster_count_data = results.cluster_count_data - density_data = results.density_data - qscore_bins_data = results.qscore_bins_data - qscore_cycles_data = results.qscore_cycles_data - occupied_pass_filter = results.occupied_pass_filter - date_stamp 
= results.date_stamp - if intensity_data is not None or \ - intensity_data != "": - intensity_data = json.loads(intensity_data) - return intensity_data, table_data, flowcell_data, \ - cluster_count_data, density_data, qscore_bins_data, \ - qscore_cycles_data, occupied_pass_filter, date_stamp -class SeqrunInteropFormView(SimpleFormView): - form = SeqrunInteropForm - form_title = "Get Interop data" - def form_post(self, form): - (intensity_data, table_data, flowcell_data, - cluster_count_data, density_data, qscore_bins_data, - qscore_cycles_data, occupied_pass_filter, date_stamp) = \ - fetch_interop_data( - run_name=form.run_name.data.run_name) - chart_data = intensity_data.get("chart_data") - labels = intensity_data.get("labels") - flowcell_data = json.loads(flowcell_data) - surface1_data = flowcell_data.get("surface1") - surface2_data = flowcell_data.get("surface2") - cluster_count_data = json.loads(cluster_count_data) - density_data = json.loads(density_data) - qscore_bins_data = json.loads(qscore_bins_data) - qscore_cycles_data = json.loads(qscore_cycles_data) - if occupied_pass_filter != '': - occupied_pass_filter = json.loads(occupied_pass_filter) - return \ - self.render_template( - 'interop.html', - run_name=form.run_name.data.run_name, - date_stamp=date_stamp, - labels=labels, - surface1=surface1_data, - surface2=surface2_data, - table_data=table_data, - cluster_count_data=cluster_count_data, - density_data=density_data, - qscore_bins_data = qscore_bins_data, - qscore_cycles_data=qscore_cycles_data, - occupied_pass_filter=occupied_pass_filter, - chart_data=chart_data) -""" \ No newline at end of file +# class IlluminaInteropDataView(ModelView): +# datamodel = SQLAInterface(IlluminaInteropData) +# label_columns = {'seqrun':'Sequencing run', 'date_stamp': 'Updated on'} +# list_columns = ['seqrun', 'date_stamp'] +# base_permissions = ['can_list', 'can_get_seqrun'] +# base_order = ("date_stamp" , "desc") + +# @expose('/interop/') +# @has_access +# 
@cache.cached(timeout=600) +# def get_seqrun(self, id): +# (run_name, intensity_data, table_data, flowcell_data, +# cluster_count_data, density_data, qscore_bins_data, +# qscore_cycles_data, occupied_pass_filter, date_stamp) = \ +# fetch_interop_data_by_id(run_id=id) +# chart_data = intensity_data.get("chart_data") +# labels = intensity_data.get("labels") +# flowcell_data = json.loads(flowcell_data) +# surface1_data = flowcell_data.get("surface1") +# surface2_data = flowcell_data.get("surface2") +# cluster_count_data = json.loads(cluster_count_data) +# density_data = json.loads(density_data) +# qscore_bins_data = json.loads(qscore_bins_data) +# qscore_cycles_data = json.loads(qscore_cycles_data) +# if occupied_pass_filter is None: +# occupied_pass_filter='' +# if occupied_pass_filter != '': +# occupied_pass_filter = json.loads(occupied_pass_filter) +# return \ +# self.render_template( +# 'interop.html', +# run_name=run_name, +# date_stamp=date_stamp, +# labels=labels, +# surface1=surface1_data, +# surface2=surface2_data, +# table_data=table_data, +# cluster_count_data=cluster_count_data, +# density_data=density_data, +# qscore_bins_data = qscore_bins_data, +# qscore_cycles_data=qscore_cycles_data, +# occupied_pass_filter=occupied_pass_filter, +# chart_data=chart_data) + + +# def fetch_interop_data(run_name): +# results = \ +# db.session.query(IlluminaInteropData).\ +# filter(IlluminaInteropData.run_name==run_name).one_or_none() +# if results is None: +# abort(404) +# intensity_data = results.intensity_data +# table_data = results.table_data +# flowcell_data = results.flowcell_data +# cluster_count_data = results.cluster_count_data +# density_data = results.density_data +# qscore_bins_data = results.qscore_bins_data +# qscore_cycles_data = results.qscore_cycles_data +# occupied_pass_filter = results.occupied_pass_filter +# date_stamp = results.date_stamp +# if intensity_data is not None or \ +# intensity_data != "": +# intensity_data = json.loads(intensity_data) +# 
return intensity_data, table_data, flowcell_data, \ +# cluster_count_data, density_data, qscore_bins_data, \ +# qscore_cycles_data, occupied_pass_filter, date_stamp + +# class SeqrunInteropFormView(SimpleFormView): +# form = SeqrunInteropForm +# form_title = "Get Interop data" +# def form_post(self, form): +# (intensity_data, table_data, flowcell_data, +# cluster_count_data, density_data, qscore_bins_data, +# qscore_cycles_data, occupied_pass_filter, date_stamp) = \ +# fetch_interop_data( +# run_name=form.run_name.data.run_name) +# chart_data = intensity_data.get("chart_data") +# labels = intensity_data.get("labels") +# flowcell_data = json.loads(flowcell_data) +# surface1_data = flowcell_data.get("surface1") +# surface2_data = flowcell_data.get("surface2") +# cluster_count_data = json.loads(cluster_count_data) +# density_data = json.loads(density_data) +# qscore_bins_data = json.loads(qscore_bins_data) +# qscore_cycles_data = json.loads(qscore_cycles_data) +# if occupied_pass_filter != '': +# occupied_pass_filter = json.loads(occupied_pass_filter) +# return \ +# self.render_template( +# 'interop.html', +# run_name=form.run_name.data.run_name, +# date_stamp=date_stamp, +# labels=labels, +# surface1=surface1_data, +# surface2=surface2_data, +# table_data=table_data, +# cluster_count_data=cluster_count_data, +# density_data=density_data, +# qscore_bins_data = qscore_bins_data, +# qscore_cycles_data=qscore_cycles_data, +# occupied_pass_filter=occupied_pass_filter, +# chart_data=chart_data) \ No newline at end of file diff --git a/app/metadata/metadata_util.py b/app/metadata/metadata_util.py index 6302c0d..f5a9367 100644 --- a/app/metadata/metadata_util.py +++ b/app/metadata/metadata_util.py @@ -1,31 +1,125 @@ -import os, json, typing +import os +import json +import typing +import tempfile from typing import Tuple import pandas as pd -from ..models import Project -from ..models import IgfUser -from ..models import ProjectUser -from ..models import Project_attribute 
-from ..models import Sample -from ..models import Sample_attribute -from ..models import Experiment -from ..models import Experiment_attribute -from ..models import Run -from ..models import Run_attribute -from ..models import Platform -from ..models import Flowcell_barcode_rule -from ..models import Seqrun -from ..models import Seqrun_attribute -from ..models import Seqrun_stats -from ..models import Collection -from ..models import Collection_attribute -from ..models import Collection_group -from ..models import File -from ..models import File_attribute -from ..models import Pipeline -from ..models import Pipeline_seed -from ..models import Analysis +from dateutil.parser import parse +from ..models import ( + Project, + IgfUser, + ProjectUser, + Project_attribute, + Sample, + Sample_attribute, + Experiment, + Experiment_attribute, + Run, + Run_attribute, + Platform, + Flowcell_barcode_rule, + Seqrun, + Seqrun_attribute, + Seqrun_stats, + Collection, + Collection_attribute, + Collection_group, + File, + File_attribute, + Pipeline, + Pipeline_seed, + Analysis, + RawAnalysis, + RawAnalysisValidationSchema, + RawAnalysisTemplate, + Project_info_data, + Project_seqrun_info_data, + Project_seqrun_info_file, + Project_analysis_info_data, + Project_analysis_info_file, + RDSProject_backup) from .. 
import db +def backup_specific_portal_tables(json_file: str) -> str: + try: + backup_order = [ + RawAnalysis, + RawAnalysisValidationSchema, + RawAnalysisTemplate, + Project_info_data, + Project_seqrun_info_data, + Project_seqrun_info_file, + Project_analysis_info_data, + Project_analysis_info_file, + RDSProject_backup + ] + db_data = dict() + for table_name in backup_order: + data = \ + pd.read_sql( + table_name.__tablename__, + db.session.bind) + if table_name.__tablename__=='raw_analysis': + data['date_stamp'] = \ + data['date_stamp'].astype(str) + if table_name.__tablename__=='raw_analysis_validation_schema': + data['date_stamp'] = \ + data['date_stamp'].astype(str) + if table_name.__tablename__=='project_seqrun_info_file': + data['date_created'] = \ + data['date_created'].astype(str) + if table_name.__tablename__=='project_seqrun_info_file': + data['date_updated'] = \ + data['date_updated'].astype(str) + if table_name.__tablename__=='project_analysis_info_file': + data['date_created'] = \ + data['date_created'].astype(str) + if table_name.__tablename__=='project_analysis_info_file': + data['date_updated'] = \ + data['date_updated'].astype(str) + if table_name.__tablename__=='rds_project_backup': + data['date_stamp'] = \ + data['date_stamp'].astype(str) + # if table_name.__tablename__=='raw_analysis': + # data = \ + # pd.read_sql( + # table_name.__tablename__, + # db.session.bind, + # parse_dates=["date_stamp"]) + # if table_name.__tablename__=='raw_analysis_validation_schema': + # data = \ + # pd.read_sql( + # table_name.__tablename__, + # db.session.bind, + # parse_dates=["date_stamp"]) + # if table_name.__tablename__=='project_seqrun_info_file': + # data = \ + # pd.read_sql( + # table_name.__tablename__, + # db.session.bind, + # parse_dates=["date_created", "date_updated"]) + # if table_name.__tablename__=='project_analysis_info_file': + # data = \ + # pd.read_sql( + # table_name.__tablename__, + # db.session.bind, + # parse_dates=["date_created", 
"date_updated"]) + # if table_name.__tablename__=='rds_project_backup': + # data = \ + # pd.read_sql( + # table_name.__tablename__, + # db.session.bind, + # parse_dates=["date_stamp"]) + db_data.update({ + table_name.__tablename__: data.to_dict(orient="records")}) + with open(json_file, 'w') as fp: + json.dump(db_data, fp) + return json_file + except Exception as e: + raise ValueError( + f"Failed to backup portal tables, error: {e}") + + def cleanup_and_load_new_data_to_metadata_tables( input_json: str, cleanup: bool=True) -> None: @@ -36,27 +130,29 @@ def cleanup_and_load_new_data_to_metadata_tables( json_data = json.load(fp) if not isinstance(json_data, dict): raise TypeError('No dictionary found for metadata update') + ## get a tmp json file + (_, json_file) = \ + tempfile.mkstemp( + suffix='.json', + prefix='portal_metadata_',) + ## backup portal data + json_file = \ + backup_specific_portal_tables(json_file) + # with open(json_file, 'r') as fp: + # t_data = fp.read() + # print(t_data) + ## backup main db delete_order_tables = [ - File_attribute, - File, - Collection_attribute, - Collection, - Collection_group, Pipeline_seed, Pipeline, Analysis, Platform, Flowcell_barcode_rule, - Seqrun_attribute, - Seqrun_stats, Seqrun, Run_attribute, Run, - Experiment_attribute, Experiment, - Sample_attribute, Sample, - Project_attribute, Project, IgfUser, ProjectUser] @@ -66,46 +162,221 @@ def cleanup_and_load_new_data_to_metadata_tables( IgfUser, ProjectUser, Sample, - Sample_attribute, Experiment, - Experiment_attribute, Platform, Flowcell_barcode_rule, Seqrun, - Seqrun_stats, - Seqrun_attribute, Run, Run_attribute, Pipeline, - Pipeline_seed, - Collection, - Collection_attribute, - File, - Collection_group, - File_attribute] + Pipeline_seed] + portal_backup_order = [ + RawAnalysis, + RawAnalysisValidationSchema, + RawAnalysisTemplate, + Project_info_data, + Project_seqrun_info_data, + Project_seqrun_info_file, + Project_analysis_info_data, + Project_analysis_info_file, + 
RDSProject_backup + ] + portal_delete_order = [ + RawAnalysisValidationSchema, + RawAnalysisTemplate, + RawAnalysis, + Project_seqrun_info_data, + Project_seqrun_info_file, + Project_analysis_info_data, + Project_analysis_info_file, + Project_info_data, + RDSProject_backup + ] try: + ## delete main tables for table in delete_order_tables: if table.__tablename__ in json_data.keys(): db.session.query(table).delete() + ## delete portal tables + with open(json_file, 'r') as fp: + # t_data = fp.read() + portal_json_data = json.load(fp) + #print(portal_json_data) + for table in portal_delete_order: + if table.__tablename__ in portal_json_data.keys(): + db.session.query(table).delete() + ## load main data for table in create_order_tables: if table.__tablename__ in json_data.keys(): table_data = json_data.get(table.__tablename__) df = pd.DataFrame(table_data) + ## project + if table.__tablename__=='project' and \ + 'start_timestamp' in df.columns: + df['start_timestamp'] = \ + pd.to_datetime(df.start_timestamp) + ## user + if table.__tablename__=='user' and \ + 'start_timestamp' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + ## sample + if table.__tablename__=='sample': + if 'date_created' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + if 'taxon_id' in df.columns: + df['taxon_id'] = \ + df['taxon_id'].fillna(0) + df.fillna('', inplace=True) + ## platform + if table.__tablename__=='platform' and \ + 'date_created' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + ## seqrun + if table.__tablename__=='seqrun' and \ + 'date_created' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + ## experiment + if table.__tablename__=='experiment' and \ + 'date_created' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + ## run + if table.__tablename__=='run' and \ + 'date_created' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + # 
## collection + # if table.__tablename__=='collection' and \ + # 'date_stamp' in df.columns: + # df['date_stamp'] = \ + # pd.to_datetime(df.date_stamp) + # ## file + # if table.__tablename__=='file' and \ + # 'date_created' in df.columns and \ + # 'date_updated' in df.columns: + # df['date_created'] = \ + # pd.to_datetime(df.date_created) + # df['date_updated'] = \ + # pd.to_datetime(df.date_updated) + ## pipeline + if table.__tablename__=='pipeline' and \ + 'date_stamp' in df.columns: + df['date_stamp'] = \ + pd.to_datetime(df.date_stamp) + ## pipeline_seed + if table.__tablename__=='pipeline_seed' and \ + 'date_stamp' in df.columns: + df['date_stamp'] = \ + pd.to_datetime(df.date_stamp) + ## fill NA if table.__tablename__=='project_user': pass - elif table.__tablename__=='sample': - df['taxon_id'] = df['taxon_id'].fillna(0) - df.fillna('', inplace=True) else: df.fillna('', inplace=True) db.session.\ bulk_insert_mappings( table, df.to_dict(orient="records")) + ## load portal data + for table in portal_backup_order: + if table.__tablename__ in portal_json_data.keys(): + table_data = portal_json_data.get(table.__tablename__) + df = pd.DataFrame(table_data) + ## raw_analysis + if table.__tablename__ == 'raw_analysis' and \ + 'date_stamp' in df.columns: + df['date_stamp'] = \ + pd.to_datetime(df.date_stamp) + ## raw_analysis_validation_schema + if table.__tablename__ == 'raw_analysis_validation_schema' and \ + 'date_stamp' in df.columns: + df['date_stamp'] = \ + pd.to_datetime(df.date_stamp) + ## project_seqrun_info_file + if table.__tablename__ == 'project_seqrun_info_file' and \ + 'date_created' in df.columns and \ + 'date_updated' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + df['date_updated'] = \ + pd.to_datetime(df.date_updated) + ## project_analysis_info_file + if table.__tablename__ == 'project_analysis_info_file' and \ + 'date_created' in df.columns and \ + 'date_updated' in df.columns: + df['date_created'] = \ + 
pd.to_datetime(df.date_created) + df['date_updated'] = \ + pd.to_datetime(df.date_updated) + ## rds_project_backup + if table.__tablename__ == 'rds_project_backup' and \ + 'date_stamp' in df.columns: + df['date_stamp'] = \ + pd.to_datetime(df.date_stamp) + ## project_index + if table.__tablename__ == 'project_index' and \ + 'update_time' in df.columns: + df['update_time'] = \ + pd.to_datetime(df.update_time) + ## load data + db.session.\ + bulk_insert_mappings( + table, + df.to_dict(orient="records")) + ## save all changes + db.session.commit() + except Exception as e: + db.session.rollback() + raise ValueError( + f"Failed to load data db, error: {e}") + ## load collection tables + try: + delete_order_tables = [ + Collection_group, + File, + Collection + ] + create_order_tables = [ + Collection, + File, + Collection_group + ] + for table in delete_order_tables: + if table.__tablename__ in json_data.keys(): + db.session.query(table).delete() + ## load main data + for table in create_order_tables: + if table.__tablename__ in json_data.keys(): + table_data = json_data.get(table.__tablename__) + df = pd.DataFrame(table_data) + ## collection + if table.__tablename__=='collection' and \ + 'date_stamp' in df.columns: + df['date_stamp'] = \ + pd.to_datetime(df.date_stamp) + ## file + if table.__tablename__=='file' and \ + 'date_created' in df.columns and \ + 'date_updated' in df.columns: + df['date_created'] = \ + pd.to_datetime(df.date_created) + df['date_updated'] = \ + pd.to_datetime(df.date_updated) + df.fillna('', inplace=True) + db.session.\ + bulk_insert_mappings( + table, + df.to_dict(orient="records")) + ## save all changes db.session.commit() except Exception as e: db.session.rollback() - raise ValueError("Failed to load data db, error: {0}".format(e)) + raise ValueError( + f"Failed to load collection data to db, error: {e}") finally: if cleanup: os.remove(input_json) @@ -123,9 +394,7 @@ def check_for_projects_in_metadata_db( query(Project.project_igf_id).\ 
filter(Project.project_igf_id.in_(project_list)).\ all() - results = [ - i[0] if isinstance(i, tuple) else i - for i in results] + results = [i[0] for i in results] output = dict() for i in project_list: if i in results: diff --git a/app/metadata_api.py b/app/metadata_api.py index 8b0726b..6a82a6c 100644 --- a/app/metadata_api.py +++ b/app/metadata_api.py @@ -8,6 +8,8 @@ from . import app, db, celery from .metadata.metadata_util import cleanup_and_load_new_data_to_metadata_tables +log = logging.getLogger(__name__) + @celery.task(bind=True) def async_cleanup_and_load_new_data_to_metadata_tables( self, json_file: str) -> dict: @@ -15,7 +17,7 @@ def async_cleanup_and_load_new_data_to_metadata_tables( cleanup_and_load_new_data_to_metadata_tables(json_file) return {"message": "success"} except Exception as e: - logging.error( + log.error( "Failed to run celery job, error: {0}".\ format(e)) @@ -51,6 +53,7 @@ def submit_cleanup_job(self): apply_async(args=[json_file]) return self.response(200, message='successfully submitted metadata update job') except Exception as e: - logging.error(e) + log.error(e) + return self.response_500('failed to submit metadata update job') diff --git a/app/metadata_view.py b/app/metadata_view.py index bd8d082..ded244e 100644 --- a/app/metadata_view.py +++ b/app/metadata_view.py @@ -1,23 +1,202 @@ +import os +import json +import logging +import gviz_api +from app import db +from app import cache +import pandas as pd +from flask import abort, render_template, url_for, Markup from flask_appbuilder import ModelView from flask_appbuilder.views import MasterDetailView from .models import Project, IgfUser, Seqrun, Analysis, Sample +from .models import Project_info_data, Project_seqrun_info_data, Project_seqrun_info_file +from .models import Project_analysis_info_data, Project_analysis_info_file from flask import redirect, flash -from app import db from flask_appbuilder.actions import action from flask_appbuilder.models.sqla.interface import 
SQLAInterface +from flask_appbuilder.baseviews import BaseView, expose +from flask_appbuilder.security.decorators import protect, has_access + +log = logging.getLogger(__name__) + +def convert_to_gviz_json_for_display(description, data, columns_order, output_file=None): + ''' + A utility method for writing gviz format json file for data display using Google charts + + :param description, A dictionary for the data table description + :param data, A list containing the data table + :column_order, A tuple of data table column order + :param output_file, Output filename, default None + :returns: None if output_file name is present, or else json_data string + ''' + try: + data_table = gviz_api.DataTable(description) # load description to gviz api + data_table.LoadData(data) # load data to gviz_api + final_data = data_table.ToJSon(columns_order=columns_order) # create final data structure + if output_file is None: + return final_data + else: + with open(output_file,'w') as jf: + jf.write(final_data) # write final data to output file + return None + except: + raise + + +def get_project_info_analysis_data(project_id): + try: + analysis_results = \ + db.session.query( + Analysis.analysis_name, + Project_analysis_info_data.analysis_tag, + Project_analysis_info_file.file_tag, + Project_analysis_info_file.file_path, + Project_analysis_info_file.project_analysis_info_file_id, + ).\ + join(Project_analysis_info_data, Analysis.analysis_id==Project_analysis_info_data.analysis_id).\ + join(Project_analysis_info_file, Project_analysis_info_data.project_analysis_info_data_id==Project_analysis_info_file.project_analysis_info_data_id).\ + filter(Project_analysis_info_data.project_id==project_id).\ + all() + analysis_results_df = \ + pd.DataFrame( + analysis_results, + columns=[ + "Analysis name", + "Analysis tag", + "File tag", + "file_path", + "file_id"]) + analysis_results_df["file_id"].\ + astype(int) + analysis_results_df["file_id"].\ + fillna(0, inplace=True) + 
analysis_results_df["Report"] = \ + analysis_results_df.\ + apply(lambda x: \ + '' + os.path.basename(x['file_path']) + '', + axis=1) + analysis_results_df = \ + analysis_results_df[[ + "Analysis name", + "Analysis tag", + "File tag", + "Report" + ]] + analysis_results_gviz = \ + convert_to_gviz_json_for_display( + description=[(col_name, "string") for col_name in analysis_results_df.columns.tolist()], + data=analysis_results_df.values.tolist(), + columns_order=analysis_results_df.columns.tolist()) + return analysis_results_gviz + except Exception as e: + log.error(e) + + +def get_project_info_seqrun_data(project_id): + try: + seqrun_results = \ + db.session.query( + Seqrun.seqrun_igf_id, + Project_seqrun_info_data.lane_number, + Project_seqrun_info_data.index_group_tag, + Project_seqrun_info_file.file_tag, + Project_seqrun_info_file.file_path, + Project_seqrun_info_file.project_seqrun_info_file_id + ).\ + join(Seqrun, Seqrun.seqrun_id==Project_seqrun_info_data.seqrun_id).\ + join(Project_info_data, Project_info_data.project_info_data_id==Project_seqrun_info_data.project_info_data_id).\ + join(Project_seqrun_info_file, Project_seqrun_info_file.project_seqrun_info_data_id==Project_seqrun_info_data.project_seqrun_info_data_id).\ + filter(Project_info_data.project_id==project_id).\ + all() + seqrun_results_df = \ + pd.DataFrame( + seqrun_results, + columns=["Sequencing run", + "Lane number", + "Index group", + "File tag", + "file_path", + "file_id"]) + seqrun_results_df["file_id"].\ + astype(int) + seqrun_results_df["file_id"].\ + fillna(0, inplace=True) + seqrun_results_df["Report"] = \ + seqrun_results_df.\ + apply(lambda x: \ + '' + os.path.basename(x['file_path']) + '', + axis=1) + seqrun_results_df = \ + seqrun_results_df[[ + "Sequencing run", + "Lane number", + "Index group", + "File tag", + "Report"]] + seqrun_results_gviz = \ + convert_to_gviz_json_for_display( + description=[(col_name, "string") for col_name in seqrun_results_df.columns.tolist()], + 
data=seqrun_results_df.values.tolist(), + columns_order=seqrun_results_df.columns.tolist()) + return seqrun_results_gviz + except Exception as e: + log.error(e) + + +def fetch_project_info_data(project_id): + try: + project_igf_id = \ + db.session.query(Project).\ + filter(Project.project_id==project_id).one_or_none() + if project_igf_id is None: + abort(404) + results = \ + db.session.query(Project_info_data).\ + filter(Project_info_data.project_id==project_id).one_or_none() + if results is None: + abort(404) + seqrun_results_gviz = \ + get_project_info_seqrun_data(project_id=project_id) + analysis_results_gviz = \ + get_project_info_analysis_data(project_id=project_id) + return project_igf_id, results.sample_read_count_data, results.project_history_data, seqrun_results_gviz,analysis_results_gviz + except Exception as e: + log.error(e) class ProjectView(ModelView): datamodel = SQLAInterface(Project) label_columns = { - "project_igf_id": "Name", + "project_info": "Project name", "status": "Status", "start_timestamp": "Created on" } - list_columns = ["project_igf_id", "status", "start_timestamp"] + list_columns = ["project_info", "status", "start_timestamp"] search_columns = ["project_igf_id", "status", "start_timestamp"] - base_permissions = ["can_list"] + base_permissions = ["can_list", "can_get_project_data"] base_order = ("project_id", "desc") + @expose('/project_data/') + @has_access + @cache.cached(timeout=600) + def get_project_data(self, id): + (project_igf_id, sample_read_count_data, + project_history_data, seqrun_results_gviz, + analysis_results_gviz) = \ + fetch_project_info_data(project_id=id) + sample_read_count_data = \ + json.loads(sample_read_count_data) + #project_history_data = \ + # json.loads(project_history_data) + return \ + self.render_template( + 'project_info.html', + project_igf_id=project_igf_id, + sample_read_count_data=sample_read_count_data, + project_history_data=project_history_data, + seqrun_results_gviz_data=seqrun_results_gviz, + 
analysis_results_gviz_data=analysis_results_gviz, + image_height=700) + class UserView(ModelView): datamodel = SQLAInterface(IgfUser) label_columns = { @@ -39,25 +218,6 @@ class SeqrunView(ModelView): base_permissions = ["can_list"] base_order = ("seqrun_id", "desc") -class AnalysisView(ModelView): - datamodel = SQLAInterface(Analysis) - list_columns = ["analysis_name", "analysis_type", "project.project_igf_id"] - base_permissions = ["can_list", "can_show"] - base_order = ("analysis_id", "desc") - @action("trigger_analysis_pipeline", "Trigger analysis pipeline", confirmation="confirm pipeline run?", icon="fa-rocket") - def trigger_analysis_pipeline(self, item): - id_list = list() - analysis_list = list() - if isinstance(item, list): - id_list = [i.analysis_id for i in item] - analysis_list = [i.analysis_name for i in item] - else: - id_list = [item.analysis_id] - analysis_list = [item.analysis_name] - flash("Submitted jobs for {0}".format(', '.join(analysis_list)), "info") - self.update_redirect() - return redirect(self.get_redirect()) - class SampleView(ModelView): datamodel = SQLAInterface(Sample, db.session) label_columns = { diff --git a/app/models.py b/app/models.py index ad65d50..e89300c 100644 --- a/app/models.py +++ b/app/models.py @@ -1,4 +1,5 @@ import datetime, json +from . 
import db from flask import Markup, url_for from flask_appbuilder import Model from sqlalchemy.dialects.mysql import INTEGER @@ -8,6 +9,11 @@ from sqlalchemy import UnicodeText from sqlalchemy.types import TypeDecorator from flask_appbuilder.models.mixins import AuditMixin +from flask_appbuilder.models.decorators import renders +from sqlalchemy.orm import column_property +from sqlalchemy import select, func, literal +from sqlalchemy.sql.functions import coalesce +from sqlalchemy.orm import object_session """ @@ -19,6 +25,7 @@ class JSONType(TypeDecorator): It will assign JSON datatype for mysql tables and unicodetext for sqlite ''' impl = UnicodeText + cache_ok = True def load_dialect_impl(self, dialect): if dialect.name == 'mysql': @@ -55,6 +62,7 @@ class LONGTEXTType(TypeDecorator): It will assign LONGTEXT datatype for mysql tables and unicodetext for sqlite ''' impl = UnicodeText + cache_ok = True def load_dialect_impl(self, dialect): if dialect.name == 'mysql': @@ -89,24 +97,18 @@ def process_result_value(self, value, dialect): class IlluminaInteropData(Model): __tablename__ = 'illumina_interop_data' __table_args__ = ( - UniqueConstraint('run_name'), + UniqueConstraint('run_name', 'tag', 'date_stamp'), { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) - run_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) - run_name = Column(String(50), nullable=False) - table_data = Column(TEXT()) - flowcell_data = Column(TEXT()) - intensity_data = Column(TEXT()) - cluster_count_data = Column(TEXT()) - density_data = Column(TEXT()) - qscore_bins_data = Column(TEXT()) - qscore_cycles_data = Column(TEXT()) - occupied_pass_filter = Column(TEXT()) + report_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + run_name = Column(String(100), nullable=False) + tag = Column(String(200), nullable=False) + file_path = Column(String(500), nullable=False) + status = Column(Enum("ACTIVE", "WITHDRAWN", "UNKNOWN"), nullable=False, 
server_default='ACTIVE') date_stamp = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp(), onupdate=datetime.datetime.now) def __repr__(self): return self.run_name - - def seqrun(self): - return Markup(''+self.run_name+'') + def report(self): + return Markup('report') """ Pre de-multiplexing data @@ -115,23 +117,20 @@ def seqrun(self): class PreDeMultiplexingData(Model): __tablename__ = 'pre_demultiplexing_data' __table_args__ = ( - UniqueConstraint('run_name', 'samplesheet_tag'), + UniqueConstraint('run_name', 'samplesheet_tag', 'date_stamp'), { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) demult_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) run_name = Column(String(50), nullable=False) - samplesheet_tag = Column(String(50), nullable=False) - flowcell_cluster_plot = Column(TEXT()) - project_summary_table = Column(TEXT()) - project_summary_plot = Column(TEXT()) - sample_table = Column(TEXT()) - sample_plot= Column(TEXT()) - undetermined_table = Column(TEXT()) - undetermined_plot = Column(TEXT()) + samplesheet_tag = Column(String(200), nullable=False) + file_path = Column(String(500), nullable=False) + status = Column(Enum("ACTIVE", "WITHDRAWN", "UNKNOWN"), nullable=False, server_default='ACTIVE') date_stamp = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp(), onupdate=datetime.datetime.now) def __repr__(self): return self.run_name def report(self): - return Markup('report') + return Markup('report') + def download_report(self): + return Markup('download') """ Admin home view @@ -206,8 +205,12 @@ class RawSeqrun(Model): override_cycles = Column(String(30), nullable=True) status = Column(Enum("ACTIVE", "REJECTED", "PREDEMULT", "READY", "FINISHED"), nullable=False, server_default='ACTIVE') date_stamp = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp(), onupdate=datetime.datetime.now) - samplesheet_id = Column(INTEGER(unsigned=True), ForeignKey("samplesheet.samplesheet_id", 
onupdate="NO ACTION", ondelete="NO ACTION"), nullable=True) + samplesheet_id = Column(INTEGER(unsigned=True), ForeignKey("samplesheet.samplesheet_id", onupdate="NO ACTION", ondelete="NO ACTION")) samplesheet = relationship('SampleSheetModel') + mismatches = Column(Enum("0", "1", "2"), nullable=True, server_default='1') + trigger_time = Column(TIMESTAMP(), nullable=True) + run_config = Column(LONGTEXTType(), nullable=True) + def __repr__(self): return self.raw_seqrun_igf_id @@ -218,16 +221,55 @@ def __repr__(self): class RawAnalysis(Model): __tablename__ = 'raw_analysis' __table_args__ = ( - UniqueConstraint('analysis_tag'), + UniqueConstraint('analysis_name', 'project_id'), { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) raw_analysis_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) - analysis_tag = Column(String(50), nullable=False) - analysis_yaml = Column(LONGTEXTType(), nullable=False) - status = Column(Enum("VALIDATED", "FAILED", "SYNCHED", "UNKNOWN"), nullable=False, server_default='UNKNOWN') - report = Column(TEXT()) + project_id = Column(INTEGER(unsigned=True), ForeignKey('project.project_id', onupdate="CASCADE", ondelete="SET NULL")) + project = relationship('Project') + pipeline_id = Column(INTEGER(unsigned=True), ForeignKey('pipeline.pipeline_id', onupdate="CASCADE", ondelete="SET NULL")) + pipeline = relationship('Pipeline') + analysis_name = Column(String(120), nullable=False) + analysis_yaml = Column(LONGTEXTType(), nullable=True) + status = Column(Enum("VALIDATED", "FAILED", "REJECTED", "SYNCHED", "UNKNOWN"), nullable=False, server_default='UNKNOWN') + report = Column(LONGTEXTType()) + date_stamp = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp(), onupdate=datetime.datetime.now) + def __repr__(self): + return self.analysis_name + + +""" + Raw analysis validation schema +""" + +class RawAnalysisValidationSchema(Model): + __tablename__ = 'raw_analysis_validation_schema' + __table_args__ = ( + 
UniqueConstraint('pipeline_id'), + { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) + raw_analysis_schema_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + pipeline_id = Column(INTEGER(unsigned=True), ForeignKey('pipeline.pipeline_id', onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + pipeline = relationship('Pipeline') + json_schema = Column(JSONType) + status = Column(Enum("VALIDATED", "FAILED", "REJECTED", "SYNCHED", "UNKNOWN"), nullable=False, server_default='UNKNOWN') date_stamp = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp(), onupdate=datetime.datetime.now) def __repr__(self): - return self.analysis_tag + return self.pipeline.pipeline_name + + +""" + Raw analysis template +""" +class RawAnalysisTemplate(Model): + __tablename__ = 'raw_analysis_template' + __table_args__ = ( + UniqueConstraint('template_tag'), + { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) + template_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + template_tag = Column(String(80), nullable=False) + template_data = Column(LONGTEXTType(), nullable=False) + def __repr__(self): + return self.template_tag + """ Index tables @@ -265,14 +307,118 @@ class SampleIndex(AuditMixin, Model): i5_index = Column(String(20), nullable=True) avg_region_molarity = Column(String(10), nullable=True) avg_fragment_size = Column(INTEGER, nullable=True) - project_index_id = Column(INTEGER(unsigned=True), ForeignKey("project_index.project_index_id", onupdate="NO ACTION", ondelete="NO ACTION"), nullable=True) + project_index_id = Column(INTEGER(unsigned=True), ForeignKey("project_index.project_index_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) project_index = relationship('ProjectIndex') def __repr__(self): return self.sample_name +""" + Project info +""" + +class Project_info_data(Model): + __tablename__ = 'project_info_data' + __table_args__ = ( + UniqueConstraint('project_info_data_id',), + { 
'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) + project_info_data_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + sample_read_count_data = Column(LONGTEXTType()) + project_history_data = Column(LONGTEXTType()) + project_id = Column(INTEGER(unsigned=True), ForeignKey("project.project_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + project = relationship('Project') + def __repr__(self): + return self.project_info_data.project_info_data_id + + +""" + Project seqrun info +""" + +class Project_seqrun_info_data(Model): + __tablename__ = 'project_seqrun_info_data' + __table_args__ = ( + UniqueConstraint('project_id', 'seqrun_id', 'lane_number', 'index_group_tag'), + { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) + project_seqrun_info_data_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + project_id = Column(INTEGER(unsigned=True), ForeignKey("project.project_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + project = relationship('Project') + seqrun_id = Column(INTEGER(unsigned=True), ForeignKey("seqrun.seqrun_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + seqrun = relationship('Seqrun') + project_info_data_id = Column(INTEGER(unsigned=True), ForeignKey("project_info_data.project_info_data_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + project_info_data = relationship("Project_info_data") + lane_number = Column(Enum('1', '2', '3', '4', '5', '6', '7', '8'), nullable=False) + index_group_tag = Column(String(120), nullable=False) + def __repr__(self): + return self.project_seqrun_info_data.project_seqrun_info_data_id + +""" + Project seqrun file +""" + +class Project_seqrun_info_file(Model): + __tablename__ = 'project_seqrun_info_file' + __table_args__ = ( + UniqueConstraint('file_path',), + { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) + project_seqrun_info_file_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + 
project_seqrun_info_data_id = Column(INTEGER(unsigned=True), ForeignKey("project_seqrun_info_data.project_seqrun_info_data_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + project_seqrun_info_data = relationship("Project_seqrun_info_data") + file_tag = Column(String(120),) + file_path = Column(String(1000), nullable=False) + md5 = Column(String(65)) + size = Column(String(52)) + date_created = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp()) + date_updated = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp(), onupdate=datetime.datetime.now ) + def __repr__(self): + return self.project_seqrun_info_file.project_seqrun_info_file_id + +""" + Project analysis info +""" + +class Project_analysis_info_data(Model): + __tablename__ = 'project_analysis_info_data' + __table_args__ = ( + UniqueConstraint('project_id', 'analysis_id'), + { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) + project_analysis_info_data_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + project_id = Column(INTEGER(unsigned=True), ForeignKey("project.project_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + project = relationship('Project') + analysis_id = Column(INTEGER(unsigned=True), ForeignKey("analysis.analysis_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + analysis = relationship('Analysis') + project_info_data_id = Column(INTEGER(unsigned=True), ForeignKey("project_info_data.project_info_data_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + project_info_data = relationship("Project_info_data") + analysis_tag = Column(String(120), nullable=False) + def __repr__(self): + return self.project_analysis_info_data.project_analysis_info_data_id + +""" + Project analysis file +""" + +class Project_analysis_info_file(Model): + __tablename__ = 'project_analysis_info_file' + __table_args__ = ( + UniqueConstraint('file_path',), + { 'mysql_engine':'InnoDB', 'mysql_charset':'utf8' }) 
+ project_analysis_info_file_id = Column(INTEGER(unsigned=True), primary_key=True, nullable=False) + project_analysis_info_data_id = Column(INTEGER(unsigned=True), ForeignKey("project_analysis_info_data.project_analysis_info_data_id", onupdate="CASCADE", ondelete="SET NULL"), nullable=True) + project_analysis_info_data = relationship("Project_analysis_info_data") + file_tag = Column(String(120)) + file_path = Column(String(1000), nullable=False) + md5 = Column(String(65)) + size = Column(String(52)) + date_created = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp()) + date_updated = Column(TIMESTAMP(), nullable=False, server_default=current_timestamp(), onupdate=datetime.datetime.now ) + def __repr__(self): + return self.project_analysis_info_file.project_analysis_info_file_id + + + """ RDS project backup """ + class RDSProject_backup(Model): __tablename__ = 'rds_project_backup' __table_args__ = ( @@ -332,6 +478,10 @@ def __repr__(self): ''' return self.project_igf_id + def project_info(self): + return Markup(''+self.project_igf_id+'') + + class IgfUser(Model): ''' diff --git a/app/pre_demultiplexing_data_api.py b/app/pre_demultiplexing_data_api.py index 1c2eed5..4a1cda7 100644 --- a/app/pre_demultiplexing_data_api.py +++ b/app/pre_demultiplexing_data_api.py @@ -1,182 +1,316 @@ -import json, logging +import os, json, logging, hashlib, shutil, gzip, tempfile +from datetime import datetime from flask_appbuilder import ModelRestApi -from flask import request +from flask import request, jsonify from flask_appbuilder.api import expose from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_appbuilder.security.decorators import protect -from . import db +from . 
import db, app, celery from .models import PreDeMultiplexingData """ Pre-demultiplexing data Api """ -def search_predemultiplexing_data(run_name, samplesheet_tag): - try: - result = \ - db.session.\ - query(PreDeMultiplexingData).\ - filter(PreDeMultiplexingData.run_name==run_name).\ - filter(PreDeMultiplexingData.samplesheet_tag==samplesheet_tag).\ - one_or_none() - return result - except Exception as e: - raise ValueError( - "Failed to search pre demultiplexing data, error: {0}".\ - format(e)) - +log = logging.getLogger(__name__) -def add_predemultiplexing_data(data): +def load_predemult_report( + run_name: str, + tag_name: str, + file_path: str, + base_path: str): try: - if isinstance(data, bytes): - data = json.loads(data.decode()) - if isinstance(data, str): - data = json.loads(data) - flowcell_cluster_plot = data.get("flowcell_cluster_plot") - if isinstance(flowcell_cluster_plot, dict): - flowcell_cluster_plot = json.dumps(flowcell_cluster_plot) - project_summary_table = data.get("project_summary_table") - if isinstance(project_summary_table, dict): - project_summary_table = json.dumps(project_summary_table) - project_summary_plot = data.get("project_summary_plot") - if isinstance(project_summary_plot, dict): - project_summary_plot = json.dumps(project_summary_plot) - sample_table = data.get("sample_table") - if isinstance(sample_table, dict): - sample_table = json.dumps(sample_table) - sample_plot = data.get("sample_plot") - if isinstance(sample_plot, dict): - sample_plot = json.dumps(sample_plot) - undetermined_table = data.get("undetermined_table") - if isinstance(undetermined_table, dict): - undetermined_table = json.dumps(undetermined_table) - undetermined_plot = data.get("undetermined_plot") - if isinstance(undetermined_plot, dict): - undetermined_plot = json.dumps(undetermined_plot) - predemult_data = \ - PreDeMultiplexingData( - run_name=data.get("run_name"), - samplesheet_tag=data.get("samplesheet_tag"), - flowcell_cluster_plot=flowcell_cluster_plot, 
- project_summary_table=project_summary_table, - project_summary_plot=project_summary_plot, - sample_table=sample_table, - sample_plot=sample_plot, - undetermined_table=undetermined_table, - undetermined_plot=undetermined_plot) + ## get date stamp + datestamp = datetime.now() + datetime_str = \ + datestamp.strftime("%Y%m%d_%H%M%S") + ## get file name + file_name = \ + os.path.basename(file_path) + ## calculate new disk path + hash_string = \ + f"{run_name}{tag_name}{file_name}{datetime_str}" + hash_md5 = \ + hashlib.\ + md5(hash_string.encode('utf-8')).\ + hexdigest() + ## create dir and copy report file + target_dir = \ + os.path.join( + base_path, + run_name, + hash_md5) + target_file_path = \ + os.path.join( + target_dir, + file_name) + os.makedirs( + target_dir, + exist_ok=True) + shutil.copyfile( + file_path, + target_file_path) + ## update db record try: - db.session.add(predemult_data) + predemult_entry = \ + PreDeMultiplexingData( + run_name=run_name, + samplesheet_tag=tag_name, + file_path=target_file_path, + date_stamp=datestamp + ) + db.session.add(predemult_entry) db.session.flush() db.session.commit() except: db.session.rollback() raise except Exception as e: - raise ValueError( - "Failed to add de-multiplex data, error: {0}".\ - format(e)) - -def edit_predemultiplexing_data(data): - try: - if isinstance(data, bytes): - data = json.loads(data.decode()) - if isinstance(data, str): - data = json.loads(data) - if "run_name" not in data: - raise ValueError("Missing run name") - if "samplesheet_tag" not in data: - raise ValueError("Missing sampleshheet tag") - flowcell_cluster_plot = data.get("flowcell_cluster_plot") - if flowcell_cluster_plot is not None and \ - isinstance(flowcell_cluster_plot, dict): - flowcell_cluster_plot = json.dumps(flowcell_cluster_plot) - data.update({"flowcell_cluster_plot": flowcell_cluster_plot}) - project_summary_table = data.get("project_summary_table") - if project_summary_table is not None and \ - 
isinstance(project_summary_table, dict): - project_summary_table = json.dumps(project_summary_table) - data.update({"project_summary_table": project_summary_table}) - project_summary_plot = data.get("project_summary_plot") - if project_summary_plot is not None and \ - isinstance(project_summary_plot, dict): - project_summary_plot = json.dumps(project_summary_plot) - data.update({"project_summary_plot": project_summary_plot}) - sample_table = data.get("sample_table") - if sample_table is not None and \ - isinstance(sample_table, dict): - sample_table = json.dumps(sample_table) - data.update({"sample_table": sample_table}) - sample_plot = data.get("sample_plot") - if sample_plot is not None and \ - isinstance(sample_plot, dict): - sample_plot = json.dumps(sample_plot) - data.update({"sample_plot": sample_plot}) - undetermined_table = data.get("undetermined_table") - if undetermined_table is not None and \ - isinstance(undetermined_table, dict): - undetermined_table = json.dumps(undetermined_table) - data.update({"undetermined_table": undetermined_table}) - undetermined_plot = data.get("undetermined_plot") - if undetermined_plot is not None and \ - isinstance(undetermined_plot, dict): - undetermined_plot = json.dumps(undetermined_plot) - data.update({"undetermined_plot": undetermined_plot}) - try: - db.session.\ - query(PreDeMultiplexingData).\ - filter(PreDeMultiplexingData.run_name==data.get("run_name")).\ - filter(PreDeMultiplexingData.samplesheet_tag==data.get("samplesheet_tag")).\ - update(data) - db.session.commit() - except: - db.session.rollback() - raise - except Exception as e: - raise ValueError( - "Failed to update de-multiplex data, error: {0}".\ - format(e)) + raise ValueError( + f"Failed to load pre-demult report to db, error: {e}") -def add_or_edit_predemultiplexing_data(data): +@celery.task(bind=True) +def async_load_predemult_report( + self, + run_name: str, + tag_name: str, + file_path: str, + base_path: str) -> dict: try: - if isinstance(data, 
bytes): - data = json.loads(data.decode()) - if isinstance(data, str): - data = json.loads(data) - if "run_name" not in data: - raise ValueError("Missing run name") - if "samplesheet_tag" not in data: - raise ValueError("Missing sampleshheet tag") - result = \ - search_predemultiplexing_data( - run_name=data.get("run_name"), - samplesheet_tag=data.get("samplesheet_tag")) - if result is None: - add_predemultiplexing_data(data=data) - else: - edit_predemultiplexing_data(data=data) + load_predemult_report( + run_name=run_name, + tag_name=tag_name, + file_path=file_path, + base_path=base_path) + return {"message": "success"} except Exception as e: - raise ValueError( - "Failed to add or update de-multiplex data, error: {0}".\ - format(e)) + log.error( + "Failed to run celery job, error: {0}".\ + format(e)) class PreDeMultiplexingDataApi(ModelRestApi): resource_name = "predemultiplexing_data" datamodel = SQLAInterface(PreDeMultiplexingData) - @expose('/add_or_edit_report', methods=['POST']) + @expose('/add_report', methods=['POST']) @protect() - def add_or_edit_demult_report(self): + def add_report(self): try: + log.warn('received_res') + log.warn(f"Files: {request.files}") + log.warn(f"Data: {request.data}") + log.warn(f"Form: {request.form}") if not request.files: return self.response_400('No files') + json_data = request.form + run_name = json_data.get('run_name') + samplesheet_tag = json_data.get('samplesheet_tag') + if run_name is None or \ + samplesheet_tag is None: + return self.response_400('Missing run_name or samplesheet_tag') + ## get report file from request file_objs = request.files.getlist('file') file_obj = file_objs[0] + file_name = file_obj.filename file_obj.seek(0) - json_data = file_obj.read() - add_or_edit_predemultiplexing_data(data=json_data) - return self.response(200, message='successfully added or updated demult data') + file_data = file_obj.read() + ## report file can be gzipped + if file_name.endswith('.gz'): + file_data = 
gzip.decompress(file_data).decode('utf-8') + ## get report file and dump it to tmp dir + report_dir = \ + tempfile.mkdtemp( + dir=app.config['CELERY_WORK_DIR'], + prefix='report_',) + report_file = \ + os.path.join(report_dir, file_name) + with open(report_file, 'wb') as fp: + fp.write(file_data) + ## send job to celery worker + base_dir = \ + os.path.join( + app.config['REPORT_UPLOAD_PATH'], + 'predemult_reports') + _ = \ + async_load_predemult_report.\ + apply_async(args=[ + run_name, + samplesheet_tag, + report_file, + base_dir]) + return self.response(200, message=f'successfully submitted demult report loading job for {os.path.basename(report_file)}') except Exception as e: - logging.error(e) + log.error(e) + return self.response_500('failed to load file') + + +# def search_predemultiplexing_data(run_name, samplesheet_tag): +# try: +# result = \ +# db.session.\ +# query(PreDeMultiplexingData).\ +# filter(PreDeMultiplexingData.run_name==run_name).\ +# filter(PreDeMultiplexingData.samplesheet_tag==samplesheet_tag).\ +# one_or_none() +# return result +# except Exception as e: +# raise ValueError( +# "Failed to search pre demultiplexing data, error: {0}".\ +# format(e)) + + +# def add_predemultiplexing_data(data): +# try: +# if isinstance(data, bytes): +# data = json.loads(data.decode()) +# if isinstance(data, str): +# data = json.loads(data) +# flowcell_cluster_plot = data.get("flowcell_cluster_plot") +# if isinstance(flowcell_cluster_plot, dict): +# flowcell_cluster_plot = json.dumps(flowcell_cluster_plot) +# project_summary_table = data.get("project_summary_table") +# if isinstance(project_summary_table, dict): +# project_summary_table = json.dumps(project_summary_table) +# project_summary_plot = data.get("project_summary_plot") +# if isinstance(project_summary_plot, dict): +# project_summary_plot = json.dumps(project_summary_plot) +# sample_table = data.get("sample_table") +# if isinstance(sample_table, dict): +# sample_table = json.dumps(sample_table) +# 
sample_plot = data.get("sample_plot") +# if isinstance(sample_plot, dict): +# sample_plot = json.dumps(sample_plot) +# undetermined_table = data.get("undetermined_table") +# if isinstance(undetermined_table, dict): +# undetermined_table = json.dumps(undetermined_table) +# undetermined_plot = data.get("undetermined_plot") +# if isinstance(undetermined_plot, dict): +# undetermined_plot = json.dumps(undetermined_plot) +# predemult_data = \ +# PreDeMultiplexingData( +# run_name=data.get("run_name"), +# samplesheet_tag=data.get("samplesheet_tag"), +# flowcell_cluster_plot=flowcell_cluster_plot, +# project_summary_table=project_summary_table, +# project_summary_plot=project_summary_plot, +# sample_table=sample_table, +# sample_plot=sample_plot, +# undetermined_table=undetermined_table, +# undetermined_plot=undetermined_plot) +# try: +# db.session.add(predemult_data) +# db.session.flush() +# db.session.commit() +# except: +# db.session.rollback() +# raise +# except Exception as e: +# raise ValueError( +# "Failed to add de-multiplex data, error: {0}".\ +# format(e)) + +# def edit_predemultiplexing_data(data): +# try: +# if isinstance(data, bytes): +# data = json.loads(data.decode()) +# if isinstance(data, str): +# data = json.loads(data) +# if "run_name" not in data: +# raise ValueError("Missing run name") +# if "samplesheet_tag" not in data: +# raise ValueError("Missing sampleshheet tag") +# flowcell_cluster_plot = data.get("flowcell_cluster_plot") +# if flowcell_cluster_plot is not None and \ +# isinstance(flowcell_cluster_plot, dict): +# flowcell_cluster_plot = json.dumps(flowcell_cluster_plot) +# data.update({"flowcell_cluster_plot": flowcell_cluster_plot}) +# project_summary_table = data.get("project_summary_table") +# if project_summary_table is not None and \ +# isinstance(project_summary_table, dict): +# project_summary_table = json.dumps(project_summary_table) +# data.update({"project_summary_table": project_summary_table}) +# project_summary_plot = 
data.get("project_summary_plot") +# if project_summary_plot is not None and \ +# isinstance(project_summary_plot, dict): +# project_summary_plot = json.dumps(project_summary_plot) +# data.update({"project_summary_plot": project_summary_plot}) +# sample_table = data.get("sample_table") +# if sample_table is not None and \ +# isinstance(sample_table, dict): +# sample_table = json.dumps(sample_table) +# data.update({"sample_table": sample_table}) +# sample_plot = data.get("sample_plot") +# if sample_plot is not None and \ +# isinstance(sample_plot, dict): +# sample_plot = json.dumps(sample_plot) +# data.update({"sample_plot": sample_plot}) +# undetermined_table = data.get("undetermined_table") +# if undetermined_table is not None and \ +# isinstance(undetermined_table, dict): +# undetermined_table = json.dumps(undetermined_table) +# data.update({"undetermined_table": undetermined_table}) +# undetermined_plot = data.get("undetermined_plot") +# if undetermined_plot is not None and \ +# isinstance(undetermined_plot, dict): +# undetermined_plot = json.dumps(undetermined_plot) +# data.update({"undetermined_plot": undetermined_plot}) +# try: +# db.session.\ +# query(PreDeMultiplexingData).\ +# filter(PreDeMultiplexingData.run_name==data.get("run_name")).\ +# filter(PreDeMultiplexingData.samplesheet_tag==data.get("samplesheet_tag")).\ +# update(data) +# db.session.commit() +# except: +# db.session.rollback() +# raise +# except Exception as e: +# raise ValueError( +# "Failed to update de-multiplex data, error: {0}".\ +# format(e)) + + +# def add_or_edit_predemultiplexing_data(data): +# try: +# if isinstance(data, bytes): +# data = json.loads(data.decode()) +# if isinstance(data, str): +# data = json.loads(data) +# if "run_name" not in data: +# raise ValueError("Missing run name") +# if "samplesheet_tag" not in data: +# raise ValueError("Missing sampleshheet tag") +# result = \ +# search_predemultiplexing_data( +# run_name=data.get("run_name"), +# 
samplesheet_tag=data.get("samplesheet_tag")) +# if result is None: +# add_predemultiplexing_data(data=data) +# else: +# edit_predemultiplexing_data(data=data) +# except Exception as e: +# raise ValueError( +# "Failed to add or update de-multiplex data, error: {0}".\ +# format(e)) + + +# class PreDeMultiplexingDataApi(ModelRestApi): +# resource_name = "predemultiplexing_data" +# datamodel = SQLAInterface(PreDeMultiplexingData) + # @expose('/add_or_edit_report', methods=['POST']) + # @protect() + # def add_or_edit_demult_report(self): + # try: + # if not request.files: + # return self.response_400('No files') + # file_objs = request.files.getlist('file') + # file_obj = file_objs[0] + # file_obj.seek(0) + # json_data = file_obj.read() + # add_or_edit_predemultiplexing_data(data=json_data) + # return self.response(200, message='successfully added or updated demult data') + # except Exception as e: + # logging.error(e) diff --git a/app/pre_demultiplexing_view.py b/app/pre_demultiplexing_view.py index c49c997..36aa6e4 100644 --- a/app/pre_demultiplexing_view.py +++ b/app/pre_demultiplexing_view.py @@ -1,12 +1,17 @@ import json import logging +from typing import Any +from io import BytesIO +from app import cache from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_appbuilder import ModelView from flask_appbuilder.baseviews import expose +from flask import redirect, flash, url_for, send_file from flask_appbuilder.security.decorators import protect, has_access from . 
import db from .models import PreDeMultiplexingData +log = logging.getLogger(__name__) """ Pre de-multiplexing view @@ -18,86 +23,116 @@ class PreDeMultiplexingDataView(ModelView): 'run_name':'Sequencing run', 'samplesheet_tag':'Tag', 'date_stamp': 'Updated on', - 'report': 'De-multiplexing report'} + 'report': 'Report', + 'download_report': 'Download'} list_columns = [ 'run_name', 'samplesheet_tag', 'date_stamp', - 'report'] - base_permissions = ['can_list', 'can_get_report'] + 'report', + 'download_report'] + base_permissions = ['can_list', 'can_download_reports']#, 'can_get_report'] base_order = ("date_stamp", "desc") - @expose('/predemult_report/') + @expose("/download/rawdata/") @has_access - def get_report(self, id): + @cache.cached(timeout=600) + def download_reports(self, id: str) -> Any: try: - (run_name, samplesheet_tag, flowcell_cluster_plot, project_summary_table, project_summary_plot, - sample_table, sample_plot, undetermined_table, undetermined_plot, date_stamp) = \ - get_pre_demultiplexing_data(demult_id=id) - flowcell_labels = flowcell_cluster_plot.get('labels') - total_cluster_raw = flowcell_cluster_plot.get('total_cluster_raw') - total_cluster_pf = flowcell_cluster_plot.get('total_cluster_pf') - total_yield = flowcell_cluster_plot.get('total_yield') - lanes = list(sample_table.keys()) - return \ - self.render_template( - 'demultiplexing_report.html', - run_name=run_name, - date_stamp=date_stamp, - flowcell_labels=flowcell_labels, - total_cluster_raw=total_cluster_raw, - total_cluster_pf=total_cluster_pf, - total_yield=total_yield, - project_summary_table=project_summary_table, - project_summary_plot=project_summary_plot, - sample_table=sample_table, - sample_plot=sample_plot, - undetermined_table=undetermined_table, - undetermined_plot=undetermined_plot, - lanes=lanes) + records = \ + db.session.\ + query( + PreDeMultiplexingData.samplesheet_tag, + PreDeMultiplexingData.file_path).\ + filter(PreDeMultiplexingData.demult_id==id).\ + one_or_none() + 
if records is None: + raise ValueError(f"Report not found for id: {id}") + (sample_sheet_tag, file_path) = records + with open(file_path, 'rb') as fp: + html_data = fp.read() + output = BytesIO(html_data) + sample_sheet_tag = sample_sheet_tag.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f'{sample_sheet_tag}.html', as_attachment=True) except Exception as e: - logging.error(e) + log.error(e) + flash('Failed to download report', 'danger') + return redirect(url_for('PreDeMultiplexingDataView.list')) + # @expose('/predemult_report/') + # @has_access + # @cache.cached(timeout=600) + # def get_report(self, id): + # try: + # (run_name, samplesheet_tag, flowcell_cluster_plot, project_summary_table, project_summary_plot, + # sample_table, sample_plot, undetermined_table, undetermined_plot, date_stamp) = \ + # get_pre_demultiplexing_data(demult_id=id) + # flowcell_labels = flowcell_cluster_plot.get('labels') + # total_cluster_raw = flowcell_cluster_plot.get('total_cluster_raw') + # total_cluster_pf = flowcell_cluster_plot.get('total_cluster_pf') + # total_yield = flowcell_cluster_plot.get('total_yield') + # lanes = list(sample_table.keys()) + # return \ + # self.render_template( + # 'demultiplexing_report.html', + # run_name=run_name, + # date_stamp=date_stamp, + # flowcell_labels=flowcell_labels, + # total_cluster_raw=total_cluster_raw, + # total_cluster_pf=total_cluster_pf, + # total_yield=total_yield, + # project_summary_table=project_summary_table, + # project_summary_plot=project_summary_plot, + # sample_table=sample_table, + # sample_plot=sample_plot, + # undetermined_table=undetermined_table, + # undetermined_plot=undetermined_plot, + # lanes=lanes) + # except Exception as e: + # log.error(e) -def get_pre_demultiplexing_data(demult_id): - try: - result = \ - db.session.\ - query(PreDeMultiplexingData).\ - filter(PreDeMultiplexingData.demult_id==demult_id).\ - one_or_none() - run_name = '' - samplesheet_tag 
= '' - flowcell_cluster_plot = '' - project_summary_table = '' - project_summary_plot = '' - sample_table = '' - undetermined_table = '' - undetermined_plot = '' - if result is not None: - run_name = result.run_name - samplesheet_tag = result.samplesheet_tag - flowcell_cluster_plot = result.flowcell_cluster_plot - if isinstance(flowcell_cluster_plot, str): - flowcell_cluster_plot = json.loads(flowcell_cluster_plot) - project_summary_table = result.project_summary_table - project_summary_plot = result.project_summary_plot - if isinstance(project_summary_plot, str): - project_summary_plot = json.loads(project_summary_plot) - sample_table = result.sample_table - if isinstance(sample_table, str): - sample_table = json.loads(sample_table) - sample_plot = result.sample_plot - if isinstance(sample_plot, str): - sample_plot = json.loads(sample_plot) - undetermined_table = result.undetermined_table - if isinstance(undetermined_table, str): - undetermined_table = json.loads(undetermined_table) - undetermined_plot = result.undetermined_plot - if isinstance(undetermined_plot, str): - undetermined_plot = json.loads(undetermined_plot) - date_stamp = result.date_stamp - return run_name, samplesheet_tag, flowcell_cluster_plot, project_summary_table, project_summary_plot,\ - sample_table, sample_plot, undetermined_table, undetermined_plot, date_stamp - except Exception as e: - logging.error(e) \ No newline at end of file + +# def get_pre_demultiplexing_data(demult_id): +# try: +# result = \ +# db.session.\ +# query(PreDeMultiplexingData).\ +# filter(PreDeMultiplexingData.demult_id==demult_id).\ +# one_or_none() +# run_name = '' +# samplesheet_tag = '' +# flowcell_cluster_plot = '' +# project_summary_table = '' +# project_summary_plot = '' +# sample_table = '' +# undetermined_table = '' +# undetermined_plot = '' +# if result is not None: +# run_name = result.run_name +# samplesheet_tag = result.samplesheet_tag +# flowcell_cluster_plot = result.flowcell_cluster_plot +# if 
isinstance(flowcell_cluster_plot, str): +# flowcell_cluster_plot = json.loads(flowcell_cluster_plot) +# project_summary_table = result.project_summary_table +# project_summary_plot = result.project_summary_plot +# if isinstance(project_summary_plot, str): +# project_summary_plot = json.loads(project_summary_plot) +# sample_table = result.sample_table +# if isinstance(sample_table, str): +# sample_table = json.loads(sample_table) +# sample_plot = result.sample_plot +# if isinstance(sample_plot, str): +# sample_plot = json.loads(sample_plot) +# undetermined_table = result.undetermined_table +# if isinstance(undetermined_table, str): +# undetermined_table = json.loads(undetermined_table) +# undetermined_plot = result.undetermined_plot +# if isinstance(undetermined_plot, str): +# undetermined_plot = json.loads(undetermined_plot) +# date_stamp = result.date_stamp +# return run_name, samplesheet_tag, flowcell_cluster_plot, project_summary_table, project_summary_plot,\ +# sample_table, sample_plot, undetermined_table, undetermined_plot, date_stamp +# except: +# raise \ No newline at end of file diff --git a/app/raw_analysis/CELLRANGER_MULTI_template_v1.txt b/app/raw_analysis/CELLRANGER_MULTI_template_v1.txt new file mode 100644 index 0000000..cb1b7e4 --- /dev/null +++ b/app/raw_analysis/CELLRANGER_MULTI_template_v1.txt @@ -0,0 +1,49 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + feature_types: Gene Expression/Antibody Capture/CRISPR Guide Capture/Multiplexing Capture/VDJ-B/VDJ-T/VDJ-T-GD/Antigen Capture + cellranger_group: GROUP_NAME +{% endfor -%} +analysis_metadata: + cellranger_multi_config: + - "[gene-expression]" + - "reference,/project/tgu/resources/pipeline_resource/transcriptome/cellranger/v7/GRCh38/refdata-gex-GRCh38-2020-A" + - "# probe-set,/path/to/probe/set, # Required, Fixed RNA Profiling only." + - "# filter-probes,, # Optional, Fixed RNA Profiling only." 
+ - "# r1-length," + - "# r2-length," + - "# chemistry," + - "# expect-cells," + - "# force-cells," + - "# no-secondary," + - "# no-bam," + - "# check-library-compatibility," + - "# include-introns," + - "# min-assignment-confidence,<0.9>, # Optional, Cell Multiplexing only." + - "# cmo-set,/path/to/CMO/reference, # Optional, Cell Multiplexing only." + - "# barcode-sample-assignment,/path/to/barcode-sample-assignment/csv, # Optional, Cell Multiplexing only." + - "[feature] # For Feature Barcode libraries only" + - "# reference,/path/to/feature/reference" + - "# r1-length," + - "# r2-length," + - "# [vdj] # For TCR and BCR libraries only" + - "reference,/project/tgu/resources/pipeline_resource/vdj/v7/hg38/refdata-cellranger-vdj-GRCh38-alts-ensembl-7.1.0" + - "# inner-enrichment-primers,/path/to/primers" + - "# r1-length," + - "# r2-length," + - "[antigen-specificity] # for 5' BCR/TCR Antigen Capture only" + - "#control_id,mhc_allele" + - "#[samples] # for Cell Multiplexing libraries only" + - "#sample_id,cmo_ids,description" + - "#[samples] # for Fixed RNA Profiling multiplexed libraries only" + - "#sample_id,probe_barcode_ids,description" + scanpy_config: + TEMPLATE_FILE: /project/tgu/software/scanpy-notebook-image/templates/scanpy_single_sample_analysis_v0.0.6.3.ipynb + IMAGE_FILE: /project/tgu/resources/pipeline_resource/singularity_images/scanpy-notebook-image/scanpy-notebook-image_v0.0.4.sif + MITO_PREFIX: MT- + RUN_SCRUBLET: true + RUN_CELLCYCLE_SCORE: true + CELL_MARKER_LIST: /project/tgu/resources/pipeline_resource/cell_markers/PangaloDB/PanglaoDB_markers_27_Mar_2020.tsv + CELL_MARKER_SPECIES: HG38 + S_GENES: '' + G2M_GENES: '' + CELL_MARKER_MODE: NON-VDJ \ No newline at end of file diff --git a/app/raw_analysis/GEOMX_DCC.txt b/app/raw_analysis/GEOMX_DCC.txt new file mode 100644 index 0000000..a3c4167 --- /dev/null +++ b/app/raw_analysis/GEOMX_DCC.txt @@ -0,0 +1,9 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + dsp_id: +{% 
endfor -%} +analysis_metadata: + config_zip_file: /rds/general/project/genomics-facility-archive-2019/live/orwell/GeoMx/ + geomx_pkc_file: /rds/general/project/genomics-facility-archive-2019/live/orwell/GeoMx/GeoMx_PKC_Files/Hs_R_NGS_WTA_v1.0.pkc + geomx_dcc_params: + - "--threads=8" \ No newline at end of file diff --git a/app/raw_analysis/NF_ATAC_template_v1.txt b/app/raw_analysis/NF_ATAC_template_v1.txt new file mode 100644 index 0000000..b2501b6 --- /dev/null +++ b/app/raw_analysis/NF_ATAC_template_v1.txt @@ -0,0 +1,11 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + sample: SAMPLE_NAME + replicate: REPLICATE_ID +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/atacseq + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/NF_Ampliseq_template_v1.txt b/app/raw_analysis/NF_Ampliseq_template_v1.txt new file mode 100644 index 0000000..c23237c --- /dev/null +++ b/app/raw_analysis/NF_Ampliseq_template_v1.txt @@ -0,0 +1,10 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + condition: CONDITION_NAME +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/ampliseq + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/NF_Bactmap_template_v1.txt b/app/raw_analysis/NF_Bactmap_template_v1.txt new file mode 100644 index 0000000..7d436a5 --- /dev/null +++ b/app/raw_analysis/NF_Bactmap_template_v1.txt @@ -0,0 +1,9 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: "" +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/bactmap + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/NF_ChIP_template_v1.txt b/app/raw_analysis/NF_ChIP_template_v1.txt new file mode 100644 index 0000000..d8d3f2d --- /dev/null +++ 
b/app/raw_analysis/NF_ChIP_template_v1.txt @@ -0,0 +1,11 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + antibody: ANTIBODY_NAME + control: CONTROL_SAMPLE_ID +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/chipseq + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/NF_CutAndRun_template_v1.txt b/app/raw_analysis/NF_CutAndRun_template_v1.txt new file mode 100644 index 0000000..1b67a7f --- /dev/null +++ b/app/raw_analysis/NF_CutAndRun_template_v1.txt @@ -0,0 +1,12 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + group: GROUP_NAME + replicate: REPLICATE_ID + control: CONTROL_SAMPLE_ID +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/cutandrun + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/NF_HIC_template_v1.txt b/app/raw_analysis/NF_HIC_template_v1.txt new file mode 100644 index 0000000..1493ea0 --- /dev/null +++ b/app/raw_analysis/NF_HIC_template_v1.txt @@ -0,0 +1,9 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: "" +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/hic + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/NF_Methylseq_template_v1.txt b/app/raw_analysis/NF_Methylseq_template_v1.txt new file mode 100644 index 0000000..bf863cf --- /dev/null +++ b/app/raw_analysis/NF_Methylseq_template_v1.txt @@ -0,0 +1,10 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: "" +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r A.B" + - "--aligner bismark" \ No newline at end of file diff --git a/app/raw_analysis/NF_RNA_template_v1.txt b/app/raw_analysis/NF_RNA_template_v1.txt new file 
mode 100644 index 0000000..67de6b1 --- /dev/null +++ b/app/raw_analysis/NF_RNA_template_v1.txt @@ -0,0 +1,13 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + condition: CONDITION_NAME (USE CAPS) + strandedness: reverse +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/rnaseq + nextflow_params: + - "-profile singularity" + - "-r A.B" + - "--aligner star_rsem" + - "--seq_center IGF" \ No newline at end of file diff --git a/app/raw_analysis/NF_Sarek_template_v1.txt b/app/raw_analysis/NF_Sarek_template_v1.txt new file mode 100644 index 0000000..c162892 --- /dev/null +++ b/app/raw_analysis/NF_Sarek_template_v1.txt @@ -0,0 +1,12 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + patient: PATIENT_ID + sex: SEX + status: STATUS_ID +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/sarek + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/NF_smRNA_template_v1.txt b/app/raw_analysis/NF_smRNA_template_v1.txt new file mode 100644 index 0000000..d50080a --- /dev/null +++ b/app/raw_analysis/NF_smRNA_template_v1.txt @@ -0,0 +1,9 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: "" +{% endfor -%} +analysis_metadata: + NXF_VER: X.Y.Z + nfcore_pipeline: nf-core/smrnaseq + nextflow_params: + - "-profile singularity" + - "-r A.B" \ No newline at end of file diff --git a/app/raw_analysis/Snakemake_RNA_template_v1.txt b/app/raw_analysis/Snakemake_RNA_template_v1.txt new file mode 100644 index 0000000..81c308a --- /dev/null +++ b/app/raw_analysis/Snakemake_RNA_template_v1.txt @@ -0,0 +1,27 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + condition: CONDITION_NAME (USE CAPS) + strandedness: reverse +{% endfor -%} +analysis_metadata: + ref: + species: ENSEMBL_SPECIES_NAME + release: ENSEMBL_RELEASE_NUMBER + build: ENSEMBL_BUILD_NAME + trimming: + activate: 
False + mergeReads: + activate: False + pca: + activate: True + labels: + - LABEL_LIST + diffexp: + contrasts: + DE_GROUP: + - CONDITION_NAME_LIST + model: MODEL_STRING + params: + cutadapt-pe: "" + cutadapt-se: "" + star: --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMattributes NH HI AS NM MD --limitBAMsortRAM 12000000000 \ No newline at end of file diff --git a/app/raw_analysis/Snakemake_RNA_template_v2.txt b/app/raw_analysis/Snakemake_RNA_template_v2.txt new file mode 100644 index 0000000..8ed6dda --- /dev/null +++ b/app/raw_analysis/Snakemake_RNA_template_v2.txt @@ -0,0 +1,32 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: + condition: CONDITION_NAME (USE CAPS) + strandedness: reverse +{% endfor -%} +analysis_metadata: + ref: + species: ENSEMBL_SPECIES_NAME_EG_"homo_sapiens" + release: ENSEMBL_RELEASE_NUMBER_EG_110 + build: ENSEMBL_BUILD_NAME_EG_GRCh38 + trimming: + activate: false + mergeReads: + activate: false + pca: + activate: true + labels: + - CONDITION_COLUMNS + diffexp: + variables_of_interest: + condition: + base_level: BASE_CONDITION + batch_effects: '' + contrasts: + DE_GROUP_SIMPLE: + variable_of_interest: CONDITION_COLUMN + level_of_interest: TREATMENT_CONDITION + model: "~MODEL_STRING" + params: + cutadapt-pe: "" + cutadapt-se: "" + star: --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMattributes NH HI AS NM MD --limitBAMsortRAM 12000000000 \ No newline at end of file diff --git a/app/raw_analysis/analysis_validation_cellranger_multi_v1 b/app/raw_analysis/analysis_validation_cellranger_multi_v1 new file mode 100644 index 0000000..fdcbfa0 --- /dev/null +++ 
b/app/raw_analysis/analysis_validation_cellranger_multi_v1 @@ -0,0 +1,59 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "id": "https://github.com/imperial-genomics-facility/IGFPortal", + "title": "IGF Cellranger Multi analysis validation schema", + "description": "Schema for validation of Cellranger multi analysis yaml file", + "type" : "object", + "version": "0.0.1", + "properties": { + "sample_metadata": { + "type": "object", + "uniqueItems": true, + "minItems": 1, + "patternProperties": { + "^IGF[a-zA-Z0-9-_]+$": { + "type": ["object", "string"], + "properties": { + "feature_types": { + "type": "string", + "enum": [ + "Gene Expression", + "Antibody Capture", + "CRISPR Guide Capture", + "Multiplexing Capture", + "VDJ-B", + "VDJ-T", + "VDJ-T-GD", + "Antigen Capture" + ] + }, + "cellranger_group": { + "type": "string", + "pattern": "^[A-Z0-9-_]+$" + } + } + } + } + }, + "analysis_metadata": { + "type": "object", + "properties": { + "scanpy": { + "type": "object", + "uniqueItems": true, + "minItems": 1 + }, + "cellranger_multi_config": { + "type": "array", + "uniqueItems": true, + "minItems": 1, + "items": { + "type": "string" + } + } + }, + "required": ["cellranger_multi_config", "scanpy"] + } + }, + "required": ["sample_metadata", "analysis_metadata"] +} \ No newline at end of file diff --git a/app/raw_analysis/analysis_validation_geomx_dcc_v1.json b/app/raw_analysis/analysis_validation_geomx_dcc_v1.json new file mode 100644 index 0000000..32d4d56 --- /dev/null +++ b/app/raw_analysis/analysis_validation_geomx_dcc_v1.json @@ -0,0 +1,44 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "id": "https://github.com/imperial-genomics-facility/IGFPortal", + "title": "IGF GeoMx DCC analysis validation schema", + "description": "Schema for validation of GeoMx DCC analysis yaml file", + "type" : "object", + "version": "0.0.1", + "properties": { + "sample_metadata": { + "type": "object", + "uniqueItems": true, + "minItems": 1, + 
"patternProperties": { + "^IGF[a-zA-Z0-9-_]+$": { + "type": ["object", "string"], + "properties": { + "dsp_id": { + "type": "string", + "pattern": "^DSP-" + } + } + } + } + }, + "analysis_metadata": { + "type": "object", + "properties": { + "config_zip_file": { + "type": "string" + }, + "geomx_dcc_params": { + "type": "array", + "uniqueItems": true, + "minItems": 1, + "items": { + "type": "string" + } + } + }, + "required": ["config_zip_file", "geomx_dcc_params"] + } + }, + "required": ["sample_metadata", "analysis_metadata"] +} \ No newline at end of file diff --git a/app/raw_analysis/analysis_validation_nfcore_v1.json b/app/raw_analysis/analysis_validation_nfcore_v1.json new file mode 100644 index 0000000..f9bc541 --- /dev/null +++ b/app/raw_analysis/analysis_validation_nfcore_v1.json @@ -0,0 +1,65 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "id": "https://github.com/imperial-genomics-facility/IGFPortal", + "title": "IGF NF-core analysis validation schema", + "description": "Schema for validation of NF-core analysis yaml file", + "type" : "object", + "version": "0.0.1", + "properties": { + "sample_metadata": { + "type": "object", + "uniqueItems": true, + "minItems": 1, + "patternProperties": { + "^IGF[a-zA-Z0-9-_]+$": { + "type": ["object", "string"], + "properties": { + "condition": { + "type": "string", + "pattern": "^[A-Z0-9-_]+$" + }, + "strandedness": { + "type": "string", + "enum": ["reverse", "forward", "unstranded"] + } + } + } + } + }, + "analysis_metadata": { + "type": "object", + "properties": { + "NXF_VER": { + "type": "string" + }, + "nfcore_pipeline": { + "type": "string", + "enum": [ + "nf-core/smrnaseq", + "nf-core/rnaseq", + "nf-core/methylseq", + "nf-core/sarek", + "nf-core/ampliseq", + "nf-core/rnafusion", + "nf-core/rnavar", + "nf-core/atacseq", + "nf-core/chipseq", + "nf-core/cutandrun", + "nf-core/bactmap", + "nf-core/hic" + ] + }, + "nextflow_params": { + "type": "array", + "uniqueItems": true, + "minItems": 1, + 
"items": { + "type": "string" + } + } + }, + "required": ["NXF_VER", "nfcore_pipeline", "nextflow_params"] + } + }, + "required": ["sample_metadata", "analysis_metadata"] +} \ No newline at end of file diff --git a/app/raw_analysis/analysis_validation_snakemake_rnaseq_v1.json b/app/raw_analysis/analysis_validation_snakemake_rnaseq_v1.json new file mode 100644 index 0000000..e90e7c3 --- /dev/null +++ b/app/raw_analysis/analysis_validation_snakemake_rnaseq_v1.json @@ -0,0 +1,116 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "id": "https://github.com/imperial-genomics-facility/IGFPortal", + "title": "IGF Snakemake RNA-Seq analysis validation schema", + "description": "Schema for validation of Snakemake RNA-Seq analysis yaml file", + "type" : "object", + "version": "0.0.1", + "properties": { + "sample_metadata": { + "type": "object", + "uniqueItems": true, + "minItems": 1, + "patternProperties": { + "^IGF[a-zA-Z0-9-_]+$": { + "type": "object", + "properties": { + "condition": { + "type": "string", + "pattern": "^[A-Z0-9-_]+$" + }, + "strandedness": { + "type": "string", + "enum": ["reverse", "forward", "unstranded"] + } + } + } + } + }, + "analysis_metadata": { + "type": "object", + "properties": { + "ref": { + "type": "object", + "properties": { + "species": { + "type" : "string", + "pattern": "^[a-zA-Z0-9-_]+$" + }, + "release": { + "type" : "number" + }, + "build": { + "type": "string", + "pattern": "^[a-zA-Z0-9-_]+$" + } + }, + "required": ["species", "release", "build"] + }, + "trimming": { + "type": "object", + "properties": { + "activate": { + "type": "boolean" + } + } + }, + "mergeReads": { + "type": "object", + "properties": { + "activate": { + "type": "boolean" + } + } + }, + "pca": { + "type": "object", + "properties": { + "activate": { + "type": "boolean" + }, + "labels": { + "type": "array", + "uniqueItems": true, + "minItems": 1, + "items": { + "type": "string", + "pattern": "^[a-zA-Z0-9-_]+$" + } + } + } + }, + "diffexp": { + 
"type": "object", + "properties": { + "contrasts": { + "type": "object", + "uniqueItems": true, + "minItems": 1, + "patternProperties": { + "^[a-zA-Z0-9-_]+": { + "type": "array", + "minItems": 1, + "items": { + "type": "string", + "pattern": "^[A-Z0-9-_]+$" + } + } + } + }, + "model": { + "type": "string" + } + }, + "required": ["contrasts", "model"] + }, + "params": { + "type": "object", + "uniqueItems": true, + "minItems": 1 + } + }, + "required": ["ref", "pca"] + } + }, + "required": ["sample_metadata", "analysis_metadata"] +} \ No newline at end of file diff --git a/app/raw_analysis/analysis_validation.json b/app/raw_analysis/analysis_validation_v1.json similarity index 100% rename from app/raw_analysis/analysis_validation.json rename to app/raw_analysis/analysis_validation_v1.json diff --git a/app/raw_analysis/default_analysis_template.txt b/app/raw_analysis/default_analysis_template.txt new file mode 100644 index 0000000..1da16e0 --- /dev/null +++ b/app/raw_analysis/default_analysis_template.txt @@ -0,0 +1,4 @@ +sample_metadata: +{% for SAMPLE_ID in SAMPLE_ID_LIST %} {{ SAMPLE_ID }}: '' +{% endfor -%} +analysis_metadata: '' \ No newline at end of file diff --git a/app/raw_analysis/raw_analysis_util.py b/app/raw_analysis/raw_analysis_util.py index a766a13..f935d0a 100644 --- a/app/raw_analysis/raw_analysis_util.py +++ b/app/raw_analysis/raw_analysis_util.py @@ -1,36 +1,463 @@ import os import json +import logging +from yaml import load +from yaml import Loader from ..models import RawAnalysis +from ..models import Sample +from ..models import Experiment +from ..models import Run +from ..models import File +from ..models import Collection +from ..models import Collection_group +from ..models import Project +from ..models import Pipeline +from ..models import RawAnalysisValidationSchema +from ..models import RawAnalysisTemplate from .. 
import db -from typing import Union -from jsonschema import Draft4Validator, ValidationError - -def validate_analysis_json( - analysis_json_data: list, - schema_json_file: str=os.path.join(os.path.dirname(__file__), 'analysis_validation.json')) \ - -> list: - try: - with open(schema_json_file, 'r') as fp: - schema_json_data = json.load(fp) - analysis_validator = \ - Draft4Validator(schema_json_data) - validation_errors = \ - sorted( - analysis_validator.\ - iter_errors(analysis_json_data), - key=lambda e: e.path) +from jsonschema import Draft202012Validator +from jinja2 import Template + +log = logging.getLogger(__name__) + +def prepare_temple_for_analysis(template_tag: str) -> str: + try: + pass + # fetch template from RawAnalysisTemplate table + # or just return sample_metadata: IGF ids as yaml + except Exception as e: + raise ValueError( + f"Failed to get template, error: {e}") + +def project_query(): + try: + results = \ + db.session.\ + query(Project).\ + filter(Project.status=='ACTIVE').\ + order_by(Project.project_id.desc()).\ + limit(100).\ + all() + return results + except Exception as e: + raise ValueError( + f"Failed to get project list, error: {e}") + + +def pipeline_query(): + try: + results = \ + db.session.\ + query(Pipeline).\ + filter(Pipeline.is_active=='Y').\ + filter(Pipeline.pipeline_type=='AIRFLOW').\ + filter(Pipeline.pipeline_name.like("dag%")).\ + order_by(Pipeline.pipeline_id.desc()).\ + limit(100).\ + all() + return results + except Exception as e: + raise ValueError( + f"Failed to get pipeline list, error: {e}") + + +def validate_json_schema( + raw_analysis_schema_id: int) -> None: + try: + status = 'FAILED' + raw_analysis_schema = \ + db.session.\ + query(RawAnalysisValidationSchema).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==raw_analysis_schema_id).\ + one_or_none() + if raw_analysis_schema is None: + raise ValueError( + f"No metadata entry found for id {raw_analysis_schema_id}") + json_schema = \ + 
raw_analysis_schema.json_schema + if json_schema is not None: + try: + _ = json.loads(json_schema) + status = 'VALIDATED' + except Exception as e: + log.error(f"Failed to run json validation, error: {e}") + status = 'FAILED' + ## update db status + try: + db.session.\ + query(RawAnalysisValidationSchema).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==raw_analysis_schema_id).\ + update({'status': status}) + db.session.commit() + except: + db.session.rollback() + raise + return status + except Exception as e: + raise ValueError( + f"Failed to validate json schema, error: {e}") + + +def _get_validation_status_for_analysis_design( + analysis_yaml: str, + validation_schema: str) -> list: + try: error_list = list() - for err in validation_errors: - if isinstance(err, str): - error_list.append(err) - else: - if len(err.schema_path) > 2: + # load yaml + try: + json_data = \ + load(analysis_yaml, Loader=Loader) + except: + error_list.append( + 'Failed to load yaml data. Invalid format.') + return error_list + try: + schema = \ + json.loads(validation_schema) + except: + error_list.append( + 'Failed to load validation schema. 
Invalid format.') + return error_list + try: + # validation can fail if inputs are not correct + schema_validator = \ + Draft202012Validator(schema) + for error in sorted(schema_validator.iter_errors(json_data), key=str): + error_list.append(error.message) + except: + error_list.append( + 'Failed to check validation schema') + return error_list + except Exception as e: + raise ValueError( + f"Failed to get schema validation for analysis design, error: {e}") + + +def _get_project_id_for_samples( + sample_igf_id_list: list) -> list: + try: + project_list = list() + results = \ + db.session.\ + query(Project).\ + distinct(Project.project_igf_id).\ + join(Sample, Project.project_id==Sample.project_id).\ + filter(Sample.sample_igf_id.in_(sample_igf_id_list)).\ + all() + project_list = [ + p.project_igf_id for p in list(results)] + return project_list + except Exception as e: + raise ValueError( + f"Failed to get project id for sample list, error; {e}") + + +def _get_file_collection_for_samples( + sample_igf_id_list: list, + active_status: str = 'ACTIVE', + fastq_collection_type_list: list = ('demultiplexed_fastq',)) -> list: + """ + A function for fetching fastq and run_igf_id for a list od samples + + :param sample_igf_id_list: A list of sample_igf_ids for DB lookup + :param active_status: Filter tag for active experiment, run and file status, default: active + :param fastq_collection_type_list: Fastq collection type list, default ('demultiplexed_fastq',) + :returns: A list of sample_igf_ids which are linked to valid file paths + """ + try: + sample_with_files = list() + results = \ + db.session.\ + query(Sample.sample_igf_id).\ + distinct(Sample.sample_igf_id).\ + join(Experiment, Sample.sample_id==Experiment.sample_id).\ + join(Run, Experiment.experiment_id==Run.experiment_id).\ + join(Collection, Collection.name==Run.run_igf_id).\ + join(Collection_group, Collection.collection_id==Collection_group.collection_id).\ + join(File, 
File.file_id==Collection_group.file_id).\ + filter(Run.status==active_status).\ + filter(Experiment.status==active_status).\ + filter(File.status==active_status).\ + filter(Collection.type.in_(fastq_collection_type_list)).\ + filter(Sample.sample_igf_id.in_(sample_igf_id_list)).\ + all() + if results is not None: + sample_with_files = [ + s.sample_igf_id + for s in list(results)] + return sample_with_files + except Exception as e: + raise ValueError( + f'Failed to fetch fastq dir for sample id {sample_igf_id_list}, error: {e}') + + +def _get_sample_metadata_checks_for_analysis( + sample_metadata: dict, + project_igf_id: str) -> list: + try: + error_list = list() + if not isinstance(sample_metadata, dict): + error_list.append( + f'sample_metadata has type {type(sample_metadata)}') + return error_list + else: + sample_ids = \ + list(sample_metadata.keys()) + if len(sample_ids) == 0: + error_list.append( + 'No sample ids found in sample_metadata') + return error_list + if len(sample_ids) > 0: + sample_with_files = \ + _get_file_collection_for_samples( + sample_igf_id_list=sample_ids) + if len(sample_ids) != len(sample_with_files): + if len(sample_with_files) == 0: + error_list.append('No sample has fastq') + else: + missing_samples = \ + list(set(sample_ids).difference(set(sample_with_files))) + error_list.append( + f"Missing fastq for samples: {', '.join(missing_samples)}") + project_list = \ + _get_project_id_for_samples(sample_igf_id_list=sample_ids) + if len(project_list) == 0 : + error_list.append('No project info found') + if len(project_list) > 1: + error_list.append( + f"samples are linked to multiple projects: {', '.join(project_list)}") + if len(project_list) == 1 and \ + project_list[0] != project_igf_id: error_list.append( - f"{err.schema_path[2]}: {err.message}") - else: + f'Analysis is linked to project {project_igf_id} but samples are linked to project {project_list[0]}') + return error_list + except Exception as e: + raise ValueError( + f"Failed to 
check sample metadata, error: {e}") + +def _get_validation_errors_for_analysis_design(raw_analysis_id: int) ->list: + try: + error_list = list() + # get raw analysis design + raw_analysis_design = \ + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + one_or_none() + if raw_analysis_design is None: + error_list.append( + f"No metadata entry found for id {raw_analysis_id}") + # no missing db record found + if len(error_list) == 0: + # get design yaml + analysis_yaml = \ + raw_analysis_design.analysis_yaml + if analysis_yaml is None: + error_list.append( + "No analysis design found") + pipeline_id = \ + raw_analysis_design.pipeline_id + if pipeline_id is None: + error_list.append( + "No pipeline info found") + if raw_analysis_design.project is None: + error_list.append( + "No project id found") + else: + project_igf_id = \ + raw_analysis_design.project.project_igf_id + # get validation schema + raw_analysis_schema = \ + db.session.\ + query(RawAnalysisValidationSchema).\ + filter(RawAnalysisValidationSchema.pipeline_id==pipeline_id).\ + one_or_none() + if raw_analysis_schema is None: + error_list.append( + "No analysis schema found") + ## no missing data, lets validate design against a schema + if len(error_list) == 0: + validation_schema = \ + raw_analysis_schema.json_schema + try: + # check design against schema + schema_validation_errors = \ + _get_validation_status_for_analysis_design( + analysis_yaml=analysis_yaml, + validation_schema=validation_schema) + if len(schema_validation_errors) > 0: + error_list.extend( + schema_validation_errors) + except Exception as e: + log.error(e) error_list.append( - err.message) + "Failed to inspect analysis design") + ## valid schema, lets check sample ids + if len(error_list) == 0: + json_data = \ + load(analysis_yaml, Loader=Loader) + sample_metadata = \ + json_data.get('sample_metadata') + if sample_metadata is None: + error_list.append( + 'sample_metadata missing after validation 
checks ??') + ## no corrupted db record found + if len(error_list) == 0: + sample_metadata_errors = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata=sample_metadata, + project_igf_id=project_igf_id) + if len(sample_metadata_errors) > 0: + error_list.extend( + sample_metadata_errors) return error_list except Exception as e: - print(e) - return False \ No newline at end of file + raise ValueError( + f"Failed to check analysis metadata, error: {e}") + + +def validate_analysis_design( + raw_analysis_id: int) -> str: + try: + status = 'FAILED' + error_list = list() + error_list = \ + _get_validation_errors_for_analysis_design( + raw_analysis_id=raw_analysis_id) + # validation_schema = None + # raw_analysis_design = \ + # db.session.\ + # query(RawAnalysis).\ + # filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + # one_or_none() + # if raw_analysis_design is None: + # raise ValueError( + # f"No metadata entry found for id {raw_analysis_id}") + # analysis_yaml = \ + # raw_analysis_design.analysis_yaml + # analysis_yaml = \ + # analysis_yaml + # pipeline_id = \ + # raw_analysis_design.pipeline_id + # if pipeline_id is None: + # error_list.append("No pipeline info found") + # project_igf_id = \ + # raw_analysis_design.project.project_igf_id + # if project_igf_id is None: + # error_list.append("No project id found") + # else: + # raw_analysis_schema = \ + # db.session.\ + # query(RawAnalysisValidationSchema).\ + # filter(RawAnalysisValidationSchema.pipeline_id==pipeline_id).\ + # one_or_none() + # if raw_analysis_schema is None: + # error_list.append("No analysis schema found") + # else: + # validation_schema = \ + # raw_analysis_schema.json_schema + # if validation_schema is not None: + # try: + # # check against schema + # schema_validation_errors = \ + # _get_validation_status_for_analysis_design( + # analysis_yaml=analysis_yaml, + # validation_schema=validation_schema) + # if len(schema_validation_errors) > 0: + # 
error_list.extend(schema_validation_errors) + # except Exception as e: + # error_list.append("Failed to inspect design") + # if len(error_list) == 0: + # # its time to check igf ids + # # assuming it has sample_metadata as its passed validation checks + # json_data = \ + # load(analysis_yaml, Loader=Loader) + # sample_metadata = \ + # json_data.get('sample_metadata') + # if sample_metadata is None: + # error_list.append( + # 'sample_metadata missing after validation checks ??') + # else: + # sample_metadata_errors = \ + # _get_sample_metadata_checks_for_analysis( + # sample_metadata=sample_metadata, + # project_igf_id=project_igf_id) + # if len(sample_metadata_errors) > 0: + # error_list.extend(sample_metadata_errors) + if len(error_list) == 0: + status = 'VALIDATED' + errors = '' + else: + status = 'FAILED' + formatted_errors = list() + for i, e in enumerate(error_list): + formatted_errors.append(f"{i+1}. {e}") + errors = '\n'.join(formatted_errors) + try: + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + update({'status': status, 'report': errors}) + db.session.commit() + except: + db.session.rollback() + raise + return status + except Exception as e: + raise ValueError( + f"Failed to validate analysis design, error; {e}") + +def _fetch_all_samples_for_project(project_igf_id: str) -> list: + try: + sample_ids = list() + samples = \ + db.session.\ + query(Sample.sample_igf_id).\ + join(Project, Project.project_id==Sample.project_id).\ + filter(Project.project_igf_id==project_igf_id).\ + all() + sample_ids = [ + sample_id for (sample_id,) in samples] + return sample_ids + except Exception as e: + raise ValueError( + f"Failed to get sample list for project {project_igf_id}, error; {e}") + +def generate_analysis_template(project_igf_id: str, template_tag: str) -> str: + try: + sample_id_list = \ + _fetch_all_samples_for_project( + project_igf_id=project_igf_id) + template_data = \ + _get_analysis_template( + 
template_tag=template_tag) + template = \ + Template(template_data, keep_trailing_newline=True) + formatted_template = \ + template.render(SAMPLE_ID_LIST=sample_id_list) + return formatted_template + except Exception as e: + raise ValueError( + f"Failed to generate template project {project_igf_id}, error; {e}") + + +def _get_analysis_template( + template_tag: str, + default_template_path: str = os.path.join(os.path.dirname(__file__), 'default_analysis_template.txt')) \ + -> str: + try: + with open(default_template_path, 'r') as fp: + default_template = fp.read() + template_data = \ + db.session.\ + query(RawAnalysisTemplate.template_data).\ + filter(RawAnalysisTemplate.template_tag==template_tag).\ + one_or_none() + if template_data is None: + return default_template + else: + (template_data,) = template_data + return template_data + except Exception as e: + raise ValueError( + f"Failed to get template for tag {template_tag}, error; {e}") \ No newline at end of file diff --git a/app/raw_analysis_api.py b/app/raw_analysis_api.py new file mode 100644 index 0000000..1cf7087 --- /dev/null +++ b/app/raw_analysis_api.py @@ -0,0 +1,136 @@ +import json, logging, gzip +from yaml import load, Loader +from flask_appbuilder import ModelRestApi +from flask import request, send_file +from flask_appbuilder.api import expose, rison +from flask_appbuilder.models.sqla.interface import SQLAInterface +from flask_appbuilder.security.decorators import protect +from . 
import db +from io import BytesIO +from .models import RawAnalysis + +log = logging.getLogger(__name__) + +class RawAnalysisApi(ModelRestApi): + resource_name = "raw_analysis" + datamodel = SQLAInterface(RawAnalysis) + + @expose('/search_new_analysis', methods=['GET']) + @protect() + def search_new_analysis(self): + try: + new_analysis_list = \ + db.session.\ + query(RawAnalysis.raw_analysis_id).\ + filter(RawAnalysis.status=='VALIDATED').\ + all() + new_analysis_ids = [ + row for (row,) in new_analysis_list] + return self.response(200, new_analysis=new_analysis_ids) + except Exception as e: + log.error(e) + + + @expose('/get_raw_analysis_data/', methods=['POST']) + @protect() + def get_raw_analysis_data(self, raw_analysis_id): + try: + result = \ + db.session.\ + query( + RawAnalysis.project_id, + RawAnalysis.pipeline_id, + RawAnalysis.analysis_name, + RawAnalysis.analysis_yaml).\ + filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + filter(RawAnalysis.status=='VALIDATED').\ + one_or_none() + if result is None: + json_data = { + 'project_id': '', + 'pipeline_id': '', + 'analysis_name': '', + 'analysis_yaml': ''} + else: + (project_id, pipeline_id, analysis_name, analysis_yaml) = \ + result + # convert yaml to json + analysis_yaml_json = \ + load(analysis_yaml, Loader=Loader) + json_data = { + 'project_id': project_id, + 'pipeline_id': pipeline_id, + 'analysis_name': analysis_name, + 'analysis_yaml': analysis_yaml_json} + # dump to json text + json_data_dump = \ + json.dumps(json_data) + output = BytesIO(json_data_dump.encode()) + output.seek(0) + attachment_filename = \ + f"raw_analysis_{raw_analysis_id}.json" + return send_file(output, download_name=attachment_filename, as_attachment=True) + except Exception as e: + log.error(e) + + + @expose('/mark_analysis_synched/', methods=['POST']) + @protect() + def mark_analysis_synched(self, raw_analysis_id): + try: + result = \ + db.session.\ + query(RawAnalysis).\ + 
filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + filter(RawAnalysis.status=='VALIDATED').\ + one_or_none() + if result is None: + # can't find any raw analysis + return self.response(200, status='failed') + try: + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + filter(RawAnalysis.status=='VALIDATED').\ + update({ + 'raw_analysis_id': raw_analysis_id, + 'status': 'SYNCHED'}) + db.session.commit() + except Exception as e: + db.session.rollback() + log.error(e) + return self.response(200, status='failed') + return self.response(200, status='success') + except Exception as e: + log.error(e) + + + @expose('/mark_analysis_rejected/', methods=['POST']) + @protect() + def mark_analysis_rejected(self, raw_analysis_id): + try: + result = \ + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + filter(RawAnalysis.status=='VALIDATED').\ + one_or_none() + if result is None: + # can't find any raw analysis + return self.response(200, status='failed') + try: + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.raw_analysis_id==raw_analysis_id).\ + filter(RawAnalysis.status=='VALIDATED').\ + update({ + 'raw_analysis_id': raw_analysis_id, + 'status': 'REJECTED'}) + db.session.commit() + except Exception as e: + db.session.rollback() + log.error(e) + return self.response(200, status='success') + return self.response(200, status='failed') + except Exception as e: + log.error(e) \ No newline at end of file diff --git a/app/raw_analysis_view.py b/app/raw_analysis_view.py index 2ff778f..be47b65 100644 --- a/app/raw_analysis_view.py +++ b/app/raw_analysis_view.py @@ -1,53 +1,631 @@ -import logging, tempfile, os +import logging from io import BytesIO -from .models import RawAnalysis, RawMetadataModel +from . 
import db +from .models import RawAnalysis +from .models import RawAnalysisValidationSchema +from .models import RawAnalysisTemplate from flask_appbuilder import ModelView from flask_appbuilder.models.sqla.filters import FilterInFunction -from flask import redirect, flash, send_file +from flask import redirect, flash, url_for, send_file from flask_appbuilder.actions import action from flask_appbuilder.models.sqla.interface import SQLAInterface -from . import db +from wtforms_sqlalchemy.fields import QuerySelectField +from flask_appbuilder.fieldwidgets import Select2Widget from . import celery -from .raw_analysis.raw_analysis_util import validate_analysis_json +from .raw_analysis.raw_analysis_util import pipeline_query +from .raw_analysis.raw_analysis_util import project_query +from .raw_analysis.raw_analysis_util import validate_json_schema +from .raw_analysis.raw_analysis_util import validate_analysis_design +from .raw_analysis.raw_analysis_util import prepare_temple_for_analysis +from .raw_analysis.raw_analysis_util import generate_analysis_template + +log = logging.getLogger(__name__) + +class RawAnalysisTemplateView(ModelView): + datamodel = SQLAInterface(RawAnalysisTemplate) + label_columns = { + "template_tag": "Name", + "template_data": "Template" + } + base_permissions = [ + "can_list", + "can_show", + "can_add", + "can_edit", + "can_delete"] + base_order = ("template_id", "desc") + + + + +@celery.task(bind=True) +def async_validate_analysis_schema(self, id_list): + try: + results = list() + for raw_analysis_schema_id in id_list: + msg = \ + validate_json_schema( + raw_analysis_schema_id=raw_analysis_schema_id) + results.append(msg) + return dict(zip(id_list, results)) + except Exception as e: + log.error( + f"Failed to run celery job, error: {e}") + + +class RawAnalysisSchemaView(ModelView): + datamodel = SQLAInterface(RawAnalysisValidationSchema) + label_columns = { + "pipeline.pipeline_name": "Pipeline name", + "status": "Status", + "json_schema": 
"Schema" + } + list_columns = [ + "pipeline.pipeline_name", + "status", + "date_stamp"] + show_columns = [ + "pipeline.pipeline_name", + "status", + "date_stamp", + "json_schema"] + add_columns = [ + "pipeline", + "json_schema"] + base_permissions = [ + "can_list", + "can_show", + "can_add", + "can_delete"] + base_order = ("raw_analysis_schema_id", "desc") + + add_form_extra_fields = { + "pipeline": QuerySelectField( + "Pipeline", + query_factory=pipeline_query, + widget=Select2Widget() + ), + } + edit_form_extra_fields = { + "pipeline": QuerySelectField( + "Pipeline", + query_factory=pipeline_query, + widget=Select2Widget() + ) + } + + @action("validate_json_analysis_schema", "Validate JSON", confirmation="Run validate?", multiple=True, single=False, icon="fa-rocket") + def validate_json_analysis_schema(self, item): + try: + id_list = list() + pipeline_list = list() + if isinstance(item, list): + id_list = [i.raw_analysis_schema_id for i in item] + pipeline_list = [i.pipeline.pipeline_name for i in item] + else: + id_list = [item.raw_analysis_schema_id] + pipeline_list = [item.pipeline.pipeline_name] + _ = \ + async_validate_analysis_schema.\ + apply_async(args=[id_list]) + flash("Submitted jobs for {0}".format(', '.join(pipeline_list)), "info") + self.update_redirect() + return redirect(url_for('RawAnalysisSchemaView.list')) + except: + flash('Failed to validate analysis schema', 'danger') + return redirect(url_for('RawAnalysisSchemaView.list')) + + @action("download_json_analysis_schema", "Download JSON schema", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def download_json_analysis_schema(self, item): + try: + json_schema = item.json_schema + if json_schema is None: + json_schema = '{}' + output = BytesIO(json_schema.encode('utf-8')) + pipeline_name = item.pipeline.pipeline_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f'{pipeline_name}_schema.json', 
as_attachment=True) + except: + flash('Failed to download analysis schema', 'danger') + return redirect(url_for('RawAnalysisSchemaView.list')) + @celery.task(bind=True) def async_validate_analysis_yaml(self, id_list): try: - pass + results = list() + for raw_analysis_id in id_list: + msg = \ + validate_analysis_design( + raw_analysis_id=raw_analysis_id) + results.append(msg) + return dict(zip(id_list, results)) except Exception as e: - logging.error( - "Failed to run celery job, error: {0}".\ - format(e)) + log.error( + f"Failed to run celery job, error: {e}") + +def rename(newname): + def decorator(f): + f.__name__ = newname + return f + return decorator class RawAnalysisView(ModelView): datamodel = SQLAInterface(RawAnalysis) - list_columns = ["analysis_tag", "status", "date_stamp"] - show_columns = ["analysis_tag", "analysis_yaml", "status", "report", "date_stamp"] - add_columns = ["analysis_tag", "analysis_yaml"] - edit_columns = ["analysis_tag", "analysis_yaml"] + label_columns = { + "analysis_name": "Analysis name", + "project.project_igf_id": "Project name", + "pipeline.pipeline_name": "Pipeline name", + "status": "Status", + "date_stamp": "Updated on", + "analysis_yaml": "Yaml", + "report": "Report"} + list_columns = [ + "analysis_name", + "project.project_igf_id", + "pipeline.pipeline_name", + "status", + "date_stamp"] + show_columns = [ + "analysis_name", + "project.project_igf_id", + "pipeline.pipeline_name", + "status", + "date_stamp", + "analysis_yaml", + "report"] + add_columns = [ + "analysis_name", + "project", + "pipeline", + "analysis_yaml"] + edit_columns = [ + "analysis_name", + "project", + "pipeline", + "analysis_yaml"] base_filters = [ ["status", FilterInFunction, lambda: ["UNKNOWN", "FAILED"]]] base_order = ("raw_analysis_id", "desc") + base_permissions = [ + "can_list", + "can_show", + "can_add", + "can_edit"] - @action("validate_and_submit_analysis", "Validate and upload analysis", confirmation="Validate analysis design?", icon="fa-rocket") 
- def validate_and_submit_analysis(self, item): - analysis_list = list() - id_list = list() - if isinstance(item, list): - analysis_list = [i.analysis_tag for i in item] - id_list = [i.raw_analysis_id for i in item] - else: - analysis_list = [item.analysis_tag] - id_list = [item.raw_analysis_id] - flash("Submitted jobs for {0}".format(', '.join(analysis_list)), "info") - self.update_redirect() - return redirect(self.get_redirect()) + add_form_extra_fields = { + "project": QuerySelectField( + "Project", + query_factory=project_query, + widget=Select2Widget() + ), + "pipeline": QuerySelectField( + "Pipeline", + query_factory=pipeline_query, + widget=Select2Widget() + ), + } + edit_form_extra_fields = { + "project": QuerySelectField( + "Project", + query_factory=project_query, + widget=Select2Widget() + ), + "pipeline": QuerySelectField( + "Pipeline", + query_factory=pipeline_query, + widget=Select2Widget() + ) + } + @action("reject_raw_analysis", "Reject analysis", confirmation="Reject analysis design?", multiple=False, single=True, icon="fa-exclamation") + def reject_raw_analysis(self, item): + try: + if isinstance(item, list): + try: + for i in item: + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.raw_analysis_id==i.raw_analysis_id).\ + update({'status': 'REJECTED'}) + db.session.commit() + except: + db.session.rollback() + raise + else: + try: + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.raw_analysis_id==item.raw_analysis_id).\ + update({'status': 'REJECTED'}) + db.session.commit() + except: + db.session.rollback() + raise + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + log.error(e) + flash('Failed to reject analysis design', 'danger') + return redirect(url_for('RawAnalysisView.list')) + + @action("validate_and_submit_analysis", "Validate and upload analysis", confirmation="Validate analysis design?", multiple=True, single=False, icon="fa-rocket") + def validate_and_submit_analysis(self, item): + try: + 
analysis_list = list() + id_list = list() + if isinstance(item, list): + analysis_list = [i.analysis_name for i in item] + id_list = [i.raw_analysis_id for i in item] + else: + analysis_list = [item.analysis_name] + id_list = [item.raw_analysis_id] + _ = \ + async_validate_analysis_yaml.\ + apply_async(args=[id_list]) + flash("Submitted jobs for {0}".format(', '.join(analysis_list)), "info") + self.update_redirect() + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + log.error(e) + flash('Failed to validate analysis design', 'danger') + return redirect(url_for('RawAnalysisView.list')) @action("download_raw_analysis_damp", "Download analysis yaml", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) def download_raw_analysis_damp(self, item): - output = BytesIO(item.analysis_yaml.encode('utf-8')) - analysis_tag = item.analysis_tag.encode('utf-8').decode() - output.seek(0) - self.update_redirect() - return send_file(output, attachment_filename='{0}_analysis.yaml'.format(analysis_tag), as_attachment=True) \ No newline at end of file + try: + analysis_yaml = item.analysis_yaml + if analysis_yaml is None: + analysis_yaml = '' + output = BytesIO(analysis_yaml.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_analysis.yaml", as_attachment=True) + except Exception as e: + flash('Failed to download raw analysis', 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_rna", "Template NF_RNA", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_rna(self, item): + try: + template_tag = "NF_RNA" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + 
analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_sm_rna", "Template Snakemake_RNA", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_sm_rna(self, item): + try: + template_tag = "Snakemake_RNA" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_smrna", "Template NF_smRNA", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_smrna(self, item): + try: + template_tag = "NF_smRNA" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, 
download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_chip", "Template NF_ChIP", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_chip(self, item): + try: + template_tag = "NF_ChIP" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_atac", "Template NF_ATAC", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_atac(self, item): + try: + template_tag ="NF_ATAC" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 
'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_hic", "Template NF_HI_C", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_hic(self, item): + try: + template_tag = "NF_HI_C" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + + @action("template_nf_methylseq", "Template NF_Methylseq", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_methylseq(self, item): + try: + template_tag = "NF_Methylseq" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 
'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + + @action("template_nf_sarek", "Template NF_Sarek", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_sarek(self, item): + try: + template_tag = "NF_Sarek" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_ampliseq", "Template NF_Ampliseq", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_ampliseq(self, item): + try: + template_tag = "NF_Ampliseq" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_cutandrun", "Template NF_CutAndRun", 
confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_cutandrun(self, item): + try: + template_tag = "NF_CutAndRun" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_nf_bactmap", "Template NF_BactMap", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_nf_bactmap(self, item): + try: + template_tag = "NF_BactMap" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_custom", "Template Custom", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_custom(self, item): + try: + template_tag = "Custom" + 
if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_geomx_dcc", "Template GeoMx dcc", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_geomx_dcc(self, item): + try: + template_tag = "GEOMX_DCC" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + @action("template_cellranger_multi", "Template cellranger multi", confirmation=None, icon="fa-file-excel-o", multiple=False, single=True) + def template_cellranger_multi(self, item): + try: + template_tag = "CELLRANGER_MULTI" + if item.project_id is not None: + formatted_template = \ + generate_analysis_template( + 
project_igf_id=item.project.project_igf_id, + template_tag=template_tag) + output = BytesIO(formatted_template.encode('utf-8')) + analysis_name = item.analysis_name.encode('utf-8').decode() + output.seek(0) + self.update_redirect() + return send_file(output, download_name=f"{analysis_name}_{template_tag}_analysis.yaml", as_attachment=True) + else: + flash(f"Failed to generate {template_tag} template, no project", 'danger') + return redirect(url_for('RawAnalysisView.list')) + except Exception as e: + flash(f"Failed to generate {template_tag} template", 'danger') + log.error(e) + return redirect(url_for('RawAnalysisView.list')) + + +class RawAnalysisQueueView(ModelView): + datamodel = SQLAInterface(RawAnalysis) + label_columns = { + "analysis_name": "Analysis name", + "project.project_igf_id": "Project name", + "pipeline.pipeline_name": "Pipeline name", + "status": "Status", + "date_stamp": "Updated on", + "analysis_yaml": "Yaml", + "report": "Report" + } + list_columns = [ + "analysis_name", + "project.project_igf_id", + "pipeline.pipeline_name", + "status", + "date_stamp"] + show_columns = [ + "analysis_name", + "project.project_igf_id", + "pipeline.pipeline_name", + "status", + "date_stamp", + "analysis_yaml", + "report"] + base_filters = [ + ["status", FilterInFunction, lambda: ["VALIDATED",]]] + base_order = ("raw_analysis_id", "desc") + base_permissions = [ + "can_list", + "can_show"] \ No newline at end of file diff --git a/app/raw_metadata_api.py b/app/raw_metadata_api.py index 61d3e47..533cb09 100644 --- a/app/raw_metadata_api.py +++ b/app/raw_metadata_api.py @@ -10,6 +10,7 @@ from .raw_metadata.raw_metadata_util import search_metadata_table_and_get_new_projects from .raw_metadata.raw_metadata_util import parse_and_add_new_raw_metadata +log = logging.getLogger(__name__) class RawMetadataDataApi(ModelRestApi): resource_name = "raw_metadata" @@ -26,8 +27,14 @@ def search_metadata(self): file_obj.seek(0) json_data = file_obj.read() new_projects = \ - 
search_metadata_table_and_get_new_projects(data=json_data) - return self.response(200, new_projects=','.join(new_projects)) + search_metadata_table_and_get_new_projects( + data=json_data) + if len(new_projects) > 0: + new_projects = \ + ','.join(new_projects) + else: + new_projects = "" + return self.response(200, new_projects=new_projects) except Exception as e: logging.error(e) @@ -71,7 +78,7 @@ def download_ready_metadata(self): data = json.dumps(data) output = BytesIO(data.encode()) output.seek(0) - return send_file(output, attachment_filename='metadata.csv', as_attachment=True) + return send_file(output, download_name='metadata.json', as_attachment=True) except Exception as e: logging.error(e) diff --git a/app/raw_metadata_view.py b/app/raw_metadata_view.py index 73c33ba..e8236d9 100644 --- a/app/raw_metadata_view.py +++ b/app/raw_metadata_view.py @@ -73,7 +73,7 @@ def download_validated_metadata_csv(self, item): df.to_csv(output, index=False) output.seek(0) self.update_redirect() - return send_file(output, attachment_filename='{0}_formatted.csv'.format(tag), as_attachment=True) + return send_file(output, download_name=f"{tag}_formatted.csv", as_attachment=True) @action("upload_raw_metadata", "Mark for upload", confirmation="Change metadata status?", icon="fa-rocket") def upload_raw_metadata_csv(self, item): @@ -145,7 +145,7 @@ def download_raw_metadata_csv(self, item): df.to_csv(output, index=False) output.seek(0) self.update_redirect() - return send_file(output, attachment_filename='{0}_formatted.csv'.format(tag), as_attachment=True) + return send_file(output, download_name=f"{tag}_formatted.csv", as_attachment=True) @action("mark_raw_metadata_as_rejected", "Reject raw metadata", confirmation="Mark metadata as rejected ?", icon="fa-exclamation", multiple=False, single=True) def mark_raw_metadata_as_rejected(self, item): diff --git a/app/raw_seqrun/raw_seqrun_util.py b/app/raw_seqrun/raw_seqrun_util.py index 85c0f52..8cfc403 100644 --- 
a/app/raw_seqrun/raw_seqrun_util.py +++ b/app/raw_seqrun/raw_seqrun_util.py @@ -1,14 +1,15 @@ import typing import logging -from typing import Tuple, Any +from typing import Tuple, Any, Optional from .. import db from ..models import RawSeqrun, SampleSheetModel def check_and_add_new_raw_seqrun( - seqrun_id_list: list) \ + seqrun_id_list: list, + run_config_list: Optional[list] = None) \ -> bool: try: - for seqrun_id in seqrun_id_list: + for seqrun_list_index, seqrun_id in enumerate(seqrun_id_list): seqrun_id = \ seqrun_id.\ strip().\ @@ -19,7 +20,17 @@ def check_and_add_new_raw_seqrun( filter(RawSeqrun.raw_seqrun_igf_id==seqrun_id).\ one_or_none() if result is None: - db.session.add(RawSeqrun(raw_seqrun_igf_id=seqrun_id)) + ## try to get run config from the json data or set it to '' + run_config = None + if run_config_list is not None and \ + len(run_config_list) > seqrun_list_index: + run_config = run_config_list[seqrun_list_index] + if run_config is None: + run_config = '' + db.session.add( + RawSeqrun( + raw_seqrun_igf_id=seqrun_id, + run_config=run_config)) db.session.flush() db.session.commit() except Exception as e: diff --git a/app/raw_seqrun_api.py b/app/raw_seqrun_api.py index 5188ae5..dba2dc9 100644 --- a/app/raw_seqrun_api.py +++ b/app/raw_seqrun_api.py @@ -31,12 +31,14 @@ def add_new_seqrun(self): json_data = json_data.decode('utf-8') json_data = json.loads(json_data) seqrun_id_list = json_data.get("seqrun_id_list") + run_config_list = json_data.get("run_config_list") if seqrun_id_list is None: return self.response_400('No seqrun_id_list') if isinstance(seqrun_id_list, list) and \ len(seqrun_id_list) > 0: check_and_add_new_raw_seqrun( - seqrun_id_list) + seqrun_id_list, + run_config_list) return self.response(200, message='OK') else: return self.response_400('Empty seqrun_id_list') @@ -76,7 +78,7 @@ def search_run_samplesheet(self): output = BytesIO(csv_data.encode()) output.seek(0) attachment_filename = f"{tag}.csv" - return send_file(output, 
attachment_filename=attachment_filename, as_attachment=True) + return send_file(output, download_name=attachment_filename, as_attachment=True) except Exception as e: logging.error(e) diff --git a/app/raw_seqrun_view.py b/app/raw_seqrun_view.py index 6ffc964..35e7831 100644 --- a/app/raw_seqrun_view.py +++ b/app/raw_seqrun_view.py @@ -1,21 +1,30 @@ import os import time import logging -from wtforms.ext.sqlalchemy.fields import QuerySelectField +from datetime import datetime +from wtforms_sqlalchemy.fields import QuerySelectField from flask_appbuilder.fieldwidgets import Select2Widget from flask_appbuilder import ModelView from .models import RawSeqrun, SampleSheetModel -from flask import redirect, flash +from flask import redirect, flash, url_for from flask_appbuilder.actions import action from flask_appbuilder.models.sqla.interface import SQLAInterface from . import db from . import celery from .airflow.airflow_api_utils import trigger_airflow_pipeline +from .airflow.airflow_api_utils import get_airflow_dag_id +from flask_appbuilder.security.decorators import has_access +from flask_appbuilder.baseviews import expose log = logging.getLogger(__name__) +## TO DO: load DAG names from config file +TEST_BARCODE_DAG_TAG = 'de_multiplexing_test_barcode_dag' +PRODUCTION_PIPELINE_DAG_TAG = 'de_multiplexing_production_dag' +CLEAN_UP_DAG_TAG = 'de_multiplexing_cleanup_dag' + @celery.task(bind=True) -def async_trigger_airflow_pipeline(self, dag_id, run_list): +def async_trigger_airflow_pipeline(self, dag_id, run_list, update_trigger_date=False): try: results = list() run_id_list = list() @@ -27,21 +36,46 @@ def async_trigger_airflow_pipeline(self, dag_id, run_list): dag_id=dag_id, conf_data=entry, airflow_conf_file=os.environ['AIRFLOW_CONF_FILE']) + if res is not None and \ + update_trigger_date and \ + res.status_code == 200: + update_trigger_date_for_seqrun( + seqrun_id=entry.get('seqrun_id')) time.sleep(10) results.append(res.status_code) return dict(zip(run_id_list, 
results)) except Exception as e: - log.error(f"Failed to run celery job, error: {e}") + raise ValueError(f"Failed to run celery job, error: {e}") + + +def update_trigger_date_for_seqrun(seqrun_id: int) -> None: + try: + trigger_time = datetime.now() + try: + db.session.\ + query(RawSeqrun).\ + filter(RawSeqrun.raw_seqrun_igf_id==seqrun_id).\ + update({"trigger_time": trigger_time}) + db.session.commit() + except: + db.session.rollback() + raise + except Exception as e: + raise ValueError(f"Failed to add trigger date, error: {e}") def samplesheet_query(): - results = \ - db.session.\ - query(SampleSheetModel).\ - order_by(SampleSheetModel.samplesheet_id.desc()).\ - limit(100).\ - all() - return results + try: + results = \ + db.session.\ + query(SampleSheetModel).\ + order_by(SampleSheetModel.samplesheet_id.desc()).\ + limit(100).\ + all() + return results + except Exception as e: + raise ValueError(f"Failed to get samplesheet list, error: {e}") + class RawSeqrunView(ModelView): datamodel = SQLAInterface(RawSeqrun) @@ -53,12 +87,17 @@ class RawSeqrunView(ModelView): "samplesheet.validation_time", "samplesheet.update_time", "override_cycles", - "date_stamp"] + "mismatches", + "date_stamp", + "trigger_time"] show_columns = [ "raw_seqrun_igf_id", "status", "override_cycles", + "mismatches", "date_stamp", + "trigger_time", + "run_config", "samplesheet.samplesheet_id", "samplesheet.samplesheet_tag", "samplesheet.status", @@ -75,16 +114,21 @@ class RawSeqrunView(ModelView): "samplesheet.validation_time": "Validated on", "samplesheet.update_time": "Updated on", "override_cycles": "Override cycles", - "date_stamp": "Run date"} + "mismatches": "Barcode mismatch", + "date_stamp": "Run date", + "run_config": "Run setting", + "trigger_time": "Trigger date", + "samplesheet.csv_data": "Samplesheet data"} edit_columns = [ "raw_seqrun_igf_id", "samplesheet", - "override_cycles"] + "override_cycles", + "mismatches"] base_permissions = [ "can_list", "can_show", "can_edit"] - 
base_order = ("raw_seqrun_igf_id", "desc") + base_order = ("raw_seqrun_id", "desc") add_form_extra_fields = { "samplesheet": QuerySelectField( "SampleSheetModel", @@ -103,113 +147,169 @@ class RawSeqrunView(ModelView): @action("run_demultiplexing", "Run De-multiplexing", confirmation="Run de-multiplexing pipeline ?", multiple=False, icon="fa-plane") def run_demultiplexing(self, item): - run_list = list() - run_id_list = list() - if isinstance(item, list): - for i in item: - if i.samplesheet is None or \ - i.samplesheet.status != 'PASS' or \ - i.samplesheet.validation_time < i.samplesheet.update_time: - flash(f"Invalide Samplesheet for {i.raw_seqrun_igf_id}", "danger") - else: - run_id_list.\ - append(i.raw_seqrun_igf_id) - run_list.\ - append({ - 'seqrun_id': i.raw_seqrun_igf_id, - 'samplesheet_tag': i.samplesheet.samplesheet_tag, - 'override_cycles': i.override_cycles}) - else: - if item.samplesheet is None or \ - item.samplesheet.status != 'PASS' or \ - item.samplesheet.validation_time < item.samplesheet.update_time: - flash(f"Invalide Samplesheet for {item.raw_seqrun_igf_id}", "danger") + try: + run_list = list() + run_id_list = list() + if isinstance(item, list): + for i in item: + if i.samplesheet is None or \ + i.samplesheet.status != 'PASS' or \ + i.samplesheet.validation_time < i.samplesheet.update_time: + flash(f"Invalide Samplesheet for {i.raw_seqrun_igf_id}", "danger") + else: + run_id_list.\ + append(i.raw_seqrun_igf_id) + run_list.\ + append({ + 'seqrun_id': i.raw_seqrun_igf_id, + 'samplesheet_tag': i.samplesheet.samplesheet_tag, + 'override_cycles': i.override_cycles, + 'mismatches': i.mismatches}) else: - run_list = [{ - 'seqrun_id': item.raw_seqrun_igf_id, - 'samplesheet_tag': item.samplesheet.samplesheet_tag, - 'override_cycles': item.override_cycles}] - run_id_list = [item.raw_seqrun_igf_id] - if len(run_list) > 0: - _ = \ - async_trigger_airflow_pipeline.\ - apply_async(args=['dag24_build_bclconvert_dynamic_dags', run_list]) - flash("Running 
de-multiplexing for {0}".format(', '.join(run_id_list)), "info") - self.update_redirect() - return redirect(self.get_redirect()) + if item.samplesheet is None or \ + item.samplesheet.status != 'PASS' or \ + item.samplesheet.validation_time < item.samplesheet.update_time: + flash(f"Invalide Samplesheet for {item.raw_seqrun_igf_id}", "danger") + else: + run_list = [{ + 'seqrun_id': item.raw_seqrun_igf_id, + 'samplesheet_tag': item.samplesheet.samplesheet_tag, + 'override_cycles': item.override_cycles, + 'mismatches': item.mismatches}] + run_id_list = [item.raw_seqrun_igf_id] + if len(run_list) > 0: + airflow_dag_id = \ + get_airflow_dag_id( + airflow_conf_file=os.environ['AIRFLOW_CONF_FILE'], + dag_tag=PRODUCTION_PIPELINE_DAG_TAG) + if airflow_dag_id is None: + raise ValueError( + f"Failed to get airflow dag id for {PRODUCTION_PIPELINE_DAG_TAG}") + _ = \ + async_trigger_airflow_pipeline.\ + apply_async(args=[airflow_dag_id, run_list, True]) + flash("Running de-multiplexing for {0}".format(', '.join(run_id_list)), "info") + self.update_redirect() + return redirect(url_for('RawSeqrunView.list')) + except Exception as e: + log.error(e) + flash(f"Failed to run de-multiplexing for {', '.join(run_id_list)}", "danger") + return redirect(url_for('RawSeqrunView.list')) @action("trigger_pre_demultiplexing", "Test barcodes", confirmation="Confirm test pipeline run ?", multiple=True, single=False, icon="fa-rocket") def trigger_pre_demultiplexing(self, item): - run_list = list() - run_id_list = list() - if isinstance(item, list): - for i in item: - if i.samplesheet is None or \ - i.samplesheet.status != 'PASS' or \ - i.samplesheet.validation_time < i.samplesheet.update_time: - flash(f"Invalide Samplesheet for {i.raw_seqrun_igf_id}", "danger") - else: - run_id_list.\ - append(i.raw_seqrun_igf_id) - run_list.\ - append({ - 'seqrun_id': i.raw_seqrun_igf_id, - 'samplesheet_tag': i.samplesheet.samplesheet_tag, - 'override_cycles': i.override_cycles}) - else: - if item.samplesheet is 
None or \ - item.samplesheet.status != 'PASS' or \ - item.samplesheet.validation_time < item.samplesheet.update_time: - flash(f"Invalide Samplesheet for {item.raw_seqrun_igf_id}", "danger") + try: + run_list = list() + run_id_list = list() + if isinstance(item, list): + for i in item: + if i.samplesheet is None or \ + i.samplesheet.status != 'PASS' or \ + i.samplesheet.validation_time < i.samplesheet.update_time: + flash(f"Invalide Samplesheet for {i.raw_seqrun_igf_id}", "danger") + else: + run_id_list.\ + append(i.raw_seqrun_igf_id) + run_list.\ + append({ + 'seqrun_id': i.raw_seqrun_igf_id, + 'samplesheet_tag': i.samplesheet.samplesheet_tag, + 'override_cycles': i.override_cycles, + 'mismatches': i.mismatches}) else: - run_list = [{ - 'seqrun_id': item.raw_seqrun_igf_id, - 'samplesheet_tag': item.samplesheet.samplesheet_tag, - 'override_cycles': item.override_cycles}] - run_id_list = [item.raw_seqrun_igf_id] - if len(run_list) > 0: - _ = \ - async_trigger_airflow_pipeline.\ - apply_async(args=['dag23_test_bclconvert_demult', run_list]) - flash("Running test for {0}".format(', '.join(run_id_list)), "info") - self.update_redirect() - return redirect(self.get_redirect()) + if item.samplesheet is None or \ + item.samplesheet.status != 'PASS' or \ + item.samplesheet.validation_time < item.samplesheet.update_time: + flash(f"Invalide Samplesheet for {item.raw_seqrun_igf_id}", "danger") + else: + run_list = [{ + 'seqrun_id': item.raw_seqrun_igf_id, + 'samplesheet_tag': item.samplesheet.samplesheet_tag, + 'override_cycles': item.override_cycles, + 'mismatches': item.mismatches}] + run_id_list = [item.raw_seqrun_igf_id] + if len(run_list) > 0: + airflow_dag_id = \ + get_airflow_dag_id( + airflow_conf_file=os.environ['AIRFLOW_CONF_FILE'], + dag_tag=TEST_BARCODE_DAG_TAG) + if airflow_dag_id is None: + raise ValueError( + f"Failed to get airflow dag id for {TEST_BARCODE_DAG_TAG}") + _ = \ + async_trigger_airflow_pipeline.\ + apply_async(args=[airflow_dag_id, run_list, True]) 
+ flash("Running test for {0}".format(', '.join(run_id_list)), "info") + self.update_redirect() + return redirect(url_for('RawSeqrunView.list')) + except Exception as e: + log.error(e) + flash("Failed to run test for {0}".format(', '.join(run_id_list)), "danger") @action("cleanup_demultiplexing", "Remove fastqs for re-run", confirmation="Delete fastqs for all projects before re-run ?", multiple=False, icon="fa-exclamation") def cleanup_demultiplexing(self, item): - run_list = list() - run_id_list = list() - if isinstance(item, list): - for i in item: - if i.samplesheet is None or \ - i.samplesheet.status != 'PASS' or \ - i.samplesheet.validation_time < i.samplesheet.update_time: - flash(f"Invalide Samplesheet for {i.raw_seqrun_igf_id}", "danger") - else: - run_id_list.\ - append(i.raw_seqrun_igf_id) - run_list.\ - append({ - 'seqrun_id': i.raw_seqrun_igf_id, - 'samplesheet_tag': i.samplesheet.samplesheet_tag, - 'override_cycles': i.override_cycles}) - else: - if item.samplesheet is None or \ - item.samplesheet.status != 'PASS' or \ - item.samplesheet.validation_time < item.samplesheet.update_time: - flash(f"Invalide Samplesheet for {item.raw_seqrun_igf_id}", "danger") + try: + run_list = list() + run_id_list = list() + if isinstance(item, list): + for i in item: + if i.samplesheet is None or \ + i.samplesheet.status != 'PASS' or \ + i.samplesheet.validation_time < i.samplesheet.update_time: + flash(f"Invalide Samplesheet for {i.raw_seqrun_igf_id}", "danger") + else: + run_id_list.\ + append(i.raw_seqrun_igf_id) + run_list.\ + append({ + 'seqrun_id': i.raw_seqrun_igf_id, + 'samplesheet_tag': i.samplesheet.samplesheet_tag, + 'override_cycles': i.override_cycles}) else: - run_list = [{ - 'seqrun_id': item.raw_seqrun_igf_id, - 'samplesheet_tag': item.samplesheet.samplesheet_tag, - 'override_cycles': item.override_cycles}] - run_id_list = [item.raw_seqrun_igf_id] - #if len(run_list) > 0: - # _ = \ - # async_trigger_airflow_pipeline.\ - # apply_async(args=['TODO', 
run_list]) - flash("Removing fastqs for {0}".format(', '.join(run_id_list)), "info") - self.update_redirect() - return redirect(self.get_redirect()) \ No newline at end of file + if item.samplesheet is None or \ + item.samplesheet.status != 'PASS' or \ + item.samplesheet.validation_time < item.samplesheet.update_time: + flash(f"Invalide Samplesheet for {item.raw_seqrun_igf_id}", "danger") + else: + run_list = [{ + 'seqrun_id': item.raw_seqrun_igf_id, + 'samplesheet_tag': item.samplesheet.samplesheet_tag, + 'override_cycles': item.override_cycles}] + run_id_list = [item.raw_seqrun_igf_id] + if len(run_list) > 0: + airflow_dag_id = \ + get_airflow_dag_id( + airflow_conf_file=os.environ['AIRFLOW_CONF_FILE'], + dag_tag=CLEAN_UP_DAG_TAG) + if airflow_dag_id is None: + raise ValueError( + f"Failed to get airflow dag id for {CLEAN_UP_DAG_TAG}") + _ = \ + async_trigger_airflow_pipeline.\ + apply_async(args=[airflow_dag_id, run_list]) + flash("Removing fastqs for {0}".format(', '.join(run_id_list)), "info") + self.update_redirect() + return redirect(url_for('RawSeqrunView.list')) + except Exception as e: + log.error(e) + flash("failed to remove fastqs for {0}".format(', '.join(run_id_list)), "danger") + + # @expose('/get_samplesheet_data/') + # @has_access + # def get_samplesheet_data(self, id): + # try: + # result = \ + # db.session.\ + # query(SampleSheetModel.csv_data).\ + # join(RawSeqrun, RawSeqrun.samplesheet_id==SampleSheetModel.samplesheet_id).\ + # filter(RawSeqrun.raw_seqrun_id==id).\ + # one_or_none() + # if result is None: + # result = '' + # return result + # except Exception as e: + # log.error(e) + # result = '' + diff --git a/app/rds_project_backup_view.py b/app/rds_project_backup_view.py index 98a403f..1c5e44e 100644 --- a/app/rds_project_backup_view.py +++ b/app/rds_project_backup_view.py @@ -1,4 +1,4 @@ -from wtforms.ext.sqlalchemy.fields import QuerySelectField +from wtforms_sqlalchemy.fields import QuerySelectField from flask_appbuilder.fieldwidgets 
import Select2Widget from flask_appbuilder import ModelView from .models import RDSProject_backup, Project diff --git a/app/samplesheet_view.py b/app/samplesheet_view.py index 203f4ef..4fd36f1 100644 --- a/app/samplesheet_view.py +++ b/app/samplesheet_view.py @@ -63,7 +63,7 @@ def download_samplesheet(self, item): samplesheet_tag = samplesheet_tag.decode() output.seek(0) self.update_redirect() - return send_file(output, attachment_filename='SampleSheet_{0}.csv'.format(samplesheet_tag), as_attachment=True) + return send_file(output, download_name=f"SampleSheet_{samplesheet_tag}.csv", as_attachment=True) except: flash('Failed to download samplesheet', 'danger') return redirect(url_for('SampleSheetView.list')) @@ -91,7 +91,7 @@ def download_samplesheet_with_I5_rc(self, item): samplesheet_tag = samplesheet_tag.decode() output.seek(0) #self.update_redirect() - return send_file(output, attachment_filename='SampleSheet-I5_RC_{0}.csv'.format(samplesheet_tag), as_attachment=True) + return send_file(output, download_name=f"SampleSheet-I5_RC_{samplesheet_tag}.csv", as_attachment=True) except: flash('Failed to download I5 RC samplesheet', 'danger') return redirect(url_for('SampleSheetView.list')) @@ -118,7 +118,7 @@ def download_v2_samplesheet(self, item): samplesheet_tag = samplesheet_tag.decode() output.seek(0) self.update_redirect() - return send_file(output, attachment_filename='SampleSheet-V2_{0}.csv'.format(samplesheet_tag), as_attachment=True) + return send_file(output, download_name=f"SampleSheet-V2_{samplesheet_tag}.csv", as_attachment=True) except: flash('Failed to download v2 samplesheet', 'danger') return redirect(url_for('SampleSheetView.list')) @@ -142,4 +142,4 @@ def validate_samplesheet(self, item): return redirect(self.get_redirect()) except: flash('Failed to validate samplesheet', 'danger') - return redirect(url_for('SampleSheetView.list')) + return redirect(url_for('SampleSheetView.list')) \ No newline at end of file diff --git a/app/templates/iframe.html 
b/app/templates/iframe.html index f7615b1..33adaab 100644 --- a/app/templates/iframe.html +++ b/app/templates/iframe.html @@ -1,6 +1,6 @@ {% extends "appbuilder/base.html" %} {% block content %} -

Go Back to run list

- +

Go back to previous page

+ {% endblock %} \ No newline at end of file diff --git a/app/templates/iframe_pdf.html b/app/templates/iframe_pdf.html new file mode 100644 index 0000000..0f7c488 --- /dev/null +++ b/app/templates/iframe_pdf.html @@ -0,0 +1,8 @@ +{% extends "appbuilder/base.html" %} + +{% block content %} +

Go back to previous page

+ + + +{% endblock %} \ No newline at end of file diff --git a/app/templates/project_info.html b/app/templates/project_info.html new file mode 100644 index 0000000..eaad713 --- /dev/null +++ b/app/templates/project_info.html @@ -0,0 +1,272 @@ +{% extends "appbuilder/base.html" %} +{% block content %} + + + + + +

{{ project_igf_id }}

+ +

List of analysis

+ + +

Project summary plot

+ +
+
+
+
+
+ +
+
+
+
+ +
+
+ +
+
+ +
+
+
+
+
+
+
+ + + +

Sample read counts

+
+
+
+
+
+ +

List of raw data report

+
+
+
+
+
+ +

List of analysis

+
+
+
+
+
+ + + + + + + + + + + + + + + + + +{% endblock %} \ No newline at end of file diff --git a/app/views.py b/app/views.py index 678ad4c..ddc07ee 100644 --- a/app/views.py +++ b/app/views.py @@ -15,11 +15,13 @@ from .samplesheet_view import SampleSheetView from .raw_metadata_view import RawMetadataValidationView, RawMetadataSubmitView from .raw_seqrun_view import RawSeqrunView -from .metadata_view import ProjectView, UserView, SeqrunView, AnalysisView, SampleProjectView, SampleView -from .raw_analysis_view import RawAnalysisView +from .analysis_view import AnalysisView +from .metadata_view import ProjectView, UserView, SeqrunView, SampleProjectView, SampleView +from .raw_analysis_view import RawAnalysisView, RawAnalysisSchemaView, RawAnalysisQueueView, RawAnalysisTemplateView from .rds_project_backup_view import RDSProjectBackupView from .pipeline_trigger_view import PipelineTriggerView from .index_table_view import ProjectIndexView, SampleIndexView +from .iframe_view import IFrameView """ Application wide 404 error handler @@ -80,6 +82,13 @@ def page_not_found(e): appbuilder.\ add_view_no_menu(HomeView()) +""" +IFrame view +""" + +appbuilder.\ + add_view_no_menu(IFrameView()) + """ Seqrun view """ @@ -142,17 +151,40 @@ def page_not_found(e): appbuilder.\ add_view( RawAnalysisView, - "Add and submit analysis design", + "Create new analysis design", category_icon="fa-flask", icon="fa-file-text-o", category="Analysis") +appbuilder.\ + add_view( + RawAnalysisQueueView, + "View analysis upload queue", + category_icon="fa-flask", + icon="fa fa-binoculars", + category="Analysis") appbuilder.\ add_view( AnalysisView, - "Trigger analysis pipelines", + "Trigger analysis pipeline", category_icon="fa-flask", icon="fa-space-shuttle", category="Analysis") +appbuilder.\ + add_view( + RawAnalysisSchemaView, + "Validation schema", + category_icon="fa-flask", + icon="fa fa-check-square-o", + category="Analysis") +appbuilder.\ + add_view( + RawAnalysisTemplateView, + "Analysis 
template", + category_icon="fa-flask", + icon="fa fa-magic", + category="Analysis") + + """ Data transfer """ diff --git a/config.py b/config.py index 81129a9..889bc46 100644 --- a/config.py +++ b/config.py @@ -7,18 +7,24 @@ AUTH_OAUTH, ) -basedir = os.path.abspath(os.path.dirname(__file__)) +#basedir = os.path.abspath(os.path.dirname(__file__)) +basedir = '/tmp' + +MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # Celery CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL", 'memory://') CELERY_RESULT_BACKEND = os.environ.get("CELERY_RESULT_BACKEND", 'cache+memory://') CELERY_WORK_DIR = os.environ.get("CELERY_WORK_DIR", '/tmp') +# cache +CACHE_REDIS_URL = os.environ.get("CACHE_REDIS_URL", 'unix://') + # Your App secret key SECRET_KEY = os.environ.get("SECRET_KEY", "\2\1thisismyscretkey\1\2\e\y\y\h") # The SQLAlchemy connection string. -SQLALCHEMY_DATABASE_URI = os.environ.get("SQLALCHEMY_DATABASE_URI", "sqlite:///" + os.path.join(basedir, "app.db")) +SQLALCHEMY_DATABASE_URI = os.environ.get("SQLALCHEMY_DATABASE_URI", "sqlite:///" + os.path.join('/tmp', "app.db")) # SQLALCHEMY_DATABASE_URI = 'mysql://myapp@localhost/myapp' # SQLALCHEMY_DATABASE_URI = 'postgresql://root:password@localhost/myapp' @@ -102,6 +108,9 @@ # Setup image size default is (300, 200, True) # IMG_SIZE = (300, 200, True) +## report upload folder +REPORT_UPLOAD_PATH = "/data/static/reports/" + # Theme configuration # these are located on static/appbuilder/css/themes # you can create your own and easily use them placing them on the same dir structure to override diff --git a/docker-compose-igf-lims_v2.yaml b/docker-compose-igf-lims_v2.yaml new file mode 100644 index 0000000..00c7d41 --- /dev/null +++ b/docker-compose-igf-lims_v2.yaml @@ -0,0 +1,136 @@ +version: '3.9' +networks: + portal_network: + driver: bridge +services: + portal_db: + image: mysql:5.7 + env_file: env + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "2048m" + volumes: + - 
/home/igf/igf_portal/portal_v2/mysqlappdb:/var/lib/mysql:rw + container_name: portal_db + networks: + - portal_network + redis_db: + image: redis:6.2 + restart: unless-stopped + container_name: redis_db + logging: + driver: "json-file" + options: + max-size: "2048m" + volumes: + - /home/igf/igf_portal/portal_v2/redis_data:/data:rw + networks: + - portal_network + webserver: + image: imperialgenomicsfacility/igfportal:v0.0.2.1 + env_file: env + user: "${PORTAL_UID}:${GID}" + logging: + driver: "json-file" + options: + max-size: "2048m" + restart: unless-stopped + links: + - portal_db + - redis_db + depends_on: + - portal_db + - redis_db + volumes: + - /home/igf/igf_portal/portal_v2/IGFPortal:/github/IGFPortal:ro + - /home/igf/igf_portal/ssl_cert:/SSL:ro + - /home/igf/igf_portal/portal_v2/static:/data/static:rw + - /home/igf/igf_portal/secret/airflow_conf.json:/secret/airflow_conf.json:ro + - /home/igf/igf_portal/portal_v2/celery_tmp:/TMP_WORK_DIR:rw + container_name: webserver + command: ["gunicorn -b 0.0.0.0:8080 --threads 4 server:app"] + networks: + - portal_network + nginx: + image: nginx:1.23.3 + logging: + driver: "json-file" + options: + max-size: "2048m" + ports: + - "80:80" + - "443:443" + links: + - webserver + depends_on: + - webserver + volumes: + - /home/igf/igf_portal/ssl_cert:/SSL:ro + - /home/igf/igf_portal/portal_v2/nginx/nginx.conf:/etc/nginx/nginx.conf:ro + - /home/igf/igf_portal/portal_v2/nginx:/var/nginx/:rw + container_name: nginx + restart: unless-stopped + networks: + - portal_network + celery_worker1: + image: imperialgenomicsfacility/igfportal:v0.0.2.1 + env_file: env + user: "${PORTAL_UID}:${GID}" + logging: + driver: "json-file" + options: + max-size: "2048m" + links: + - portal_db + - redis_db + depends_on: + - portal_db + - redis_db + restart: unless-stopped + volumes: + - /home/igf/igf_portal/portal_v2/IGFPortal:/github/IGFPortal:ro + - /home/igf/igf_portal/secret/airflow_conf.json:/secret/airflow_conf.json:ro + - 
/home/igf/igf_portal/portal_v2/celery_tmp:/TMP_WORK_DIR:rw + - /home/igf/igf_portal/portal_v2/static:/data/static:rw + container_name: celery_worker1 + command: ["celery -A app.celery worker --loglevel=WARNING"] + networks: + - portal_network + celery_flower: + image: imperialgenomicsfacility/igfportal:v0.0.2.1 + env_file: env + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "2048m" + ports: + - "5556:5555" + links: + - redis_db + - portal_db + depends_on: + - redis_db + - portal_db + volumes: + - /home/igf/igf_portal/portal_v2/IGFPortal:/github/IGFPortal:ro + container_name: celery_flower + command: ["celery -A app.celery flower --basic_auth=$${BASIC_AUTH}"] + networks: + - portal_network + adminer: + image: adminer:latest + logging: + driver: "json-file" + options: + max-size: "2048m" + links: + - portal_db + ports: + - "8081:8080" + restart: unless-stopped + container_name: adminer + networks: + - portal_network \ No newline at end of file diff --git a/docker-compose-igf-lims_v2_db.yaml b/docker-compose-igf-lims_v2_db.yaml new file mode 100644 index 0000000..2070566 --- /dev/null +++ b/docker-compose-igf-lims_v2_db.yaml @@ -0,0 +1,32 @@ +version: '3.9' +networks: + portal_network: + driver: bridge +services: + portal_db: + image: mysql:5.7 + env_file: env + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "2048m" + volumes: + - /home/igf/igf_portal/portal_v2/mysqlappdb:/var/lib/mysql:rw + container_name: portal_db + networks: + - portal_network + adminer: + image: adminer:latest + logging: + driver: "json-file" + options: + max-size: "2048m" + links: + - portal_db + ports: + - "8081:8080" + restart: unless-stopped + container_name: adminer + networks: + - portal_network \ No newline at end of file diff --git a/docker-compose-igfportal.yaml b/docker-compose-igfportal.yaml new file mode 100644 index 0000000..04d6c02 --- /dev/null +++ b/docker-compose-igfportal.yaml @@ -0,0 +1,141 @@ +version: '3.9' 
+networks: + portal_network: + driver: bridge +services: + portal_db: + image: mysql:5.7.42 + env_file: env + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "1024m" + volumes: + - /home/igf/dev/mysqlappdb:/var/lib/mysql:rw + container_name: portal_db + networks: + - portal_network + redis_db: + image: redis:6.2 + restart: unless-stopped + container_name: redis_db + logging: + driver: "json-file" + options: + max-size: "1024m" + networks: + - portal_network + volumes: + - /home/igf/dev/redis_data:/data:rw + webserver: + image: imperialgenomicsfacility/igfportal:v0.0.2.1 + env_file: env + user: "${PORTAL_UID}:${GID}" + logging: + driver: "json-file" + options: + max-size: "1024m" + ports: + - 8080:8080 + restart: unless-stopped + links: + - portal_db + - redis_db + depends_on: + - portal_db + - redis_db + volumes: + - /home/igf/dev/IGFPortal:/github/IGFPortal:ro + - /home/igf/dev/ssl_cert:/SSL:ro + - /home/igf/dev/static:/data/static:rw + - /home/igf/dev/secret/airflow_conf.json:/secret/airflow_conf.json:ro + - /home/igf/dev/celery_tmp:/TMP_WORK_DIR:rw + container_name: webserver + command: ["gunicorn -b 0.0.0.0:8080 --threads 4 server:app"] + networks: + - portal_network + nginx: + image: nginx:1.23.3 + logging: + driver: "json-file" + options: + max-size: "1024m" + ports: + - "80:80" + - "443:443" + links: + - webserver + depends_on: + - webserver + volumes: + - /home/igf/dev/ssl_cert:/SSL:ro + - /home/igf/dev/nginx.conf:/etc/nginx/nginx.conf:ro + - /home/igf/dev/nginx:/var/nginx/:rw + - /home/igf/dev/static:/data/static:rw + container_name: nginx + restart: unless-stopped + networks: + - portal_network + celery_worker1: + image: imperialgenomicsfacility/igfportal:v0.0.2.1 + env_file: env + user: "${PORTAL_UID}:${GID}" + logging: + driver: "json-file" + options: + max-size: "1024m" + links: + - portal_db + - redis_db + depends_on: + - portal_db + - redis_db + restart: unless-stopped + volumes: + - 
/home/igf/dev/IGFPortal:/github/IGFPortal:ro + - /home/igf/dev/secret/airflow_conf.json:/secret/airflow_conf.json:ro + - /home/igf/dev/celery_tmp:/TMP_WORK_DIR:rw + - /home/igf/dev/static:/data/static:rw + container_name: celery_worker1 + command: ["celery -A app.celery worker --loglevel=WARNING"] + networks: + - portal_network + celery_flower: + image: imperialgenomicsfacility/igfportal:v0.0.2.1 + env_file: env + restart: unless-stopped + user: "${PORTAL_UID}:${GID}" + logging: + driver: "json-file" + options: + max-size: "1024m" + ports: + - "5555:5555" + links: + - redis_db + - portal_db + depends_on: + - redis_db + - portal_db + volumes: + - /home/igf/dev/IGFPortal:/github/IGFPortal:ro + container_name: celery_flower + command: ["celery -A app.celery flower --basic_auth=$${BASIC_AUTH}"] + networks: + - portal_network + adminer: + image: adminer:latest + user: "${PORTAL_UID}:${GID}" + logging: + driver: "json-file" + options: + max-size: "1024m" + links: + - portal_db + ports: + - "8081:8080" + restart: unless-stopped + container_name: adminer + networks: + - portal_network diff --git a/docker-compose-igfportal_db.yaml b/docker-compose-igfportal_db.yaml new file mode 100644 index 0000000..b305a93 --- /dev/null +++ b/docker-compose-igfportal_db.yaml @@ -0,0 +1,32 @@ +version: '3.9' +networks: + portal_network: + driver: bridge +services: + portal_db: + image: mysql:5.7.42 + env_file: env + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "2048m" + volumes: + - /home/igf/dev/mysqlappdb_v5:/var/lib/mysql:rw + container_name: portal_db + networks: + - portal_network + adminer: + image: adminer:latest + logging: + driver: "json-file" + options: + max-size: "2048m" + links: + - portal_db + ports: + - "8081:8080" + restart: unless-stopped + container_name: adminer + networks: + - portal_network \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index d604cdf..e4f8cc9 100644 --- a/docker-compose.yaml +++ 
b/docker-compose.yaml @@ -6,26 +6,41 @@ services: portal_db: image: mysql:5.7 env_file: env - restart: always + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "2048m" ports: - "3306:3306" volumes: - - /home/vmuser/mysqlappdb:/var/lib/mysql:rw + - /home/vmuser/mysqlappdb3:/var/lib/mysql:rw container_name: portal_db networks: - portal_network redis_db: image: redis:6.2 - restart: always + restart: unless-stopped container_name: redis_db + logging: + driver: "json-file" + options: + max-size: "2048m" + ports: + - 6379:6379 networks: - portal_network webserver: - image: igfportal:v0.0.1 + image: imperialgenomicsfacility/igfportal:v0.0.2.1 env_file: env + user: "${UID}:${GID}" ports: - 8080:8080 - restart: always + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "2048m" links: - portal_db - redis_db @@ -36,12 +51,18 @@ services: - /home/vmuser/github/IGFPortal:/github/IGFPortal:ro - /home/vmuser/github/ssl:/SSL:ro - ./static:/data/static:rw + - /home/vmuser/secrets/airflow_conf.json:/secret/airflow_conf.json:ro + - ./celery_tmp:/TMP_WORK_DIR:rw container_name: webserver command: ["flask run -h 0.0.0.0 -p 8080 --reload --debugger --without-threads"] networks: - portal_network nginx: image: nginx:1.23.3 + logging: + driver: "json-file" + options: + max-size: "2048m" ports: - "80:80" - "443:443" @@ -53,31 +74,42 @@ services: - /home/vmuser/github/ssl:/SSL:ro - /home/vmuser/github/IGFPortal/nginx.conf:/etc/nginx/nginx.conf:ro - /tmp/nginx:/var/nginx/:rw - - ./static:/data/static:rw container_name: nginx + restart: unless-stopped networks: - portal_network celery_worker1: - image: igfportal:v0.0.1 + image: imperialgenomicsfacility/igfportal:v0.0.2.1 env_file: env + user: "${UID}:${GID}" + logging: + driver: "json-file" + options: + max-size: "2048m" links: - portal_db - redis_db depends_on: - portal_db - redis_db - restart: always + restart: unless-stopped volumes: - 
/home/vmuser/github/IGFPortal:/github/IGFPortal:ro - /home/vmuser/secrets/airflow_conf.json:/secret/airflow_conf.json:ro + - ./celery_tmp:/TMP_WORK_DIR:rw + - ./static:/data/static:rw container_name: celery_worker1 - command: ["celery -A app.celery worker --loglevel=INFO"] + command: ["celery -A app.celery worker --loglevel=WARNING"] networks: - portal_network celery_flower: - image: igfportal:v0.0.1 + image: imperialgenomicsfacility/igfportal:v0.0.2.1 env_file: env - restart: always + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "2048m" ports: - "5555:5555" links: @@ -94,11 +126,15 @@ services: - portal_network adminer: image: adminer:latest + logging: + driver: "json-file" + options: + max-size: "2048m" links: - portal_db ports: - "8081:8080" - restart: always + restart: unless-stopped container_name: adminer networks: - portal_network diff --git a/docker_start.sh b/docker_start.sh index f619449..b5b0b55 100644 --- a/docker_start.sh +++ b/docker_start.sh @@ -1 +1,2 @@ -docker-compose -f docker-compose-igf-lims.yaml -p igfportal up -d \ No newline at end of file +docker-compose -f docker-compose-igf-lims.yaml -p igfportal up -d +#docker-compose -f docker-compose-igfportal.yaml -p igfportal up -d \ No newline at end of file diff --git a/docker_start_igf_lims_v2.sh b/docker_start_igf_lims_v2.sh new file mode 100644 index 0000000..3225999 --- /dev/null +++ b/docker_start_igf_lims_v2.sh @@ -0,0 +1,2 @@ +#docker-compose -f docker-compose-igf-lims_v2.yaml -p igfportal_v2 up -d +PORTAL_UID="$(id -u)" GID="$(id -g)" docker-compose -f docker-compose-igf-lims_v2_db.yaml -p igfportal_v2 up -d \ No newline at end of file diff --git a/docker_start_igfportal_v2.sh b/docker_start_igfportal_v2.sh new file mode 100644 index 0000000..fc247e5 --- /dev/null +++ b/docker_start_igfportal_v2.sh @@ -0,0 +1,3 @@ +#docker compose -f docker-compose-igfportal_db.yaml -p igfportal up -d +#UID="$(id -u)" GID="$(id -g)" docker compose -f 
docker-compose-igfportal.yaml -p igfportal up -d +PORTAL_UID="$(id -u)" GID="$(id -g)" docker compose -f docker-compose-igfportal.yaml -p igfportal up -d \ No newline at end of file diff --git a/migrations/README b/migrations/README new file mode 100644 index 0000000..0e04844 --- /dev/null +++ b/migrations/README @@ -0,0 +1 @@ +Single-database configuration for Flask. diff --git a/migrations/alembic.ini b/migrations/alembic.ini new file mode 100644 index 0000000..ec9d45c --- /dev/null +++ b/migrations/alembic.ini @@ -0,0 +1,50 @@ +# A generic, single database configuration. + +[alembic] +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic,flask_migrate + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[logger_flask_migrate] +level = INFO +handlers = +qualname = flask_migrate + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/migrations/env.py b/migrations/env.py new file mode 100644 index 0000000..89f80b2 --- /dev/null +++ b/migrations/env.py @@ -0,0 +1,110 @@ +import logging +from logging.config import fileConfig + +from flask import current_app + +from alembic import context + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. 
+fileConfig(config.config_file_name) +logger = logging.getLogger('alembic.env') + + +def get_engine(): + try: + # this works with Flask-SQLAlchemy<3 and Alchemical + return current_app.extensions['migrate'].db.get_engine() + except TypeError: + # this works with Flask-SQLAlchemy>=3 + return current_app.extensions['migrate'].db.engine + + +def get_engine_url(): + try: + return get_engine().url.render_as_string(hide_password=False).replace( + '%', '%%') + except AttributeError: + return str(get_engine().url).replace('%', '%%') + + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +config.set_main_option('sqlalchemy.url', get_engine_url()) +target_db = current_app.extensions['migrate'].db + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def get_metadata(): + if hasattr(target_db, 'metadatas'): + return target_db.metadatas[None] + return target_db.metadata + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, target_metadata=get_metadata(), literal_binds=True + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + + # this callback is used to prevent an auto-migration from being generated + # when there are no changes to the schema + # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html + def process_revision_directives(context, revision, directives): + if getattr(config.cmd_opts, 'autogenerate', False): + script = directives[0] + if script.upgrade_ops.is_empty(): + directives[:] = [] + logger.info('No changes in schema detected.') + + connectable = get_engine() + + with connectable.connect() as connection: + context.configure( + connection=connection, + target_metadata=get_metadata(), + process_revision_directives=process_revision_directives, + **current_app.extensions['migrate'].configure_args + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/migrations/script.py.mako b/migrations/script.py.mako new file mode 100644 index 0000000..2c01563 --- /dev/null +++ b/migrations/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/migrations/sql/initial_db_migrate_for_portal.sql b/migrations/sql/initial_db_migrate_for_portal.sql new file mode 100644 index 0000000..2aff363 --- /dev/null +++ b/migrations/sql/initial_db_migrate_for_portal.sql @@ -0,0 +1,10 @@ +CREATE TABLE alembic_version ( + version_num VARCHAR(32) NOT NULL, + CONSTRAINT alembic_version_pkc PRIMARY KEY (version_num) +); + +-- Running upgrade -> 78b82238be89 + + +INSERT INTO alembic_version (version_num) VALUES ('78b82238be89'); + diff --git a/migrations/sql/project_data_db_migrate_for_portal.sql b/migrations/sql/project_data_db_migrate_for_portal.sql new file mode 100644 index 0000000..2a013e5 --- /dev/null +++ b/migrations/sql/project_data_db_migrate_for_portal.sql @@ -0,0 +1,46 @@ +CREATE TABLE alembic_version ( + version_num VARCHAR(32) NOT NULL, + CONSTRAINT alembic_version_pkc PRIMARY KEY (version_num) +); + +-- Running upgrade -> 78b82238be89 + + +INSERT INTO alembic_version (version_num) VALUES ('78b82238be89'); + +-- Running upgrade 78b82238be89 -> a38c16db0e8d + +ALTER TABLE pre_demultiplexing_data ADD COLUMN file_path VARCHAR(500); + +ALTER TABLE pre_demultiplexing_data ADD COLUMN status ENUM('ACTIVE','WITHDRAWN','UNKNOWN') NOT NULL DEFAULT 'UNKNOWN'; + +ALTER TABLE raw_analysis ADD COLUMN project_id INTEGER UNSIGNED; + +ALTER TABLE raw_analysis ADD COLUMN pipeline_id INTEGER UNSIGNED; + +ALTER TABLE raw_analysis ADD COLUMN analysis_name VARCHAR(120) NOT NULL; + +ALTER TABLE raw_analysis MODIFY analysis_yaml LONGTEXT NULL; + +ALTER TABLE raw_analysis MODIFY report LONGTEXT NULL; + +ALTER TABLE raw_analysis ADD UNIQUE (analysis_name, project_id); + +ALTER TABLE raw_analysis ADD FOREIGN KEY(pipeline_id) REFERENCES pipeline (pipeline_id) ON 
DELETE SET NULL ON UPDATE CASCADE; + +ALTER TABLE raw_analysis ADD FOREIGN KEY(project_id) REFERENCES project (project_id) ON DELETE SET NULL ON UPDATE CASCADE; + +ALTER TABLE raw_analysis DROP COLUMN analysis_tag; + +ALTER TABLE raw_seqrun ADD COLUMN mismatches ENUM('0','1','2') DEFAULT '1'; + +ALTER TABLE raw_seqrun ADD COLUMN trigger_time TIMESTAMP NULL; + +ALTER TABLE raw_seqrun ADD COLUMN run_config LONGTEXT; + +ALTER TABLE raw_seqrun MODIFY COLUMN status ENUM("ACTIVE", "REJECTED", "PREDEMULT", "READY", "FINISHED") NOT NULL DEFAULT 'ACTIVE'; + +ALTER TABLE sample_index ADD FOREIGN KEY(project_index_id) REFERENCES project_index (project_index_id) ON DELETE SET NULL ON UPDATE CASCADE; + +UPDATE alembic_version SET version_num='a38c16db0e8d' WHERE alembic_version.version_num = '78b82238be89'; + diff --git a/migrations/versions/78b82238be89_.py b/migrations/versions/78b82238be89_.py new file mode 100644 index 0000000..0e746e9 --- /dev/null +++ b/migrations/versions/78b82238be89_.py @@ -0,0 +1,74 @@ +"""empty message + +Revision ID: 78b82238be89 +Revises: +Create Date: 2023-04-04 16:09:56.224833 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '78b82238be89' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('experiment_attribute', schema=None) as batch_op: + batch_op.drop_constraint('experiment_id', type_='unique') + batch_op.drop_index('experiment_id') + + with op.batch_alter_table('project_attribute', schema=None) as batch_op: + batch_op.drop_constraint('project_id', type_='unique') + batch_op.drop_index('project_id') + + with op.batch_alter_table('run', schema=None) as batch_op: + batch_op.drop_constraint('experiment_id', type_='unique') + batch_op.drop_index('experiment_id') + + with op.batch_alter_table('run_attribute', schema=None) as batch_op: + batch_op.drop_constraint('run_id', type_='unique') + batch_op.drop_index('run_id') + + with op.batch_alter_table('seqrun_attribute', schema=None) as batch_op: + batch_op.drop_constraint('seqrun_id', type_='unique') + batch_op.drop_index('seqrun_id') + + with op.batch_alter_table('seqrun_stats', schema=None) as batch_op: + batch_op.drop_constraint('seqrun_id', type_='unique') + batch_op.drop_index('seqrun_id') + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('seqrun_stats', schema=None) as batch_op: + batch_op.create_index('seqrun_id', ['seqrun_id', 'lane_number'], unique=False) + batch_op.create_unique_constraint('seqrun_id', ['seqrun_id', 'lane_number']) + + with op.batch_alter_table('seqrun_attribute', schema=None) as batch_op: + batch_op.create_index('seqrun_id', ['seqrun_id', 'attribute_name', 'attribute_value'], unique=False) + batch_op.create_unique_constraint('seqrun_id', ['seqrun_id', 'attribute_name', 'attribute_value']) + + with op.batch_alter_table('run_attribute', schema=None) as batch_op: + batch_op.create_index('run_id', ['run_id', 'attribute_name', 'attribute_value'], unique=False) + batch_op.create_unique_constraint('run_id', ['run_id', 'attribute_name', 'attribute_value']) + + with op.batch_alter_table('run', schema=None) as batch_op: + batch_op.create_index('experiment_id', ['experiment_id', 'seqrun_id', 'lane_number'], unique=False) + batch_op.create_unique_constraint('experiment_id', ['experiment_id', 'seqrun_id', 'lane_number']) + + with op.batch_alter_table('project_attribute', schema=None) as batch_op: + batch_op.create_index('project_id', ['project_id', 'attribute_name', 'attribute_value'], unique=False) + batch_op.create_unique_constraint('project_id', ['project_id', 'attribute_name', 'attribute_value']) + + with op.batch_alter_table('experiment_attribute', schema=None) as batch_op: + batch_op.create_index('experiment_id', ['experiment_id', 'attribute_name', 'attribute_value'], unique=False) + batch_op.create_unique_constraint('experiment_id', ['experiment_id', 'attribute_name', 'attribute_value']) + + # ### end Alembic commands ### diff --git a/migrations/versions/a38c16db0e8d_.py b/migrations/versions/a38c16db0e8d_.py new file mode 100644 index 0000000..3e27c05 --- /dev/null +++ b/migrations/versions/a38c16db0e8d_.py @@ -0,0 +1,143 @@ +"""empty message + +Revision ID: a38c16db0e8d +Revises: 78b82238be89 +Create Date: 2023-04-04 16:33:12.174319 + +""" +from 
alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql +from app.models import LONGTEXTType +from app.models import JSONType +# revision identifiers, used by Alembic. +revision = 'a38c16db0e8d' +down_revision = '78b82238be89' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('ab_user_role', schema=None) as batch_op: + batch_op.drop_constraint('user_id', type_='unique') + batch_op.drop_index('user_id') + + with op.batch_alter_table('experiment', schema=None) as batch_op: + batch_op.drop_constraint('sample_id', type_='unique') + batch_op.drop_index('sample_id') + + with op.batch_alter_table('pre_demultiplexing_data', schema=None) as batch_op: + batch_op.add_column(sa.Column('file_path', sa.String(length=500), nullable=True)) + batch_op.add_column(sa.Column('status', sa.Enum('ACTIVE', 'WITHDRAWN', 'UNKNOWN'), server_default='UNKNOWN', nullable=False)) + + with op.batch_alter_table('project_analysis_info_data', schema=None) as batch_op: + batch_op.drop_constraint('project_id', type_='unique') + batch_op.drop_index('project_id') + + with op.batch_alter_table('project_seqrun_info_data', schema=None) as batch_op: + batch_op.drop_constraint('project_id', type_='unique') + batch_op.drop_index('project_id') + + with op.batch_alter_table('raw_analysis', schema=None) as batch_op: + batch_op.add_column(sa.Column('project_id', mysql.INTEGER(unsigned=True), nullable=True)) + batch_op.add_column(sa.Column('pipeline_id', mysql.INTEGER(unsigned=True), nullable=True)) + batch_op.add_column(sa.Column('analysis_name', sa.String(length=120), nullable=False)) + batch_op.alter_column('analysis_yaml', + existing_type=mysql.LONGTEXT(), + nullable=True) + batch_op.alter_column('report', + existing_type=mysql.TEXT(), + type_=LONGTEXTType(), + existing_nullable=True) + batch_op.drop_index('analysis_tag') + batch_op.create_unique_constraint(None, ['analysis_name', 
'project_id']) + batch_op.create_foreign_key(None, 'pipeline', ['pipeline_id'], ['pipeline_id'], onupdate='CASCADE', ondelete='SET NULL') + batch_op.create_foreign_key(None, 'project', ['project_id'], ['project_id'], onupdate='CASCADE', ondelete='SET NULL') + batch_op.drop_column('analysis_tag') + + with op.batch_alter_table('raw_seqrun', schema=None) as batch_op: + batch_op.add_column(sa.Column('mismatches', sa.Enum('0', '1', '2'), server_default='1', nullable=True)) + batch_op.add_column(sa.Column('trigger_time', sa.TIMESTAMP(), nullable=True)) + batch_op.add_column(sa.Column('run_config', LONGTEXTType(), nullable=True)) + + with op.batch_alter_table('run', schema=None) as batch_op: + batch_op.drop_constraint('experiment_id', type_='unique') + batch_op.drop_index('experiment_id') + + with op.batch_alter_table('sample_attribute', schema=None) as batch_op: + batch_op.drop_constraint('sample_id', type_='unique') + batch_op.drop_index('sample_id') + + with op.batch_alter_table('sample_index', schema=None) as batch_op: + batch_op.drop_constraint('sample_index_ibfk_1', type_='foreignkey') + batch_op.create_foreign_key(None, 'project_index', ['project_index_id'], ['project_index_id'], onupdate='CASCADE', ondelete='SET NULL') + + with op.batch_alter_table('seqrun_stats', schema=None) as batch_op: + batch_op.drop_constraint('seqrun_id', type_='unique') + batch_op.drop_index('seqrun_id') + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + with op.batch_alter_table('seqrun_stats', schema=None) as batch_op: + batch_op.create_index('seqrun_id', ['seqrun_id', 'lane_number'], unique=False) + batch_op.create_unique_constraint('seqrun_id', ['seqrun_id', 'lane_number']) + + with op.batch_alter_table('sample_index', schema=None) as batch_op: + batch_op.drop_constraint(None, type_='foreignkey') + batch_op.create_foreign_key('sample_index_ibfk_1', 'project_index', ['project_index_id'], ['project_index_id']) + + with op.batch_alter_table('sample_attribute', schema=None) as batch_op: + batch_op.create_index('sample_id', ['sample_id', 'attribute_name', 'attribute_value'], unique=False) + batch_op.create_unique_constraint('sample_id', ['sample_id', 'attribute_name', 'attribute_value']) + + with op.batch_alter_table('run', schema=None) as batch_op: + batch_op.create_index('experiment_id', ['experiment_id', 'seqrun_id', 'lane_number'], unique=False) + batch_op.create_unique_constraint('experiment_id', ['experiment_id', 'seqrun_id', 'lane_number']) + + with op.batch_alter_table('raw_seqrun', schema=None) as batch_op: + batch_op.drop_column('run_config') + batch_op.drop_column('trigger_time') + batch_op.drop_column('mismatches') + + with op.batch_alter_table('raw_analysis', schema=None) as batch_op: + batch_op.add_column(sa.Column('analysis_tag', mysql.VARCHAR(length=50), nullable=False)) + batch_op.drop_constraint(None, type_='foreignkey') + batch_op.drop_constraint(None, type_='foreignkey') + batch_op.drop_constraint(None, type_='unique') + batch_op.create_index('analysis_tag', ['analysis_tag'], unique=False) + batch_op.alter_column('report', + existing_type=LONGTEXTType(), + type_=mysql.TEXT(), + existing_nullable=True) + batch_op.alter_column('analysis_yaml', + existing_type=mysql.LONGTEXT(), + nullable=False) + batch_op.drop_column('analysis_name') + batch_op.drop_column('pipeline_id') + batch_op.drop_column('project_id') + + with op.batch_alter_table('project_seqrun_info_data', schema=None) as batch_op: + 
batch_op.create_index('project_id', ['project_id', 'seqrun_id', 'lane_number', 'index_group_tag'], unique=False) + batch_op.create_unique_constraint('project_id', ['project_id', 'seqrun_id', 'lane_number', 'index_group_tag']) + + with op.batch_alter_table('project_analysis_info_data', schema=None) as batch_op: + batch_op.create_index('project_id', ['project_id', 'analysis_id'], unique=False) + batch_op.create_unique_constraint('project_id', ['project_id', 'analysis_id']) + + with op.batch_alter_table('pre_demultiplexing_data', schema=None) as batch_op: + batch_op.drop_column('status') + batch_op.drop_column('file_path') + + with op.batch_alter_table('experiment', schema=None) as batch_op: + batch_op.create_index('sample_id', ['sample_id', 'library_name', 'platform_name'], unique=False) + batch_op.create_unique_constraint('sample_id', ['sample_id', 'library_name', 'platform_name']) + + with op.batch_alter_table('ab_user_role', schema=None) as batch_op: + batch_op.create_index('user_id', ['user_id', 'role_id'], unique=False) + batch_op.create_unique_constraint('user_id', ['user_id', 'role_id']) + + # ### end Alembic commands ### diff --git a/nginx_template.conf b/nginx_template.conf index 78e985f..a4ef719 100644 --- a/nginx_template.conf +++ b/nginx_template.conf @@ -21,6 +21,8 @@ http { ssl_session_cache shared:SSL:10m; ssl_session_timeout 10m; + client_max_body_size 50M; + upstream app_server { server webserver:8080 fail_timeout=0; } @@ -33,9 +35,10 @@ http { server { # Redirect http to https - listen 80 default_server; + #listen 80 default_server; + listen 80; server_name localhost; - return 301 https://localhost$request_uri; + return 301 https://$server_name$request_uri; } server { @@ -44,6 +47,9 @@ http { ssl_certificate_key /SSL/key.pem; keepalive_timeout 70s; server_name localhost; + + client_max_body_size 50M; + location / { # checks for static file, if not found proxy to app try_files $uri @proxy_to_app; diff --git a/requirements.txt b/requirements.txt index 
d04a62a..9e6de89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,82 +1,36 @@ -amqp==5.0.7 apache-airflow-client @ git+https://github.com/apache/airflow-client-python.git@99b3d15620e543cf7b01da8f076bb78ae3eb9b71 -apispec==3.3.2 -attrs==21.2.0 -Babel==2.9.1 -billiard==3.6.4.0 -Brotli==1.0.9 -cached-property==1.5.2 -celery==5.2.1 -certifi==2021.10.8 -charset-normalizer==2.0.9 -click==8.0.0 -click-didyoumean==0.3.0 -click-plugins==1.1.1 -click-repl==0.2.0 -colorama==0.4.4 -dash==2.0.0 -dash-core-components==2.0.0 -dash-html-components==2.0.0 -dash-table==5.0.0 -defusedxml==0.7.1 -Deprecated==1.2.13 -dnspython==2.1.0 -email-validator==1.1.3 -Flask==1.1.2 -Flask-AppBuilder==3.4.1 -Flask-Babel==2.0.0 -Flask-Compress==1.10.1 -Flask-JWT-Extended==3.25.1 -Flask-Login==0.4.1 -Flask-OpenID==1.3.0 +celery==5.2.7 +coverage==7.0.3 +Flask==2.2.2 +Flask-AppBuilder==4.1.6 +Flask-Caching==2.0.1 +Flask-Login==0.6.2 Flask-SQLAlchemy==2.5.1 -Flask-WTF==0.14.3 -flower==1.0.0 +Flask-WTF==1.0.1 +Flask-Migrate==4.0.4 +flower==1.2.0 gunicorn==20.1.0 -humanize==3.13.1 -idna==3.3 -importlib-metadata==4.9.0 -itsdangerous==1.1.0 -Jinja2==2.11.3 -jsonschema==3.2.0 -kombu==5.2.2 -lxml==4.7.1 -MarkupSafe==2.0.1 -marshmallow==3.14.1 -marshmallow-enum==1.5.1 -marshmallow-sqlalchemy==0.26.1 -mongoengine==0.23.1 -numpy==1.21.5 -pandas==1.3.5 +gviz-api==1.10.0 +Jinja2==3.1.2 +jsonschema==4.17.3 +lxml==4.9.2 +numpy==1.24.0 +pandas==1.5.2 pandas-datareader==0.10.0 Pillow==8.4.0 -plotly==5.5.0 -prison==0.2.1 -prometheus-client==0.12.0 -prompt-toolkit==3.0.24 -PyJWT==1.7.1 -pymongo==3.12.3 +plotly==5.11.0 PyMySQL==1.0.2 -pyOpenSSL==22.0.0 -pyrsistent==0.18.0 -pytest==6.2.5 +pyOpenSSL==22.1.0 +pytest==7.2.0 +pytest-cov==4.0.0 python-dateutil==2.8.2 -python3-openid==3.2.0 -pytz==2021.3 PyYAML==6.0 -redis==3.5.3 -requests==2.26.0 -six==1.16.0 -SQLAlchemy==1.3.24 -SQLAlchemy-Utils==0.37.9 -tenacity==8.0.1 -tornado==6.1 -tox==3.24.5 -typing_extensions==4.0.1 -urllib3==1.26.7 -vine==5.0.0 
-wcwidth==0.2.5 -Werkzeug==1.0.1 -wrapt==1.13.3 -WTForms==2.3.3 -zipp==3.6.0 +redis==4.4.0 +requests==2.28.1 +SQLAlchemy==1.4.45 +SQLAlchemy-Utils==0.39.0 +tox==4.2.0 +typing_extensions==4.4.0 +WTForms==3.0.1 +WTForms-SQLAlchemy==0.3 +Werkzeug==2.2.2 diff --git a/setup.py b/setup.py index d943bb1..ee46dcc 100644 --- a/setup.py +++ b/setup.py @@ -6,5 +6,6 @@ tests_require = ["tox"] setup( + packages=['app',], test_suite = "tests.get_tests" ) \ No newline at end of file diff --git a/static/predemult/bclconvert_report_v0.03.html b/static/predemult/bclconvert_report_v0.03.html new file mode 100644 index 0000000..d17837d --- /dev/null +++ b/static/predemult/bclconvert_report_v0.03.html @@ -0,0 +1,15844 @@ + + + + + +bclconvert_report_v0.03 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/__init__.py b/tests/__init__.py index c926310..e69de29 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,31 +0,0 @@ -import unittest - -def get_tests(): - return full_suite() - -def full_suite(): - # import statements - from .test_apis import TestApiCase - from .test_pre_demultiplexing_view import TestPreDemultView - from .test_samplesheet_util import TestSampleSheetUtil, TestSampleSheetDbUpdate - from .test_raw_metadata_util import TestMetaDataValidation1, TestMetaDataValidation2, TestMetadataApiutil1 - from .test_metadata_util import TestMetadataUtil1, TestMetadataUtil2, TestMetadataUtil3 - from .test_admin_home_utils import TestAdminHomeUtil1 - from .test_rawseqrun_utils import TestRawSeqrunA - return unittest.TestSuite([ - unittest.TestLoader().loadTestsFromTestCase(t) - for t in [ - TestApiCase, - TestPreDemultView, - TestSampleSheetUtil, - TestSampleSheetDbUpdate, - TestMetaDataValidation1, - TestMetaDataValidation2, - TestMetadataApiutil1, - TestMetadataUtil1, - TestMetadataUtil2, - TestMetadataUtil3, - TestAdminHomeUtil1, - TestRawSeqrunA - ] - ]) \ No newline at end of file diff --git a/tests/config_test.py 
b/tests/config_test.py new file mode 100644 index 0000000..c7fd31f --- /dev/null +++ b/tests/config_test.py @@ -0,0 +1,16 @@ +import os + +TESTING = True +CSRF_ENABLED = False +SECRET_KEY = "thisismyscretkey" +SQLALCHEMY_TRACK_MODIFICATIONS = False +WTF_CSRF_ENABLED = False +AUTH_ROLE_ADMIN = "Admin" +AUTH_USER_REGISTRATION_ROLE = "Admin" +REPORT_UPLOAD_PATH = "/tmp" +SQLALCHEMY_DATABASE_URI = \ + os.environ.get("SQLALCHEMY_DATABASE_URI", "sqlite:///" + '/tmp/app.db') +AUTH_ROLES_MAPPING = { + "User": ["User"], + "Admin": ["admin"], +} \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..2cc6cc1 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,216 @@ +import pytest +from app import app, appbuilder + +# pytest_plugins = ("celery.contrib.pytest", ) + +@pytest.fixture(scope="function") +def db(): + from app import db + db.drop_all() + db.create_all() + yield db + +@pytest.fixture(scope="function") +def test_client(db): + app.config.update({ + "TESTING": True, + "CSRF_ENABLED": False, + "SQLALCHEMY_TRACK_MODIFICATIONS": False, + "WTF_CSRF_ENABLED": False + }) + #db.create_all() + admin_role = \ + app.appbuilder.sm.find_role("Admin") + if admin_role is None: + admin_role = app.appbuilder.sm.add_role("Admin") + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_show", + "HomeView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "HomeView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_admin_home", + "HomeView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_general", + "HomeView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "HomeView")) + app.appbuilder.sm.add_permission_role( + admin_role, 
+ app.appbuilder.sm.add_permission_view_menu( + "can_list", + "IlluminaInteropDataView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "PreDeMultiplexingDataView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "RawSeqrunView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "trigger_pre_demultiplexing", + "RawSeqrunView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "RawMetadataValidationView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "RawMetadataSubmitView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "ProjectIndexView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "SampleIndexView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "RawAnalysisView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "RDSProjectBackupView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "AnalysisView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "ProjectView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "UserView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "SeqrunView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( 
+ "can_list", + "SampleSheetView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_list", + "SampleProjectView")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_get", "api")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_post", "api")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_search_new_analysis", "RawAnalysisApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_get_raw_analysis_data", "RawAnalysisApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_mark_analysis_synched", "RawAnalysisApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_mark_analysis_rejected", "RawAnalysisApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_search_metadata", "RawMetadataDataApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_add_raw_metadata", "RawMetadataDataApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_download_ready_metadata", "RawMetadataDataApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_mark_ready_metadata_as_synced", "RawMetadataDataApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_add_new_seqrun", "RawSeqrunApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_search_run_samplesheet", "RawSeqrunApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + 
app.appbuilder.sm.add_permission_view_menu( + "can_get_run_override_cycle", "RawSeqrunApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_get_samplesheet_id", "RawSeqrunApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_update_admin_view_data", "AdminHomeApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_submit_cleanup_job", "MetadataLoadApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_add_report", "PreDeMultiplexingDataApi")) + app.appbuilder.sm.add_permission_role( + admin_role, + app.appbuilder.sm.add_permission_view_menu( + "can_add_report", "SeqrunInteropApi")) + user = \ + app.appbuilder.sm.find_user(email="admin@fab.org") + if user is None: + app.appbuilder.sm.add_user( + "admin", + "admin", + "user", + "admin@fab.org", + [admin_role], + "password") + with app.test_client() as testing_client: + with app.app_context(): + # db.init_app(app) + yield testing_client diff --git a/tests/test_admin_home_api.py b/tests/test_admin_home_api.py new file mode 100644 index 0000000..bbb1222 --- /dev/null +++ b/tests/test_admin_home_api.py @@ -0,0 +1,120 @@ +from app.admin_home_api import async_parse_and_add_new_admin_view_data +from app.models import AdminHomeData +import os, json +from io import BytesIO +from unittest.mock import patch +from flask_appbuilder.const import ( + API_SECURITY_PASSWORD_KEY, + API_SECURITY_PROVIDER_KEY, + API_SECURITY_REFRESH_KEY, + API_SECURITY_USERNAME_KEY) + +def test_admin_home_api1(db, test_client): + results = \ + db.session.\ + query(AdminHomeData).\ + filter(AdminHomeData.admin_data_tag=='test').\ + one_or_none() + assert results is None + json_data = { + 'admin_data_tag': 'test', + 'recent_finished_runs': 1, + 'recent_finished_analysis': 1, + 'ongoing_runs': 1, + 'ongoing_analysis': 
1, + 'sequence_counts_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]}, + 'storage_stat_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]} + } + json_data = \ + json.dumps(json_data) + res = \ + test_client.post( + '/api/v1/admin_home/update_admin_view_data', + data=dict(file=(BytesIO(json_data.encode()), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code != 200 + res = \ + test_client.post( + "/api/v1/security/login", + json={ + API_SECURITY_USERNAME_KEY: "admin", + API_SECURITY_PASSWORD_KEY: "password", + API_SECURITY_PROVIDER_KEY: "db"}) + assert res.status_code == 200 + token = \ + json.loads(res.data.decode("utf-8")).\ + get("access_token") + json_data = { + 'admin_data_tag': 'test', + 'recent_finished_runs': 1, + 'recent_finished_analysis': 1, + 'ongoing_runs': 1, + 'ongoing_analysis': 1, + 'sequence_counts_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]}, + 'storage_stat_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]} + } + json_data = \ + json.dumps(json_data) + res = \ + test_client.post( + '/api/v1/admin_home/update_admin_view_data', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(BytesIO(json_data.encode()), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code == 200 + assert json.loads(res.data.decode('utf-8')).get("message") == 'loaded new data' + + +def test_async_parse_and_add_new_admin_view_data(db, tmp_path): + json_data = { + 'admin_data_tag': 'test', + 'recent_finished_runs': 1, + 'recent_finished_analysis': 1, + 'ongoing_runs': 1, + 'ongoing_analysis': 1, + 'sequence_counts_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]}, + 'storage_stat_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]} + } + 
temp_json_file = \ + os.path.join( + tmp_path, + 'admin_home1.json') + with open(temp_json_file, 'w') as fp: + json.dump(json_data, fp) + _ = \ + async_parse_and_add_new_admin_view_data( + temp_json_file) + results = \ + db.session.\ + query(AdminHomeData).\ + filter(AdminHomeData.admin_data_tag=='test').\ + one_or_none() + assert results is not None + assert results.recent_finished_runs == 1 + assert isinstance(results.sequence_counts_plot, str) + json_data = { + 'admin_data_tag': 'test', + 'recent_finished_runs': 2, + 'recent_finished_analysis': 1, + 'ongoing_runs': 1, + 'ongoing_analysis': 1, + 'sequence_counts_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]}, + 'storage_stat_plot': {'labels': ['a', 'b', 'c'], 'datasets': [{'label': 'test1', 'data': [0, 1, 2]}]}} + temp_json_file = \ + os.path.join( + tmp_path, + 'admin_home2.json') + with open(temp_json_file, 'w') as fp: + json.dump(json_data, fp) + _ = \ + async_parse_and_add_new_admin_view_data( + temp_json_file) + results = \ + db.session.\ + query(AdminHomeData).\ + filter(AdminHomeData.admin_data_tag=='test').\ + one_or_none() + assert results is not None + assert results.recent_finished_runs == 2 \ No newline at end of file diff --git a/tests/test_admin_home_utils.py b/tests/test_admin_home_utils.py index 3233c50..d26c37c 100644 --- a/tests/test_admin_home_utils.py +++ b/tests/test_admin_home_utils.py @@ -1,16 +1,28 @@ import os, unittest, json, tempfile -from app import appbuilder, db +# from app import db, app from app.models import AdminHomeData from app.admin_home.admin_home_utils import parse_and_add_new_admin_view_data -class TestAdminHomeUtil1(unittest.TestCase): - def setUp(self): - db.create_all() +# class TestAdminHomeUtil1(unittest.TestCase): +# def setUp(self): +# # os.environ['SQLALCHEMY_DATABASE_URI'] = "sqlite:////tmp/app_admin_home.db" +# app.config.update({ +# "TESTING": True, +# "CSRF_ENABLED": False, +# # "SQLALCHEMY_DATABASE_URI": 
"sqlite:////tmp/app_admin_home.db", +# }) +# db.create_all() +# self.app_context = app.app_context() +# self.app_context.push() +# print(db.get_engine().url) - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.session.remove() +# self.app_context.pop() +# db.drop_all() - def test_parse_and_add_new_admin_view_data(self): +def test_parse_and_add_new_admin_view_data(db): + # def test_parse_and_add_new_admin_view_data(self): json_data = { 'admin_data_tag': 'test', 'recent_finished_runs': 1, @@ -33,9 +45,12 @@ def test_parse_and_add_new_admin_view_data(self): query(AdminHomeData).\ filter(AdminHomeData.admin_data_tag=='test').\ one_or_none() - self.assertIsNotNone(results) - self.assertEqual(results.recent_finished_runs, 1) - self.assertTrue(isinstance(results.sequence_counts_plot, str)) + # self.assertIsNotNone(results) + # self.assertEqual(results.recent_finished_runs, 1) + # self.assertTrue(isinstance(results.sequence_counts_plot, str)) + assert results is not None + assert results.recent_finished_runs == 1 + assert isinstance(results.sequence_counts_plot, str) json_data = { 'admin_data_tag': 'test', 'recent_finished_runs': 2, @@ -52,8 +67,10 @@ def test_parse_and_add_new_admin_view_data(self): query(AdminHomeData).\ filter(AdminHomeData.admin_data_tag=='test').\ one_or_none() - self.assertIsNotNone(results) - self.assertEqual(results.recent_finished_runs, 2) + # self.assertIsNotNone(results) + # self.assertEqual(results.recent_finished_runs, 2) + assert results is not None + assert results.recent_finished_runs == 2 -if __name__ == '__main__': - unittest.main() \ No newline at end of file +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file diff --git a/tests/test_airflow_api_utils.py b/tests/test_airflow_api_utils.py new file mode 100644 index 0000000..3ecf4b4 --- /dev/null +++ b/tests/test_airflow_api_utils.py @@ -0,0 +1,89 @@ +import os +import json +import requests +import tempfile +from unittest.mock import MagicMock +from 
unittest.mock import patch +from app.airflow.airflow_api_utils import get_airflow_dag_id +from app.airflow.airflow_api_utils import post_to_airflow_api +from app.airflow.airflow_api_utils import trigger_airflow_pipeline + +@patch('app.airflow.airflow_api_utils.requests', return_value=requests.patch('https://httpbin.org/patch', data ={'key': 'value'}, headers={'Content-Type': 'application/json'})) +def test_get_airflow_dag_id(mock_object, tmp_path): + config_file_path = os.path.join(tmp_path, 'airflow_conf.json') + conf_data = { + "url": "https://airflow.test/api/v1/", + "username": "airflow", + "password": "airflow", + "de_multiplexing_test_barcode_dag": "dag23_test_bclconvert_demult", + "de_multiplexing_production_dag": "dag24_build_bclconvert_dynamic_dags", + "de_multiplexing_cleanup_dag": "dag27_cleanup_demultiplexing_output" + } + with open(config_file_path, "w") as fp: + json.dump(conf_data, fp) + assert os.path.exists(config_file_path) + dag_id = \ + get_airflow_dag_id( + airflow_conf_file=os.path.abspath(config_file_path), + dag_tag="de_multiplexing_test_barcode_dag") + assert dag_id == "dag23_test_bclconvert_demult" + dag_id = \ + get_airflow_dag_id( + airflow_conf_file=config_file_path, + dag_tag="no match") + assert dag_id is None + + +@patch('app.airflow.airflow_api_utils.requests', return_value=requests.patch('https://httpbin.org/patch', data=json.dumps({'key': 'value'}), headers={'Content-Type': 'application/json'})) +def test_post_to_airflow_api(mock_object, tmp_path): + config_file_path = os.path.join(tmp_path, 'airflow_conf.json') + conf_data = { + "url": "https://airflow.test/api/v1/", + "username": "airflow", + "password": "airflow", + "de_multiplexing_test_barcode_dag": "dag23_test_bclconvert_demult", + "de_multiplexing_production_dag": "dag24_build_bclconvert_dynamic_dags", + "de_multiplexing_cleanup_dag": "dag27_cleanup_demultiplexing_output" + } + with open(config_file_path, "w") as fp: + json.dump(conf_data, fp) + res = \ + 
post_to_airflow_api( + airflow_conf_file=config_file_path, + url_suffix="test", + data={"key": "val"}, + dry_run=True + ) + mock_object.post.assert_called_once() + mock_object.post.assert_called_with(url="https://airflow.test/api/v1/test", + data=json.dumps({"key": "val"}), + headers={"Content-Type": "application/json"}, + auth=("airflow", "airflow"), + verify=False) + + +@patch('app.airflow.airflow_api_utils.requests', return_value=requests.patch('https://httpbin.org/patch', data ={'key': 'value'}, headers={'Content-Type': 'application/json'})) +def test_trigger_airflow_pipeline(mock_object, tmp_path): + config_file_path = os.path.join(tmp_path, 'airflow_conf.json') + conf_data = { + "url": "https://airflow.test/api/v1/", + "username": "airflow", + "password": "airflow", + "de_multiplexing_test_barcode_dag": "dag23_test_bclconvert_demult", + "de_multiplexing_production_dag": "dag24_build_bclconvert_dynamic_dags", + "de_multiplexing_cleanup_dag": "dag27_cleanup_demultiplexing_output" + } + with open(config_file_path, "w") as fp: + json.dump(conf_data, fp) + res = \ + trigger_airflow_pipeline( + dag_id="dag23_test_bclconvert_demult", + conf_data={"key": "value"}, + airflow_conf_file=config_file_path, + dry_run=True) + mock_object.post.assert_called_once() + mock_object.post.assert_called_with(url="https://airflow.test/api/v1/dags/dag23_test_bclconvert_demult/dagRuns", + data=json.dumps({"conf": {"key": "value"}}), + headers={"Content-Type": "application/json"}, + auth=("airflow", "airflow"), + verify=False) diff --git a/tests/test_analysis_view.py b/tests/test_analysis_view.py new file mode 100644 index 0000000..8a75422 --- /dev/null +++ b/tests/test_analysis_view.py @@ -0,0 +1,167 @@ +import os +import requests +from app.models import Analysis +from app.models import Pipeline_seed +from app.models import Pipeline +from app.models import Project +from unittest.mock import patch +from app.analysis_view import get_analysis_pipeline_seed_status +from app.analysis_view 
import async_submit_analysis_pipeline + +def test_get_analysis_pipeline_seed_status(db): + project1 = \ + Project( + project_igf_id='project1') + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test', + pipeline_type='AIRFLOW') + pipeline2 = \ + Pipeline( + pipeline_name='pipeline2', + pipeline_db='test', + pipeline_type='AIRFLOW') + analysis1 = \ + Analysis( + project=project1, + analysis_name='analysis1', + analysis_type='pipeline1', + analysis_description='test' + ) + analysis2 = \ + Analysis( + project=project1, + analysis_name='analysis2', + analysis_type='pipeline1', + analysis_description='test' + ) + analysis3 = \ + Analysis( + project=project1, + analysis_name='analysis3', + analysis_type='pipeline2', + analysis_description='test' + ) + try: + db.session.add(project1) + db.session.add(pipeline1) + db.session.add(pipeline2) + db.session.add(analysis1) + db.session.add(analysis2) + db.session.add(analysis3) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + pipeline_seed1 = \ + Pipeline_seed( + seed_id=analysis1.analysis_id, + seed_table='analysis', + pipeline=pipeline1, + status='SEEDED' + ) + pipeline_seed2 = \ + Pipeline_seed( + seed_id=analysis2.analysis_id, + seed_table='analysis', + pipeline=pipeline1, + status='FINISHED' + ) + try: + db.session.add(pipeline_seed1) + db.session.add(pipeline_seed2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + assert pipeline_seed1.seed_id != pipeline_seed2.seed_id + assert pipeline_seed2.status == 'FINISHED' + status1 = \ + get_analysis_pipeline_seed_status( + analysis_id=analysis1.analysis_id) + assert status1 == 'VALID' + status2 = \ + get_analysis_pipeline_seed_status( + analysis_id=analysis2.analysis_id) + assert status2 == 'INVALID' + status3 = \ + get_analysis_pipeline_seed_status( + analysis_id=analysis2.analysis_id) + assert status3 == 'INVALID' + +@patch('app.analysis_view.trigger_airflow_pipeline', 
return_value=requests.patch('https://httpbin.org/patch', data ={'key': 'value'}, headers={'Content-Type': 'application/json'})) +def test_async_submit_analysis_pipeline(mock_object, db): + project1 = \ + Project( + project_igf_id='project1') + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test', + pipeline_type='AIRFLOW') + pipeline2 = \ + Pipeline( + pipeline_name='pipeline2', + pipeline_db='test', + pipeline_type='AIRFLOW') + analysis1 = \ + Analysis( + project=project1, + analysis_name='analysis1', + analysis_type='pipeline1', + analysis_description='test' + ) + analysis2 = \ + Analysis( + project=project1, + analysis_name='analysis2', + analysis_type='pipeline1', + analysis_description='test' + ) + analysis3 = \ + Analysis( + project=project1, + analysis_name='analysis3', + analysis_type='pipeline2', + analysis_description='test' + ) + try: + db.session.add(project1) + db.session.add(pipeline1) + db.session.add(pipeline2) + db.session.add(analysis1) + db.session.add(analysis2) + db.session.add(analysis3) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + pipeline_seed1 = \ + Pipeline_seed( + seed_id=analysis1.analysis_id, + seed_table='analysis', + pipeline=pipeline1, + status='SEEDED' + ) + pipeline_seed2 = \ + Pipeline_seed( + seed_id=analysis2.analysis_id, + seed_table='analysis', + pipeline=pipeline1, + status='FINISHED' + ) + try: + db.session.add(pipeline_seed1) + db.session.add(pipeline_seed2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + os.environ['AIRFLOW_CONF_FILE'] = '/tmp/' + result = async_submit_analysis_pipeline([analysis1.analysis_id]) + assert analysis1.analysis_id in result diff --git a/tests/test_apis.py b/tests/test_apis.py index 04ba28a..a9bb7e9 100644 --- a/tests/test_apis.py +++ b/tests/test_apis.py @@ -1,125 +1,138 @@ -import unittest, json -from app import appbuilder, db -from app.interop_data_api import search_interop_for_run -from 
app.interop_data_api import add_interop_data -from app.interop_data_api import edit_interop_data -from app.interop_data_api import add_or_edit_interop_data -from app.pre_demultiplexing_data_api import search_predemultiplexing_data -from app.pre_demultiplexing_data_api import add_predemultiplexing_data -from app.pre_demultiplexing_data_api import edit_predemultiplexing_data -from app.pre_demultiplexing_data_api import add_or_edit_predemultiplexing_data +# import os, unittest, json +# from app import db +# # from app.interop_data_api import search_interop_for_run +# # from app.interop_data_api import add_interop_data +# # from app.interop_data_api import edit_interop_data +# # from app.interop_data_api import add_or_edit_interop_data +# # from app.pre_demultiplexing_data_api import search_predemultiplexing_data +# # from app.pre_demultiplexing_data_api import add_predemultiplexing_data +# # from app.pre_demultiplexing_data_api import edit_predemultiplexing_data +# # from app.pre_demultiplexing_data_api import add_or_edit_predemultiplexing_data -class TestApiCase(unittest.TestCase): - def setUp(self): - db.create_all() - self.json_file = "data/interop_example.json" - self.demult_file = "data/demultiplexing_example.json" +# class TestApiCase(unittest.TestCase): +# def setUp(self): +# # os.environ['SQLALCHEMY_DATABASE_URI'] = "sqlite:////tmp/app_api.db" +# # from app import app +# # app.config.update({ +# # "TESTING": True, +# # "CSRF_ENABLED": False, +# # # "SQLALCHEMY_DATABASE_URI": "sqlite:////tmp/app_api.db", +# # }) +# # self.app_context = app.app_context() +# # self.app_context.push() +# db.create_all() +# print(db.get_engine().url) +# self.json_file = "data/interop_example.json" +# self.demult_file = "data/demultiplexing_example.json", - def tearDown(self): - db.drop_all() - def test_search_interop_for_run(self): - result = \ - search_interop_for_run(run_name='AAAA') - self.assertTrue(result is None) +# def tearDown(self): +# db.session.remove() +# # 
self.app_context.pop() +# db.drop_all() - def test_add_interop_data(self): - with open(self.json_file, 'r') as fp: - json_data = json.load(fp) - run_name = json_data.get("run_name") - result = \ - search_interop_for_run(run_name=run_name) - self.assertTrue(result is None) - add_interop_data(run_data=json_data) - result = \ - search_interop_for_run(run_name=run_name) - self.assertTrue(result is not None) +# def test_search_interop_for_run(self): +# result = \ +# search_interop_for_run(run_name='AAAA') +# self.assertTrue(result is None) - def test_edit_interop_data(self): - with open(self.json_file, 'r') as fp: - json_data = json.load(fp) - run_name = json_data.get("run_name") - add_interop_data(run_data=json_data) - json_data['table_data'] = "AAAAA" - edit_interop_data(run_data=json_data) - result = \ - search_interop_for_run(run_name=run_name) - self.assertEqual(result.table_data, "AAAAA") +# # def test_add_interop_data(self): +# # with open(self.json_file, 'r') as fp: +# # json_data = json.load(fp) +# # run_name = json_data.get("run_name") +# # result = \ +# # search_interop_for_run(run_name=run_name) +# # self.assertTrue(result is None) +# # add_interop_data(run_data=json_data) +# # result = \ +# # search_interop_for_run(run_name=run_name) +# # self.assertTrue(result is not None) - def test_add_or_edit_interop_data(self): - with open(self.json_file, 'r') as fp: - json_data = json.load(fp) - run_name = json_data.get("run_name") - add_or_edit_interop_data(run_data=json_data) - result = \ - search_interop_for_run(run_name=run_name) - self.assertTrue(result is not None) - json_data['table_data'] = "AAAAA" - add_or_edit_interop_data(run_data=json_data) - result = \ - search_interop_for_run(run_name=run_name) - self.assertEqual(result.table_data, "AAAAA") +# def test_edit_interop_data(self): +# with open(self.json_file, 'r') as fp: +# json_data = json.load(fp) +# run_name = json_data.get("run_name") +# add_interop_data(run_data=json_data) +# json_data['table_data'] = 
"AAAAA" +# edit_interop_data(run_data=json_data) +# result = \ +# search_interop_for_run(run_name=run_name) +# self.assertEqual(result.table_data, "AAAAA") - def test_search_predemultiplexing_data(self): - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertTrue(result is None) +# def test_add_or_edit_interop_data(self): +# with open(self.json_file, 'r') as fp: +# json_data = json.load(fp) +# run_name = json_data.get("run_name") +# add_or_edit_interop_data(run_data=json_data) +# result = \ +# search_interop_for_run(run_name=run_name) +# self.assertTrue(result is not None) +# json_data['table_data'] = "AAAAA" +# add_or_edit_interop_data(run_data=json_data) +# result = \ +# search_interop_for_run(run_name=run_name) +# self.assertEqual(result.table_data, "AAAAA") - def test_add_predemultiplexing_data(self): - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertTrue(result is None) - with open(self.demult_file, 'r') as fp: - json_data = json.load(fp) - add_predemultiplexing_data(data=json_data) - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertTrue(result is not None) +# # def test_search_predemultiplexing_data(self): +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertTrue(result is None) - def test_edit_predemultiplexing_data(self): - with open(self.demult_file, 'r') as fp: - json_data = json.load(fp) - add_predemultiplexing_data(data=json_data) - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertTrue(result is not None) - json_data["flowcell_cluster_plot"] = "CCCC" - edit_predemultiplexing_data(data=json_data) - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertEqual(result.flowcell_cluster_plot, "CCCC") +# # def 
test_add_predemultiplexing_data(self): +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertTrue(result is None) +# # with open(self.demult_file, 'r') as fp: +# # json_data = json.load(fp) +# # add_predemultiplexing_data(data=json_data) +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertTrue(result is not None) - def test_add_or_edit_predemultiplexing_data(self): - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertTrue(result is None) - with open(self.demult_file, 'r') as fp: - json_data = json.load(fp) - add_or_edit_predemultiplexing_data(data=json_data) - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertTrue(result is not None) - json_data["flowcell_cluster_plot"] = "CCCC" - add_or_edit_predemultiplexing_data(data=json_data) - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertEqual(result.flowcell_cluster_plot, "CCCC") +# # def test_edit_predemultiplexing_data(self): +# # with open(self.demult_file, 'r') as fp: +# # json_data = json.load(fp) +# # add_predemultiplexing_data(data=json_data) +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertTrue(result is not None) +# # json_data["flowcell_cluster_plot"] = "CCCC" +# # edit_predemultiplexing_data(data=json_data) +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertEqual(result.flowcell_cluster_plot, "CCCC") -if __name__ == '__main__': - unittest.main() \ No newline at end of file +# # def test_add_or_edit_predemultiplexing_data(self): +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertTrue(result is None) +# # with 
open(self.demult_file, 'r') as fp: +# # json_data = json.load(fp) +# # add_or_edit_predemultiplexing_data(data=json_data) +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertTrue(result is not None) +# # json_data["flowcell_cluster_plot"] = "CCCC" +# # add_or_edit_predemultiplexing_data(data=json_data) +# # result = \ +# # search_predemultiplexing_data( +# # run_name="AAAA", +# # samplesheet_tag="BBBB") +# # self.assertEqual(result.flowcell_cluster_plot, "CCCC") + +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file diff --git a/tests/test_app.py b/tests/test_app.py deleted file mode 100644 index 09e558d..0000000 --- a/tests/test_app.py +++ /dev/null @@ -1,36 +0,0 @@ -import os, unittest, tempfile -from flask.testing import FlaskClient -from app import app, db, appbuilder -from flask import Flask, g, url_for - - -class TestCase1(unittest.TestCase): - def setUp(self): - app.app_context().push() - app.config.from_object("flask_appbuilder.tests.config_api") - db.create_all() - self.client = app.test_client(use_cookies=True) - app.appbuilder.sm.add_user( - "admin", - "admin", - "user", - "admin@fab.org", - "Admin", - "password") - - - def tearDown(self): - self.appbuilder = None - db.drop_all() - - def test_access_server(self): - uri = "/" - rv = self.client.get(uri) - #self.assertEqual(rv.status_code, 200) - print(rv.status_code) - - - - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/tests/test_app_view.py b/tests/test_app_view.py new file mode 100644 index 0000000..8d8965b --- /dev/null +++ b/tests/test_app_view.py @@ -0,0 +1,312 @@ +def test_admin_view(test_client): + rv = test_client.get('/admin_home') + assert rv.status_code == 302 + rv = test_client.get('/illuminainteropdataview/list/') + assert rv.status_code == 302 + rv = test_client.get('/predemultiplexingdataview/list/') + assert rv.status_code == 302 + rv = 
test_client.get('/samplesheetview/list/') + assert rv.status_code == 302 + rv = test_client.get('/rawseqrunview/list/') + assert rv.status_code == 302 + rv = test_client.get('/rawmetadatavalidationview/list/') + assert rv.status_code == 302 + rv = test_client.get('/rawmetadatasubmitview/list/') + assert rv.status_code == 302 + rv = test_client.get('/projectindexview/list/') + assert rv.status_code == 302 + rv = test_client.get('/sampleindexview/list/') + assert rv.status_code == 302 + rv = test_client.get('/rawanalysisview/list/') + assert rv.status_code == 302 + rv = test_client.get('/analysisview/list/') + assert rv.status_code == 302 + rv = test_client.get('/rdsprojectbackupview/list/') + assert rv.status_code == 302 + rv = test_client.get('/projectview/list/') + assert rv.status_code == 302 + rv = test_client.get('/sampleprojectview/list/') + assert rv.status_code == 302 + rv = test_client.get('/userview/list/') + assert rv.status_code == 302 + rv = test_client.get('/seqrunview/list/') + assert rv.status_code == 302 + rv = test_client.post("/login/", data=dict( + username='admin', + password='password' + ), follow_redirects=True) + assert rv.status_code == 200 + with test_client.session_transaction() as session: + session['user_id'] = 1 + session['_fresh'] = True + rv = test_client.get('/admin_home') + assert rv.status_code == 200 + rv = test_client.get('/illuminainteropdataview/list/') + assert rv.status_code == 200 + rv = test_client.get('/predemultiplexingdataview/list/') + assert rv.status_code == 200 + rv = test_client.get('/samplesheetview/list/') + assert rv.status_code == 200 + rv = test_client.get('/rawseqrunview/list/') + assert rv.status_code == 200 + rv = test_client.get('/rawmetadatavalidationview/list/') + assert rv.status_code == 200 + rv = test_client.get('/rawmetadatasubmitview/list/') + assert rv.status_code == 200 + rv = test_client.get('/projectindexview/list/') + assert rv.status_code == 200 + rv = test_client.get('/sampleindexview/list/') 
+ assert rv.status_code == 200 + rv = test_client.get('/rawanalysisview/list/') + assert rv.status_code == 200 + rv = test_client.get('/analysisview/list/') + assert rv.status_code == 200 + rv = test_client.get('/rdsprojectbackupview/list/') + assert rv.status_code == 200 + rv = test_client.get('/projectview/list/') + assert rv.status_code == 200 + rv = test_client.get('/sampleprojectview/list/') + assert rv.status_code == 200 + rv = test_client.get('/userview/list/') + assert rv.status_code == 200 + rv = test_client.get('/seqrunview/list/') + assert rv.status_code == 200 + +# def test_view(test_client): +# rv = test_client.get('/admin_home') +# assert rv.status_code == 302 +# rv = test_client.get('/illuminainteropdataview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/predemultiplexingdataview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/samplesheetview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/rawseqrunview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/rawmetadatavalidationview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/rawmetadatasubmitview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/projectindexview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/sampleindexview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/rawanalysisview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/analysisview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/rdsprojectbackupview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/projectview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/sampleprojectview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/userview/list/') +# assert rv.status_code == 302 +# rv = test_client.get('/seqrunview/list/') +# assert rv.status_code == 302 +# rv = test_client.post("/login", data=dict( +# 
username='admin', +# password='password' +# ), follow_redirects=True) +# assert rv.status_code == 200 + # rv1 = test_client.get('/admin_home') + # assert rv1.status_code == 200 + # rv = test_client.get('/illuminainteropdataview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/predemultiplexingdataview/list/') + # assert rv.status_code == 201 + # rv = test_client.get('/samplesheetview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/rawseqrunview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/rawmetadatavalidationview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/rawmetadatasubmitview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/projectindexview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/sampleindexview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/rawanalysisview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/analysisview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/rdsprojectbackupview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/projectview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/sampleprojectview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/userview/list/') + # assert rv.status_code == 200 + # rv = test_client.get('/seqrunview/list/') + # assert rv.status_code == 200 + + +# import os, unittest, time +# from flask_appbuilder import AppBuilder +# from app import db, app, appbuilder + +# class TestApp1(unittest.TestCase): +# def setUp(self): +# app.config.update({ +# "TESTING": True, +# "CSRF_ENABLED": False, +# "SECRET_KEY": "thisismyscretkey", +# "SQLALCHEMY_TRACK_MODIFICATIONS": False, +# "WTF_CSRF_ENABLED": False, +# "AUTH_ROLE_ADMIN": 'Admin', +# "AUTH_USER_REGISTRATION_ROLE": "Admin", +# "AUTH_ROLES_MAPPING": { +# "User": ["User"], +# "Admin": ["admin"], +# }, +# "SQLALCHEMY_DATABASE_URI": 
"sqlite:////tmp/app2.db", +# }) +# print(db.get_engine().url) +# db.create_all() +# print(db.session.is_active) +# # db.drop_all() +# #appbuilder#.init_app(app, db.session) +# # db.create_all() +# # app +# print(db.get_engine().url) +# role_admin = \ +# app.appbuilder.sm.find_role("Admin") +# print(role_admin) +# self.assertIsNotNone(role_admin) +# user = app.appbuilder.sm.find_user(email="admin@fab.org") +# if user is None: +# app.appbuilder.sm.add_user( +# "admin", +# "admin", +# "user", +# "admin@fab.org", +# role_admin, +# "password") +# self.app_context = app.app_context() +# self.app_context.push() +# self.client = app.test_client(use_cookies=False) + + +# def tearDown(self): +# db.session.remove() +# self.app_context.pop() +# db.drop_all() +# if os.path.exists("/tmp/app2.db"): +# os.remove("/tmp/app2.db") + + +# def test_server_access_without_login(self): +# rv2 = self.client.get('/admin_home') +# self.assertEqual(rv2.status_code, 302) +# rv3 = self.client.get('/illuminainteropdataview/list/') +# self.assertEqual(rv3.status_code, 302) +# rv4 = self.client.get('/predemultiplexingdataview/list/') +# self.assertEqual(rv4.status_code, 302) +# rv5 = self.client.get('/samplesheetview/list/') +# self.assertEqual(rv5.status_code, 302) +# rv6 = self.client.get('/rawseqrunview/list/') +# self.assertEqual(rv6.status_code, 302) +# rv7 = self.client.get('/rawmetadatavalidationview/list/') +# self.assertEqual(rv7.status_code, 302) +# rv8 = self.client.get('/rawmetadatasubmitview/list/') +# self.assertEqual(rv8.status_code, 302) +# rv9 = self.client.get('/projectindexview/list/') +# self.assertEqual(rv9.status_code, 302) +# rv10 = self.client.get('/sampleindexview/list/') +# self.assertEqual(rv10.status_code, 302) +# rv11 = self.client.get('/rawanalysisview/list/') +# self.assertEqual(rv11.status_code, 302) +# rv12 = self.client.get('/analysisview/list/') +# self.assertEqual(rv12.status_code, 302) +# rv13 = self.client.get('/rdsprojectbackupview/list/') +# 
self.assertEqual(rv13.status_code, 302) +# rv14 = self.client.get('/projectview/list/') +# self.assertEqual(rv14.status_code, 302) +# rv15 = self.client.get('/sampleprojectview/list/') +# self.assertEqual(rv15.status_code, 302) +# rv16 = self.client.get('/userview/list/') +# self.assertEqual(rv16.status_code, 302) +# rv17 = self.client.get('/seqrunview/list/') +# self.assertEqual(rv17.status_code, 302) +# rv8 = self.client.post("/login", data=dict( +# username='admin', +# password='password' +# ), follow_redirects=True) +# self.assertEqual(rv8.status_code, 200) +# rv9 = self.client.get('/admin_home') +# self.assertEqual(rv9.status_code, 200) +# rv10 = self.client.get('/illuminainteropdataview/list/') +# self.assertEqual(rv10.status_code, 200) + +# class TestApp2(unittest.TestCase): +# def setUp(self): +# db.drop_all() +# os.environ['SQLALCHEMY_DATABASE_URI'] = "sqlite:////tmp/app4.db" +# app.config.update({ +# "TESTING": True, +# "CSRF_ENABLED": False, +# }) +# self.app_context = app.app_context() +# self.app_context.push() +# db.create_all() +# app.config.from_object("flask_appbuilder.tests.config_api") +# self.client = app.test_client(use_cookies=True) +# role_admin = \ +# app.appbuilder.sm.find_role( +# app.appbuilder.sm.auth_role_admin) +# app.appbuilder.sm.add_user( +# "admin", +# "admin", +# "user", +# "admin@fab.org", +# role_admin, +# "password") + + +# def tearDown(self): +# db.session.remove() +# self.app_context.pop() +# db.drop_all() + +# def test_access_server(self): +# rv = self.client.post("/login", data=dict( +# username='admin', +# password='password' +# ), follow_redirects=True) +# self.assertEqual(rv.status_code, 200) +# rv2 = self.client.get('/admin_home') +# self.assertEqual(rv2.status_code, 200) +# rv3 = self.client.get('/illuminainteropdataview/list/') +# self.assertEqual(rv3.status_code, 200) +# rv5 = self.client.get('/samplesheetview/list/') +# self.assertEqual(rv5.status_code, 200) +# rv6 = self.client.get('/rawseqrunview/list/') +# 
self.assertEqual(rv6.status_code, 200) +# rv7 = self.client.get('/rawmetadatavalidationview/list/') +# self.assertEqual(rv7.status_code, 200) +# rv8 = self.client.get('/rawmetadatasubmitview/list/') +# self.assertEqual(rv8.status_code, 200) +# rv9 = self.client.get('/projectindexview/list/') +# self.assertEqual(rv9.status_code, 200) +# rv10 = self.client.get('/sampleindexview/list/') +# self.assertEqual(rv10.status_code, 200) +# rv11 = self.client.get('/rawanalysisview/list/') +# self.assertEqual(rv11.status_code, 200) +# rv12 = self.client.get('/analysisview/list/') +# self.assertEqual(rv12.status_code, 200) +# rv13 = self.client.get('/rdsprojectbackupview/list/') +# self.assertEqual(rv13.status_code, 200) +# rv14 = self.client.get('/projectview/list/') +# self.assertEqual(rv14.status_code, 200) +# rv15 = self.client.get('/sampleprojectview/list/') +# self.assertEqual(rv15.status_code, 200) +# rv16 = self.client.get('/userview/list/') +# self.assertEqual(rv16.status_code, 200) +# rv17 = self.client.get('/seqrunview/list/') +# self.assertEqual(rv17.status_code, 200) + +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file diff --git a/tests/test_interop_api.py b/tests/test_interop_api.py new file mode 100644 index 0000000..997cbd3 --- /dev/null +++ b/tests/test_interop_api.py @@ -0,0 +1,85 @@ +import json +import os +import tempfile +from io import BytesIO +from app.models import IlluminaInteropData +from app.interop_data_api import ( + load_interop_report, + async_load_interop_report, + SeqrunInteropApi) +from flask_appbuilder.const import ( + API_SECURITY_PASSWORD_KEY, + API_SECURITY_PROVIDER_KEY, + API_SECURITY_REFRESH_KEY, + API_SECURITY_USERNAME_KEY) + +def test_load_interop_report(db, tmp_path): + temp_report_dir = \ + tempfile.mkdtemp(dir=tmp_path) + temp_base_dir = \ + tempfile.mkdtemp(dir=tmp_path) + # Create a dummy report + temp_report_path = os.path.join(temp_report_dir, 'report.html') + with open(temp_report_path, 'w') as fp: + 
fp.write('

Its as test report

') + load_interop_report( + run_name='test1', + tag='test 1', + file_path=temp_report_path, + base_path=temp_base_dir) + # check if its loaded + record = db.session.query(IlluminaInteropData).filter_by(run_name='test1').first() + assert record is not None + assert record.run_name == 'test1' + assert record.tag == 'test 1' + assert os.path.basename(record.file_path) == 'report.html' + assert os.path.exists(record.file_path) + assert record.file_path != temp_report_path + assert temp_base_dir in record.file_path + +def test_async_load_interop_report(db, tmp_path): + temp_report_dir = \ + tempfile.mkdtemp(dir=tmp_path) + temp_base_dir = \ + tempfile.mkdtemp(dir=tmp_path) + # Create a dummy report + temp_report_path = os.path.join(temp_report_dir, 'report.html') + with open(temp_report_path, 'w') as fp: + fp.write('

Its as test report

') + async_load_interop_report( + run_name='test1', + tag='test 1', + file_path=temp_report_path, + base_path=temp_base_dir) + # check if its loaded + record = db.session.query(IlluminaInteropData).filter_by(run_name='test1').first() + assert record is not None + assert record.run_name == 'test1' + assert record.tag == 'test 1' + assert os.path.basename(record.file_path) == 'report.html' + assert os.path.exists(record.file_path) + assert record.file_path != temp_report_path + assert temp_base_dir in record.file_path + +def test_SeqrunInteropApi1(db, test_client, tmp_path): + res = \ + test_client.post( + "/api/v1/security/login", + json={ + API_SECURITY_USERNAME_KEY: "admin", + API_SECURITY_PASSWORD_KEY: "password", + API_SECURITY_PROVIDER_KEY: "db"}) + assert res.status_code == 200 + token = \ + json.loads(res.data.decode("utf-8")).\ + get("access_token") + report_file_data = \ + BytesIO(b'

Its as test report

') + res = \ + test_client.post( + '/api/v1/interop_data/add_report', + data=dict(file=(report_file_data, 'report.html'),run_name="test1",tag="test 1"), + headers={"Authorization": f"Bearer {token}"}, + content_type='multipart/form-data') + assert res.status_code == 200 + assert json.loads(res.data.decode('utf-8')).get("message") == 'successfully submitted interop report loading job for report.html' diff --git a/tests/test_metadata_api.py b/tests/test_metadata_api.py new file mode 100644 index 0000000..ccc329e --- /dev/null +++ b/tests/test_metadata_api.py @@ -0,0 +1,91 @@ +import os +import json +from io import BytesIO +from app.models import Project, Sample, IgfUser +from app.metadata_api import async_cleanup_and_load_new_data_to_metadata_tables +from flask_appbuilder.const import ( + API_SECURITY_PASSWORD_KEY, + API_SECURITY_PROVIDER_KEY, + API_SECURITY_REFRESH_KEY, + API_SECURITY_USERNAME_KEY) + +def test_metadata_api1(test_client): + json_data = { + "project": [{ + "project_id": 1, + "project_igf_id": "test1"}]} + json_data = \ + json.dumps(json_data) + res = \ + test_client.post( + '/api/v1/metadata/load_metadata', + data=dict(file=(BytesIO(json_data.encode()), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code != 200 + res = \ + test_client.post( + "/api/v1/security/login", + json={ + API_SECURITY_USERNAME_KEY: "admin", + API_SECURITY_PASSWORD_KEY: "password", + API_SECURITY_PROVIDER_KEY: "db"}) + assert res.status_code == 200 + token = \ + json.loads(res.data.decode("utf-8")).\ + get("access_token") + json_data = { + "project": [{ + "project_id": 1, + "project_igf_id": "test1"}]} + json_data = \ + json.dumps(json_data) + res = \ + test_client.post( + '/api/v1/metadata/load_metadata', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(BytesIO(json_data.encode()), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code == 200 + assert 
json.loads(res.data.decode('utf-8')).get("message") == 'successfully submitted metadata update job' + + + +def test_async_cleanup_and_load_new_data_to_metadata_tables(db, tmp_path): + project = \ + Project( + project_id=2, + project_igf_id="test1") + try: + db.session.add(project) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + result = \ + db.session.\ + query(Project).\ + filter(Project.project_igf_id=="test1").\ + one_or_none() + assert result is not None + assert result.project_id == 2 + json_data = { + "project": [{ + "project_id": 1, + "project_igf_id": "test1"}]} + temp_json_file = \ + os.path.join(tmp_path, 'metadata_db.json') + with open(temp_json_file, 'w') as fp: + json.dump(json_data, fp) + _ = \ + async_cleanup_and_load_new_data_to_metadata_tables( + temp_json_file) + result = \ + db.session.\ + query(Project).\ + filter(Project.project_igf_id=="test1").\ + one_or_none() + assert result is not None + assert result.project_id == 1 \ No newline at end of file diff --git a/tests/test_metadata_util.py b/tests/test_metadata_util.py index 549aa12..5d1bdaa 100644 --- a/tests/test_metadata_util.py +++ b/tests/test_metadata_util.py @@ -1,25 +1,205 @@ import os, unittest, json, tempfile -from app import appbuilder, db -from app.models import Project, Sample, IgfUser -from app.metadata.metadata_util import cleanup_and_load_new_data_to_metadata_tables -from app.metadata.metadata_util import check_for_projects_in_metadata_db -from app.metadata.metadata_util import check_sample_and_project_ids_in_metadata_db -from app.metadata.metadata_util import check_user_name_and_email_in_metadata_db +# from app import appbuilder, db +from app.models import ( + Project, + Sample, + IgfUser, + Pipeline, + Platform, + Seqrun, + Analysis, + RawAnalysis, + RawAnalysisValidationSchema, + RawAnalysisTemplate, + Project_info_data, + Project_seqrun_info_data, + Project_seqrun_info_file, + Project_analysis_info_data, + Project_analysis_info_file) 
+from app.metadata.metadata_util import ( + backup_specific_portal_tables, + cleanup_and_load_new_data_to_metadata_tables, + check_for_projects_in_metadata_db, + check_sample_and_project_ids_in_metadata_db, + check_user_name_and_email_in_metadata_db) -class TestMetadataUtil1(unittest.TestCase): - def setUp(self): - db.create_all() +def test_backup_specific_portal_tables(db, tmp_path): + project3 = \ + Project( + project_id=3, + project_igf_id="test3") + ## add raw analysis before loading new data + pipeline = \ + Pipeline( + pipeline_name="pipeline1", + pipeline_db="", + pipeline_type='AIRFLOW') + raw_analysis = \ + RawAnalysis( + analysis_name="analysis1", + analysis_yaml="test", + status="VALIDATED", + project=project3, + pipeline=pipeline) + try: + db.session.add(project3) + db.session.add(pipeline) + db.session.add(raw_analysis) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + result = \ + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.analysis_name=="analysis1").\ + one_or_none() + assert result is not None + assert result.raw_analysis_id == 1 + assert result.project_id == 3 + (_, json_file) = \ + tempfile.mkstemp( + dir=tmp_path, + suffix='.json', + prefix='portal_metadata_',) + assert os.path.exists(json_file) + json_file = \ + backup_specific_portal_tables(json_file) + assert os.path.exists(json_file) + with open(json_file, 'r') as fp: + json_data = json.load(fp) + assert 'raw_analysis' in json_data + raw_analysis_data = \ + json_data.get('raw_analysis') + assert isinstance(raw_analysis_data, list) + assert len(raw_analysis_data) == 1 + assert 'analysis_name' in raw_analysis_data[0] + assert raw_analysis_data[0].get('analysis_name') == 'analysis1' + result = \ + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.analysis_name=="analysis1").\ + one_or_none() + assert result is not None + assert result.raw_analysis_id == 1 + assert result.project_id == 3 - def tearDown(self): - db.drop_all() - def 
test_cleanup_and_load_new_data_to_metadata_tables(self): - project = \ +# class TestMetadataUtil1(unittest.TestCase): +# def setUp(self): +# # db.drop_all() +# # if os.path.exists('/tmp/app.db'): +# # os.remove('/tmp/app.db') +# db.create_all() + +# def tearDown(self): +# db.session.remove() +# db.drop_all() + +def test_cleanup_and_load_new_data_to_metadata_tables(db, tmp_path): + # def test_cleanup_and_load_new_data_to_metadata_tables(self): + project2 = \ Project( project_id=2, - project_igf_id="test1") + project_igf_id="test2") + project3 = \ + Project( + project_id=3, + project_igf_id="test3") + ## add raw analysis before loading new data + pipeline = \ + Pipeline( + pipeline_id=1, + pipeline_name="pipeline1", + pipeline_db="", + pipeline_type='AIRFLOW' + ) + raw_analysis = \ + RawAnalysis( + raw_analysis_id=1, + analysis_name="analysis1", + analysis_yaml="test", + status="VALIDATED", + project=project3, + pipeline=pipeline + ) + raw_validation_schema = \ + RawAnalysisValidationSchema( + raw_analysis_schema_id=1, + pipeline=pipeline, + json_schema="", + status="VALIDATED") + raw_analysis_template = \ + RawAnalysisTemplate( + template_id=1, + template_tag="test", + template_data="test") + platform = \ + Platform( + platform_id=1, + platform_igf_id="test_platform", + model_name="NOVASEQ6000", + vendor_name="ILLUMINA", + software_name="RTA") + seqrun = \ + Seqrun( + seqrun_id=1, + seqrun_igf_id="test_seqrun", + flowcell_id="FLOWCELL1", + platform=platform) + analysis = \ + Analysis( + analysis_id=1, + project=project3, + analysis_name="test_analysis", + analysis_type="test_analysis_type") + project_info_data = \ + Project_info_data( + project_info_data_id=1, + sample_read_count_data="test", + project_history_data="test", + project=project3) + project_seqrun_info_data = \ + Project_seqrun_info_data( + project_seqrun_info_data_id=1, + project=project3, + seqrun=seqrun, + lane_number='1', + index_group_tag="test_ig", + project_info_data=project_info_data) + 
project_seqrun_info_file = \ + Project_seqrun_info_file( + project_seqrun_info_file_id=1, + project_seqrun_info_data=project_seqrun_info_data, + file_path="test") + project_analysis_info_data = \ + Project_analysis_info_data( + project_analysis_info_data_id=1, + project=project3, + analysis=analysis, + analysis_tag="test_analysis_tag", + project_info_data=project_info_data) + project_analysis_info_file = \ + Project_analysis_info_file( + project_analysis_info_file_id=1, + project_analysis_info_data=project_analysis_info_data, + file_path="test") try: - db.session.add(project) + db.session.add(project2) + db.session.add(project3) + db.session.add(pipeline) + db.session.add(raw_analysis) + db.session.add(raw_validation_schema) + db.session.add(raw_analysis_template) + db.session.add(platform) + db.session.add(seqrun) + db.session.add(analysis) + db.session.add(project_info_data) + db.session.add(project_seqrun_info_data) + db.session.add(project_seqrun_info_file) + db.session.add(project_analysis_info_data) + db.session.add(project_analysis_info_file) db.session.flush() db.session.commit() except: @@ -28,42 +208,119 @@ def test_cleanup_and_load_new_data_to_metadata_tables(self): result = \ db.session.\ query(Project).\ - filter(Project.project_igf_id=="test1").\ + filter(Project.project_igf_id=="test2").\ one_or_none() - self.assertTrue(result is not None) - self.assertEqual(result.project_id, 2) + assert result is not None + assert result.project_id == 2 json_data = { "project": [{ "project_id": 1, - "project_igf_id": "test1"}]} - with tempfile.TemporaryDirectory() as temp_dir : - temp_json_file = \ - os.path.join( - temp_dir, - 'metadata_db.json') - with open(temp_json_file, 'w') as fp: - json.dump(json_data, fp) - cleanup_and_load_new_data_to_metadata_tables(temp_json_file) - result = \ - db.session.\ - query(Project).\ - filter(Project.project_igf_id=="test1").\ - one_or_none() - self.assertTrue(result is not None) - self.assertEqual(result.project_id, 1) + 
"project_igf_id": "test1"},{ + "project_id": 3, + "project_igf_id": "test3"}], + "pipeline": [{ + "pipeline_id": 1, + "pipeline_name": "pipeline1", + "pipeline_db": "", + "pipeline_type": "AIRFLOW"}], + "analysis": [dict( + analysis_id=1, + project_id=3, + analysis_name="test_analysis", + analysis_type="test_analysis_type")], + "platform": [dict( + platform_id=1, + platform_igf_id="test_platform", + model_name="NOVASEQ6000", + vendor_name="ILLUMINA", + software_name="RTA")], + "seqrun": [dict( + seqrun_id=1, + seqrun_igf_id="test_seqrun", + flowcell_id="FLOWCELL1", + platform_id=1)]} + temp_json_file = \ + os.path.join(tmp_path, 'metadata_db.json') + with open(temp_json_file, 'w') as fp: + json.dump(json_data, fp) + cleanup_and_load_new_data_to_metadata_tables(temp_json_file) + result = \ + db.session.\ + query(Project).\ + filter(Project.project_igf_id=="test1").\ + one_or_none() + assert result is not None + assert result.project_id == 1 + result = \ + db.session.\ + query(Project).\ + filter(Project.project_igf_id=="test2").\ + one_or_none() + assert result is None + result = \ + db.session.\ + query(RawAnalysis).\ + filter(RawAnalysis.analysis_name=="analysis1").\ + one_or_none() + assert result is not None + assert result.analysis_name == "analysis1" + assert result.project_id == 3 + assert result.pipeline_id == 1 + result = \ + db.session.\ + query(Project_info_data).\ + filter(Project_info_data.project_info_data_id==1).\ + one_or_none() + assert result is not None + assert result.sample_read_count_data == "test" + assert result.project_id == 3 + result = \ + db.session.\ + query(Project_seqrun_info_data).\ + filter(Project_seqrun_info_data.project_seqrun_info_data_id==1).\ + one_or_none() + assert result is not None + assert result.project_id == 3 + assert result.project_info_data_id == 1 + result = \ + db.session.\ + query(Project_seqrun_info_file).\ + filter(Project_seqrun_info_file.project_seqrun_info_file_id==1).\ + one_or_none() + assert result is not 
None + assert result.project_seqrun_info_data_id == 1 + assert result.file_path == "test" + result = \ + db.session.\ + query(Project_analysis_info_data).\ + filter(Project_analysis_info_data.project_analysis_info_data_id==1).\ + one_or_none() + assert result is not None + assert result.analysis_tag == "test_analysis_tag" + assert result.analysis_id == 1 + result = \ + db.session.\ + query(Project_analysis_info_file).\ + filter(Project_analysis_info_file.project_analysis_info_file_id==1).\ + one_or_none() + assert result is not None + assert result.file_path == "test" + + -class TestMetadataUtil2(unittest.TestCase): - def setUp(self): - db.create_all() +# class TestMetadataUtil2(unittest.TestCase): +# def setUp(self): +# db.create_all() - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.drop_all() - def test_check_for_projects_in_metadata_db(self): +def test_check_for_projects_in_metadata_db(db, tmp_path): + # def test_check_for_projects_in_metadata_db(self): project = \ Project( project_id=1, - project_igf_id="test1") + project_igf_id="test11") try: db.session.add(project) db.session.flush() @@ -74,24 +331,30 @@ def test_check_for_projects_in_metadata_db(self): result = \ db.session.\ query(Project).\ - filter(Project.project_igf_id=="test1").\ + filter(Project.project_igf_id=="test11").\ one_or_none() - self.assertTrue(result is not None) - self.assertEqual(result.project_id, 1) + # self.assertTrue(result is not None) + # self.assertEqual(result.project_id, 1) + assert result is not None + assert result.project_id == 1 output, errors = \ - check_for_projects_in_metadata_db(project_list=["test1", "test2"]) - self.assertTrue(output.get('test1')) - self.assertFalse(output.get('test2')) - self.assertEqual(len(errors), 1) + check_for_projects_in_metadata_db(project_list=["test11", "test12"]) + # self.assertTrue(output.get('test11')) + # self.assertFalse(output.get('test12')) + # self.assertEqual(len(errors), 1) + assert output.get('test11') is not None 
+ assert len(errors) == 1 -class TestMetadataUtil3(unittest.TestCase): - def setUp(self): - db.create_all() +# class TestMetadataUtil3(unittest.TestCase): +# def setUp(self): +# db.drop_all() +# db.create_all() - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.drop_all() - def test_check_sample_and_project_ids_in_metadata_db(self): +def test_check_sample_and_project_ids_in_metadata_db(db, tmp_path): + # def test_check_sample_and_project_ids_in_metadata_db(self): project1 = \ Project( project_id=1, @@ -136,10 +399,13 @@ def test_check_sample_and_project_ids_in_metadata_db(self): errors = \ check_sample_and_project_ids_in_metadata_db( sample_project_list) - self.assertTrue('Missing metadata for sample test_sample3' in errors) - self.assertTrue("Sample test_sample2 is linked to project test2, not test1" in errors) + # self.assertTrue('Missing metadata for sample test_sample3' in errors) + # self.assertTrue("Sample test_sample2 is linked to project test2, not test1" in errors) + assert 'Missing metadata for sample test_sample3' in errors + assert "Sample test_sample2 is linked to project test2, not test1" in errors - def test_check_user_name_and_email_in_metadata_db(self): +def test_check_user_name_and_email_in_metadata_db(db, tmp_path): + # def test_check_user_name_and_email_in_metadata_db(self): user = \ IgfUser( name='User A', @@ -154,28 +420,36 @@ def test_check_user_name_and_email_in_metadata_db(self): data1 = [{'name':'User B', 'email_id': 'b@g.com'}] errors = \ check_user_name_and_email_in_metadata_db(data1) - self.assertTrue('Missing name User B in db' in errors) - self.assertTrue('Missing email b@g.com in db' in errors) + # self.assertTrue('Missing name User B in db' in errors) + # self.assertTrue('Missing email b@g.com in db' in errors) + assert 'Missing name User B in db' in errors + assert 'Missing email b@g.com in db' in errors data1 = [{'name':'User A', 'email_id': 'a@g.com'}] errors = \ check_user_name_and_email_in_metadata_db(data1) - 
self.assertEqual(len(errors), 0) + # self.assertEqual(len(errors), 0) + assert len(errors) == 0 data1 = [{'name':'User B', 'email_id': 'a@g.com'}] errors = \ check_user_name_and_email_in_metadata_db(data1) - self.assertTrue('Missing name User B in db' in errors) - self.assertTrue("Email a@g.com registered with name User A, not User B" in errors) + # self.assertTrue('Missing name User B in db' in errors) + # self.assertTrue("Email a@g.com registered with name User A, not User B" in errors) + assert 'Missing name User B in db' in errors + assert "Email a@g.com registered with name User A, not User B" in errors data1 = [{'name':'User A', 'email_id': 'b@g.com'}] errors = \ check_user_name_and_email_in_metadata_db(data1) - self.assertTrue('Missing email b@g.com in db' in errors) - self.assertTrue("User User A registered with email id a@g.com, not b@g.com" in errors) + # self.assertTrue('Missing email b@g.com in db' in errors) + # self.assertTrue("User User A registered with email id a@g.com, not b@g.com" in errors) + assert 'Missing email b@g.com in db' in errors + assert "User User A registered with email id a@g.com, not b@g.com" in errors data1 = [{'name':'User B', 'email_id': 'b@g.com'}] errors = \ check_user_name_and_email_in_metadata_db( name_email_list=data1, check_missing=False) - self.assertEqual(len(errors), 0) + # self.assertEqual(len(errors), 0) + assert len(errors) == 0 -if __name__ == '__main__': - unittest.main() \ No newline at end of file +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file diff --git a/tests/test_pre_demultiplexing_view.py b/tests/test_pre_demultiplexing_view.py index 34c741d..eb63b26 100644 --- a/tests/test_pre_demultiplexing_view.py +++ b/tests/test_pre_demultiplexing_view.py @@ -1,32 +1,32 @@ -import unittest, json -from app import appbuilder, db -from app.pre_demultiplexing_data_api import search_predemultiplexing_data -from app.pre_demultiplexing_data_api import add_predemultiplexing_data -from 
app.pre_demultiplexing_view import get_pre_demultiplexing_data +# import unittest, json +# from app import appbuilder, db +# from app.pre_demultiplexing_data_api import search_predemultiplexing_data +# from app.pre_demultiplexing_data_api import add_predemultiplexing_data +# from app.pre_demultiplexing_view import get_pre_demultiplexing_data -class TestPreDemultView(unittest.TestCase): - def setUp(self): - db.create_all() - self.demult_file = "data/demultiplexing_example.json" +# class TestPreDemultView(unittest.TestCase): +# def setUp(self): +# db.create_all() +# self.demult_file = "data/demultiplexing_example.json" - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.drop_all() - def test_get_pre_demultiplexing_data(self): - with open(self.demult_file, 'r') as fp: - json_data = json.load(fp) - add_predemultiplexing_data(data=json_data) - result = \ - search_predemultiplexing_data( - run_name="AAAA", - samplesheet_tag="BBBB") - self.assertTrue(result is not None) - (run_name, samplesheet_tag, flowcell_cluster_plot, project_summary_table, project_summary_plot,\ - sample_table, sample_plot, undetermined_table, undetermined_plot, date_stamp) = \ - get_pre_demultiplexing_data(demult_id=1) - self.assertEqual(run_name,"AAAA") - self.assertTrue("plot1" in flowcell_cluster_plot) - self.assertEqual(flowcell_cluster_plot.get("plot1"), "data1") +# def test_get_pre_demultiplexing_data(self): +# with open(self.demult_file, 'r') as fp: +# json_data = json.load(fp) +# add_predemultiplexing_data(data=json_data) +# result = \ +# search_predemultiplexing_data( +# run_name="AAAA", +# samplesheet_tag="BBBB") +# self.assertTrue(result is not None) +# (run_name, samplesheet_tag, flowcell_cluster_plot, project_summary_table, project_summary_plot,\ +# sample_table, sample_plot, undetermined_table, undetermined_plot, date_stamp) = \ +# get_pre_demultiplexing_data(demult_id=1) +# self.assertEqual(run_name,"AAAA") +# self.assertTrue("plot1" in flowcell_cluster_plot) +# 
self.assertEqual(flowcell_cluster_plot.get("plot1"), "data1") -if __name__ == '__main__': - unittest.main() \ No newline at end of file +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file diff --git a/tests/test_predemult_api.py b/tests/test_predemult_api.py new file mode 100644 index 0000000..94c396e --- /dev/null +++ b/tests/test_predemult_api.py @@ -0,0 +1,88 @@ +import json +import os +import tempfile +from io import BytesIO +from app.models import PreDeMultiplexingData +from app.pre_demultiplexing_data_api import ( + load_predemult_report, + async_load_predemult_report, + PreDeMultiplexingDataApi) +from flask_appbuilder.const import ( + API_SECURITY_PASSWORD_KEY, + API_SECURITY_PROVIDER_KEY, + API_SECURITY_REFRESH_KEY, + API_SECURITY_USERNAME_KEY) + +def test_load_predemult_report(db, tmp_path): + temp_report_dir = \ + tempfile.mkdtemp(dir=tmp_path) + temp_base_dir = \ + tempfile.mkdtemp(dir=tmp_path) + # Create a dummy report + temp_report_path = os.path.join(temp_report_dir, 'report.html') + with open(temp_report_path, 'w') as fp: + fp.write('

Its as test report

') + load_predemult_report( + run_name='test1', + tag_name='test 1', + file_path=temp_report_path, + base_path=temp_base_dir) + # check if its loaded + record = db.session.query(PreDeMultiplexingData).filter_by(run_name='test1').first() + assert record is not None + assert record.run_name == 'test1' + assert record.samplesheet_tag == 'test 1' + assert os.path.basename(record.file_path) == 'report.html' + assert os.path.exists(record.file_path) + assert record.file_path != temp_report_path + assert temp_base_dir in record.file_path + +def test_async_load_predemult_report(db, tmp_path): + temp_report_dir = \ + tempfile.mkdtemp(dir=tmp_path) + temp_base_dir = \ + tempfile.mkdtemp(dir=tmp_path) + # Create a dummy report + temp_report_path = os.path.join(temp_report_dir, 'report.html') + with open(temp_report_path, 'w') as fp: + fp.write('

Its as test report

') + async_load_predemult_report( + run_name='test1', + tag_name='test 1', + file_path=temp_report_path, + base_path=temp_base_dir) + # check if its loaded + record = db.session.query(PreDeMultiplexingData).filter_by(run_name='test1').first() + assert record is not None + assert record.run_name == 'test1' + assert record.samplesheet_tag == 'test 1' + assert os.path.basename(record.file_path) == 'report.html' + assert os.path.exists(record.file_path) + assert record.file_path != temp_report_path + assert temp_base_dir in record.file_path + +def test_PreDeMultiplexingDataApi1(db, test_client, tmp_path): + res = \ + test_client.post( + "/api/v1/security/login", + json={ + API_SECURITY_USERNAME_KEY: "admin", + API_SECURITY_PASSWORD_KEY: "password", + API_SECURITY_PROVIDER_KEY: "db"}) + assert res.status_code == 200 + token = \ + json.loads(res.data.decode("utf-8")).\ + get("access_token") + # temp_base_dir = \ + # tempfile.mkdtemp(dir=tmp_path) + # app.config['REPORT_UPLOAD_PATH'] = temp_base_dir + report_file_data = \ + BytesIO(b'

Its as test report

') + res = \ + test_client.post( + '/api/v1/predemultiplexing_data/add_report', + data=dict(file=(report_file_data, 'report.html'),run_name="test1",samplesheet_tag="test 1"), + headers={"Authorization": f"Bearer {token}"}, + content_type='multipart/form-data') + assert res.status_code == 200 + assert json.loads(res.data.decode('utf-8')).get("message") == 'successfully submitted demult report loading job for report.html' diff --git a/tests/test_raw_analysis.py b/tests/test_raw_analysis.py index e5e8f1f..0afc1eb 100644 --- a/tests/test_raw_analysis.py +++ b/tests/test_raw_analysis.py @@ -1,19 +1,1365 @@ import unittest, json -from app import appbuilder, db -from app.raw_analysis.raw_analysis_util import validate_analysis_json +from app.models import RawAnalysis +from app.models import RawAnalysisValidationSchema +from app.models import RawAnalysis +from app.models import Sample +from app.models import Experiment +from app.models import Run +from app.models import File +from app.models import Collection +from app.models import Collection_group +from app.models import Seqrun +from app.models import Platform +from app.models import Project +from app.models import Pipeline +from app.models import RawAnalysisValidationSchema +from app.models import RawAnalysisTemplate +from app.raw_analysis.raw_analysis_util import pipeline_query +from app.raw_analysis.raw_analysis_util import project_query +from app.raw_analysis.raw_analysis_util import validate_json_schema +from app.raw_analysis.raw_analysis_util import validate_analysis_design +from app.raw_analysis.raw_analysis_util import _get_validation_status_for_analysis_design +from app.raw_analysis.raw_analysis_util import _get_project_id_for_samples +from app.raw_analysis.raw_analysis_util import _get_file_collection_for_samples +from app.raw_analysis.raw_analysis_util import _get_sample_metadata_checks_for_analysis +from app.raw_analysis.raw_analysis_util import _get_validation_errors_for_analysis_design +from 
app.raw_analysis.raw_analysis_util import _fetch_all_samples_for_project +from app.raw_analysis.raw_analysis_util import _get_analysis_template +from app.raw_analysis.raw_analysis_util import generate_analysis_template +from app.raw_analysis_view import async_validate_analysis_yaml +from app.raw_analysis_view import async_validate_analysis_schema -class TestRawAnalysisUtil(unittest.TestCase): - def setUp(self): - db.create_all() - self.schema_file = 'app/raw_analysis/analysis_validation.json' +def test_project_query(db): + project1 = \ + Project( + project_id=1, + project_igf_id="test1") + project2 = \ + Project( + project_id=2, + project_igf_id="test2") + try: + db.session.add(project1) + db.session.flush() + db.session.add(project2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + projects = project_query() + assert len(projects) == 2 + assert 'test2' in [p.project_igf_id for p in projects] + assert 'test3' not in [p.project_igf_id for p in projects] - def tearDown(self): - db.drop_all() - pass - def test_validate_analysis_json(self): - with open(self.schema_file, 'r') as fp: - json_data = json.load(fp) +def test_pipeline_query(db): + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test1') + pipeline2 = \ + Pipeline( + pipeline_name='dag_pipeline2', + pipeline_db='test2', + pipeline_type='AIRFLOW') + try: + db.session.add(pipeline1) + db.session.flush() + db.session.add(pipeline2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + pipelines = pipeline_query() + assert len(pipelines) == 1 + assert 'dag_pipeline2' in [p.pipeline_name for p in pipelines] + assert 'pipeline1' not in [p.pipeline_name for p in pipelines] -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +def test_get_project_id_for_samples(db): + project1 = \ + Project(project_igf_id='project1') + project2 = \ + Project(project_igf_id='project2') + sample1 = \ + Sample( + 
sample_igf_id='sample1', + project_id=1) + sample2 = \ + Sample( + sample_igf_id='sample2', + project_id=1) + try: + db.session.add(project1) + db.session.flush() + db.session.add(project1) + db.session.flush() + db.session.add(sample1) + db.session.flush() + db.session.add(sample2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + project_list = \ + _get_project_id_for_samples( + ['sample1', 'sample2']) + assert len(project_list) == 1 + + +def test_get_file_collection_for_samples(db): + project1 = \ + Project( + project_igf_id='project1') + sample1 = \ + Sample( + sample_igf_id='sample1', + project=project1) + sample2 = \ + Sample( + sample_igf_id='sample2', + project=project1) + experiment1 = \ + Experiment( + experiment_igf_id='experiment1', + platform_name="NEXTSEQ2000", + status='ACTIVE', + library_name="sample1", + project=project1, + sample=sample1) + platform1 = \ + Platform( + platform_igf_id="platform1", + model_name="NEXTSEQ2000", + vendor_name='ILLUMINA', + software_name="RTA", + software_version="x.y.z") + seqrun1 = \ + Seqrun( + seqrun_igf_id="seqrun1", + flowcell_id="XXX", + platform=platform1) + run1 = \ + Run( + run_igf_id="run1", + experiment=experiment1, + seqrun=seqrun1, + status='ACTIVE', + lane_number='1') + collection1 = \ + Collection( + name="run1", + type="demultiplexed_fastq") + file1 = \ + File( + file_path="/path/file1", + status='ACTIVE') + collection_group1 = \ + Collection_group( + collection=collection1, + file=file1) + try: + db.session.add(project1) + db.session.add(sample1) + db.session.add(sample2) + db.session.add(experiment1) + db.session.add(platform1) + db.session.add(seqrun1) + db.session.add(run1) + db.session.add(collection1) + db.session.add(file1) + db.session.add(collection_group1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + sample_with_files = \ + _get_file_collection_for_samples( + sample_igf_id_list=['sample1', 'sample2']) + assert 
len(sample_with_files) == 1 + assert 'sample1' in sample_with_files + +def test_get_validation_status_for_analysis_design(): + schema_file = 'app/raw_analysis/analysis_validation_nfcore_v1.json' + with open(schema_file, 'r') as fp: + schema_data = fp.read() + ## valid design + design_1 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_1, + validation_schema=schema_data) + assert len(error_list) == 0 + ## invalid design + schema_data_2 = """ + {'sample_metadata'} + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_1, + validation_schema=schema_data_2) + assert len(error_list) == 1 + assert 'Failed to load validation schema. Invalid format.' in error_list + ## invalid design + design_2 = """ + sample_metadata1: + sample111: "" + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_2, + validation_schema=schema_data) + check_error = False + for e in error_list: + if 'sample_metadata' in e: + check_error = True + assert check_error + ## invalid design + design_3 = """ + sample_metadata: + sample111: "" + analysis_metadata: + NXF_VER1: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_3, + validation_schema=schema_data) + check_error = False + for e in error_list: + if 'NXF_VER' in e: + check_error = True + assert check_error + ## invalid design + design_4 = """ + sample_metadata: + sample111: "" + 
analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline1: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_4, + validation_schema=schema_data) + check_error = False + for e in error_list: + if 'nfcore_pipeline' in e: + check_error = True + assert check_error + ## invalid design + design_5 = """ + sample_metadata: + sample111: "" + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq1 + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_5, + validation_schema=schema_data) + check_error = False + for e in error_list: + if 'nf-core/methylseq1' in e: + check_error = True + assert check_error + ## invalid design + design_6 = """ + sample_metadata: + sample111: "" + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params1: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_6, + validation_schema=schema_data) + check_error = False + for e in error_list: + if 'nextflow_params' in e: + check_error = True + assert check_error + ## invalid design + design_7 = """ + sample_metadata: + IGF111: + condition: aaa + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_7, + validation_schema=schema_data) + check_error = False + for e in error_list: + if 'aaa' in e: + check_error = True + assert check_error + ## invalid design + design_8 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: notsure + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: 
nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + error_list = \ + _get_validation_status_for_analysis_design( + analysis_yaml=design_8, + validation_schema=schema_data) + check_error = False + for e in error_list: + if 'notsure' in e: + check_error = True + assert check_error + +def test_get_sample_metadata_checks_for_analysis(db): + project1 = \ + Project( + project_igf_id='project1') + project2 = \ + Project( + project_igf_id='project2') + sample1 = \ + Sample( + sample_igf_id='sample1', + project=project1) + sample2 = \ + Sample( + sample_igf_id='sample2', + project=project1) + sample3 = \ + Sample( + sample_igf_id='sample3', + project=project2) + experiment1 = \ + Experiment( + experiment_igf_id='experiment1', + platform_name="NEXTSEQ2000", + status='ACTIVE', + library_name="sample1", + project=project1, + sample=sample1) + platform1 = \ + Platform( + platform_igf_id="platform1", + model_name="NEXTSEQ2000", + vendor_name='ILLUMINA', + software_name="RTA", + software_version="x.y.z") + seqrun1 = \ + Seqrun( + seqrun_igf_id="seqrun1", + flowcell_id="XXX", + platform=platform1) + run1 = \ + Run( + run_igf_id="run1", + experiment=experiment1, + seqrun=seqrun1, + status='ACTIVE', + lane_number='1') + collection1 = \ + Collection( + name="run1", + type="demultiplexed_fastq") + file1 = \ + File( + file_path="/path/file1", + status='ACTIVE') + collection_group1 = \ + Collection_group( + collection=collection1, + file=file1) + try: + db.session.add(project1) + db.session.add(project2) + db.session.add(sample1) + db.session.add(sample2) + db.session.add(sample3) + db.session.add(experiment1) + db.session.add(platform1) + db.session.add(seqrun1) + db.session.add(run1) + db.session.add(collection1) + db.session.add(file1) + db.session.add(collection_group1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + ## valid input + error_list = \ + 
_get_sample_metadata_checks_for_analysis( + sample_metadata={'sample1': ''}, + project_igf_id='project1') + assert len(error_list) == 0 + ## invalid input + error_list = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata=['sample1'], + project_igf_id='project1') + assert len(error_list) == 1 + assert f'sample_metadata has type {type([])}' in error_list + ## invalid input + error_list = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata={}, + project_igf_id='project1') + assert len(error_list) == 1 + assert 'No sample ids found in sample_metadata' in error_list + ## invalid input + error_list = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata={'sample2': ''}, + project_igf_id='project1') + assert len(error_list) == 1 + assert 'No sample has fastq' in error_list + ## invalid input + error_list = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata={'sample1': '', 'sample2': ''}, + project_igf_id='project1') + assert len(error_list) == 1 + assert "Missing fastq for samples: sample2" in error_list + ## invalid input + error_list = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata={'sample1': ''}, + project_igf_id='project2') + assert len(error_list) == 1 + assert 'Analysis is linked to project project2 but samples are linked to project project1' in error_list + ## invalid input + error_list = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata={'sample1': '', 'sample3': ''}, + project_igf_id='project1') + assert len(error_list) == 2 + assert "samples are linked to multiple projects: project1, project2" in error_list + ## missing fastq for partial sample + error_list = \ + _get_sample_metadata_checks_for_analysis( + sample_metadata={'sample1': '', 'sample2': ''}, + project_igf_id='project1') + assert len(error_list) == 1 + assert "Missing fastq for samples: sample2" in error_list + + + +def test_get_validation_errors_for_analysis_design(db): + ## setup metadata + project1 = \ + Project( + 
project_igf_id='project1') + project2 = \ + Project( + project_igf_id='project2') + sample1 = \ + Sample( + sample_igf_id='IGF111', + project=project1) + sample2 = \ + Sample( + sample_igf_id='IGF112', + project=project1) + sample3 = \ + Sample( + sample_igf_id='IGF113', + project=project2) + experiment1 = \ + Experiment( + experiment_igf_id='experiment1', + platform_name="NEXTSEQ2000", + status='ACTIVE', + library_name="sample1", + project=project1, + sample=sample1) + platform1 = \ + Platform( + platform_igf_id="platform1", + model_name="NEXTSEQ2000", + vendor_name='ILLUMINA', + software_name="RTA", + software_version="x.y.z") + seqrun1 = \ + Seqrun( + seqrun_igf_id="seqrun1", + flowcell_id="XXX", + platform=platform1) + run1 = \ + Run( + run_igf_id="run1", + experiment=experiment1, + seqrun=seqrun1, + status='ACTIVE', + lane_number='1') + collection1 = \ + Collection( + name="run1", + type="demultiplexed_fastq") + file1 = \ + File( + file_path="/path/file1", + status='ACTIVE') + collection_group1 = \ + Collection_group( + collection=collection1, + file=file1) + try: + db.session.add(project1) + db.session.add(project2) + db.session.add(sample1) + db.session.add(sample2) + db.session.add(sample3) + db.session.add(experiment1) + db.session.add(platform1) + db.session.add(seqrun1) + db.session.add(run1) + db.session.add(collection1) + db.session.add(file1) + db.session.add(collection_group1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + ## setup design schema + schema_file = 'app/raw_analysis/analysis_validation_nfcore_v1.json' + with open(schema_file, 'r') as fp: + schema_data = fp.read() + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test', + pipeline_type='AIRFLOW') + analysis_schema1 = \ + RawAnalysisValidationSchema( + json_schema=schema_data, + pipeline=pipeline1, + status="VALIDATED") + try: + db.session.add(pipeline1) + db.session.add(analysis_schema1) + db.session.commit() + except: + 
db.session.rollback() + raise + ## setup analysis design + ## valid design + design_1 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + raw_analysis1 = \ + RawAnalysis( + analysis_name='analysis1', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_1) + try: + db.session.add(raw_analysis1) + db.session.commit() + except: + db.session.rollback() + raise + ## valid design errors + error_list = \ + _get_validation_errors_for_analysis_design( + raw_analysis_id=raw_analysis1.raw_analysis_id) + assert len(error_list) == 0 + ## invalid design + error_list = \ + _get_validation_errors_for_analysis_design( + raw_analysis_id=100) + assert len(error_list) == 1 + assert "No metadata entry found for id 100" in error_list + ## invalid design + design_2 = None + raw_analysis2 = \ + RawAnalysis( + analysis_name='analysis2', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_2) + try: + db.session.add(raw_analysis2) + db.session.commit() + except: + db.session.rollback() + raise + ## invalid design errors + error_list = \ + _get_validation_errors_for_analysis_design( + raw_analysis_id=raw_analysis2.raw_analysis_id) + assert len(error_list) == 1 + assert "No analysis design found" in error_list + ## invalid analysis + raw_analysis3 = \ + RawAnalysis( + analysis_name='analysis3', + analysis_yaml=design_1) + try: + db.session.add(raw_analysis3) + db.session.commit() + except: + db.session.rollback() + raise + ## invalid analysis design errors + error_list = \ + _get_validation_errors_for_analysis_design( + raw_analysis_id=raw_analysis3.raw_analysis_id) + assert len(error_list) == 3 + assert "No pipeline info found" in error_list + assert "No project id found" in error_list + assert "No analysis schema found" in error_list + ## invalid design + design_4 = """ + 
sample_metadata1: + IGF111: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + raw_analysis4 = \ + RawAnalysis( + analysis_name='analysis4', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_4) + try: + db.session.add(raw_analysis4) + db.session.commit() + except: + db.session.rollback() + raise + ## invalid analysis design errors + error_list = \ + _get_validation_errors_for_analysis_design( + raw_analysis_id=raw_analysis4.raw_analysis_id) + assert len(error_list) == 1 + check_error = False + for e in error_list: + if 'sample_metadata' in e: + check_error = True + assert check_error + ## invalid design + design_5 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: reverse + IGF112: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + raw_analysis5 = \ + RawAnalysis( + analysis_name='analysis5', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_5) + try: + db.session.add(raw_analysis5) + db.session.commit() + except: + db.session.rollback() + raise + ## invalid analysis design errors + error_list = \ + _get_validation_errors_for_analysis_design( + raw_analysis_id=raw_analysis5.raw_analysis_id) + assert len(error_list) == 1 + assert "Missing fastq for samples: IGF112" in error_list + + +def test_validate_analysis_design(db): + ## setup metadata + project1 = \ + Project( + project_igf_id='project1') + project2 = \ + Project( + project_igf_id='project2') + sample1 = \ + Sample( + sample_igf_id='IGF111', + project=project1) + sample2 = \ + Sample( + sample_igf_id='IGF112', + project=project1) + sample3 = \ + Sample( + sample_igf_id='IGF113', + project=project2) + experiment1 = \ + Experiment( + 
experiment_igf_id='experiment1', + platform_name="NEXTSEQ2000", + status='ACTIVE', + library_name="sample1", + project=project1, + sample=sample1) + platform1 = \ + Platform( + platform_igf_id="platform1", + model_name="NEXTSEQ2000", + vendor_name='ILLUMINA', + software_name="RTA", + software_version="x.y.z") + seqrun1 = \ + Seqrun( + seqrun_igf_id="seqrun1", + flowcell_id="XXX", + platform=platform1) + run1 = \ + Run( + run_igf_id="run1", + experiment=experiment1, + seqrun=seqrun1, + status='ACTIVE', + lane_number='1') + collection1 = \ + Collection( + name="run1", + type="demultiplexed_fastq") + file1 = \ + File( + file_path="/path/file1", + status='ACTIVE') + collection_group1 = \ + Collection_group( + collection=collection1, + file=file1) + try: + db.session.add(project1) + db.session.add(project2) + db.session.add(sample1) + db.session.add(sample2) + db.session.add(sample3) + db.session.add(experiment1) + db.session.add(platform1) + db.session.add(seqrun1) + db.session.add(run1) + db.session.add(collection1) + db.session.add(file1) + db.session.add(collection_group1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + ## setup design schema + schema_file = 'app/raw_analysis/analysis_validation_nfcore_v1.json' + with open(schema_file, 'r') as fp: + schema_data = fp.read() + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test', + pipeline_type='AIRFLOW') + analysis_schema1 = \ + RawAnalysisValidationSchema( + json_schema=schema_data, + pipeline=pipeline1, + status="VALIDATED") + try: + db.session.add(pipeline1) + db.session.add(analysis_schema1) + db.session.commit() + except: + db.session.rollback() + raise + ## setup analysis design + ## valid design + design_1 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + 
raw_analysis1 = \ + RawAnalysis( + analysis_name='analysis1', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_1) + try: + db.session.add(raw_analysis1) + db.session.commit() + except: + db.session.rollback() + raise + ## valid design status + status = \ + validate_analysis_design( + raw_analysis_id=raw_analysis1.raw_analysis_id) + assert status == 'VALIDATED' + ## invalid design + design_5 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: reverse + IGF112: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + raw_analysis5 = \ + RawAnalysis( + analysis_name='analysis5', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_5) + try: + db.session.add(raw_analysis5) + db.session.commit() + except: + db.session.rollback() + raise + ## invalid analysis design errors + status = \ + validate_analysis_design( + raw_analysis_id=raw_analysis5.raw_analysis_id) + assert status == 'FAILED' + + +def test_async_validate_analysis_yaml(db): + ## setup metadata + project1 = \ + Project( + project_igf_id='project1') + project2 = \ + Project( + project_igf_id='project2') + sample1 = \ + Sample( + sample_igf_id='IGF111', + project=project1) + sample2 = \ + Sample( + sample_igf_id='IGF112', + project=project1) + sample3 = \ + Sample( + sample_igf_id='IGF113', + project=project2) + experiment1 = \ + Experiment( + experiment_igf_id='experiment1', + platform_name="NEXTSEQ2000", + status='ACTIVE', + library_name="sample1", + project=project1, + sample=sample1) + platform1 = \ + Platform( + platform_igf_id="platform1", + model_name="NEXTSEQ2000", + vendor_name='ILLUMINA', + software_name="RTA", + software_version="x.y.z") + seqrun1 = \ + Seqrun( + seqrun_igf_id="seqrun1", + flowcell_id="XXX", + platform=platform1) + run1 = \ + Run( + run_igf_id="run1", + experiment=experiment1, + 
seqrun=seqrun1, + status='ACTIVE', + lane_number='1') + collection1 = \ + Collection( + name="run1", + type="demultiplexed_fastq") + file1 = \ + File( + file_path="/path/file1", + status='ACTIVE') + collection_group1 = \ + Collection_group( + collection=collection1, + file=file1) + try: + db.session.add(project1) + db.session.add(project2) + db.session.add(sample1) + db.session.add(sample2) + db.session.add(sample3) + db.session.add(experiment1) + db.session.add(platform1) + db.session.add(seqrun1) + db.session.add(run1) + db.session.add(collection1) + db.session.add(file1) + db.session.add(collection_group1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + ## setup design schema + schema_file = 'app/raw_analysis/analysis_validation_nfcore_v1.json' + with open(schema_file, 'r') as fp: + schema_data = fp.read() + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test', + pipeline_type='AIRFLOW') + analysis_schema1 = \ + RawAnalysisValidationSchema( + json_schema=schema_data, + pipeline=pipeline1, + status="VALIDATED") + try: + db.session.add(pipeline1) + db.session.add(analysis_schema1) + db.session.commit() + except: + db.session.rollback() + raise + ## setup analysis design + ## valid design + design_1 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + raw_analysis1 = \ + RawAnalysis( + analysis_name='analysis1', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_1) + try: + db.session.add(raw_analysis1) + db.session.commit() + except: + db.session.rollback() + raise + ## valid design status + status_dict = \ + async_validate_analysis_yaml( + id_list=[raw_analysis1.raw_analysis_id]) + assert len(status_dict) == 1 + assert raw_analysis1.raw_analysis_id in status_dict + assert 
status_dict.get(raw_analysis1.raw_analysis_id) == 'VALIDATED' + ## invalid design + design_5 = """ + sample_metadata: + IGF111: + condition: AAA + strandedness: reverse + IGF112: + condition: AAA + strandedness: reverse + analysis_metadata: + NXF_VER: x.y.z + nfcore_pipeline: nf-core/methylseq + nextflow_params: + - "-profile singularity" + - "-r a.b.c" + - "--genome GRCh38" + """ + raw_analysis5 = \ + RawAnalysis( + analysis_name='analysis5', + project=project1, + pipeline=pipeline1, + analysis_yaml=design_5) + try: + db.session.add(raw_analysis5) + db.session.commit() + except: + db.session.rollback() + raise + ## invalid analysis design errors + status_dict = \ + async_validate_analysis_yaml( + id_list=[raw_analysis5.raw_analysis_id]) + assert len(status_dict) == 1 + assert raw_analysis5.raw_analysis_id in status_dict + assert status_dict.get(raw_analysis5.raw_analysis_id) == 'FAILED' + + +def test_validate_json_schema(db): + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test', + pipeline_type='AIRFLOW') + schema_file = 'app/raw_analysis/analysis_validation_nfcore_v1.json' + with open(schema_file, 'r') as fp: + schema_data = fp.read() + schema1 = \ + RawAnalysisValidationSchema( + pipeline=pipeline1, + json_schema=schema_data) + try: + db.session.add(pipeline1) + db.session.add(schema1) + db.session.commit() + except: + db.session.rollback() + raise + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema1.raw_analysis_schema_id).\ + one_or_none() + assert status == 'UNKNOWN' + ## valid design + validate_json_schema(schema1.raw_analysis_schema_id) + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema1.raw_analysis_schema_id).\ + one_or_none() + assert status == 'VALIDATED' + ## invalid schema + pipeline2 = \ + Pipeline( + pipeline_name='pipeline2', + 
pipeline_db='test', + pipeline_type='AIRFLOW') + schema_data = """{'a': 'b'} + """ + schema2 = \ + RawAnalysisValidationSchema( + pipeline=pipeline2, + json_schema=schema_data) + try: + db.session.add(pipeline2) + db.session.add(schema2) + db.session.commit() + except: + db.session.rollback() + raise + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema2.raw_analysis_schema_id).\ + one_or_none() + assert status == 'UNKNOWN' + validate_json_schema(schema2.raw_analysis_schema_id) + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema2.raw_analysis_schema_id).\ + one_or_none() + assert status == 'FAILED' + + +def test_async_validate_analysis_schema(db): + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test', + pipeline_type='AIRFLOW') + schema_file = 'app/raw_analysis/analysis_validation_nfcore_v1.json' + with open(schema_file, 'r') as fp: + schema_data = fp.read() + schema1 = \ + RawAnalysisValidationSchema( + pipeline=pipeline1, + json_schema=schema_data) + try: + db.session.add(pipeline1) + db.session.add(schema1) + db.session.commit() + except: + db.session.rollback() + raise + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema1.raw_analysis_schema_id).\ + one_or_none() + assert status == 'UNKNOWN' + ## valid design + status_dict = \ + async_validate_analysis_schema( + id_list=[schema1.raw_analysis_schema_id]) + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema1.raw_analysis_schema_id).\ + one_or_none() + assert status == 'VALIDATED' + assert len(status_dict) == 1 + assert schema1.raw_analysis_schema_id in status_dict + assert status_dict.get(schema1.raw_analysis_schema_id) == 
'VALIDATED' + ## invalid schema + pipeline2 = \ + Pipeline( + pipeline_name='pipeline2', + pipeline_db='test', + pipeline_type='AIRFLOW') + schema_data = """{'a': 'b'} + """ + schema2 = \ + RawAnalysisValidationSchema( + pipeline=pipeline2, + json_schema=schema_data) + try: + db.session.add(pipeline2) + db.session.add(schema2) + db.session.commit() + except: + db.session.rollback() + raise + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema2.raw_analysis_schema_id).\ + one_or_none() + assert status == 'UNKNOWN' + status_dict = \ + async_validate_analysis_schema( + id_list=[schema2.raw_analysis_schema_id]) + (status,) = \ + db.session.\ + query(RawAnalysisValidationSchema.status).\ + filter(RawAnalysisValidationSchema.raw_analysis_schema_id==schema2.raw_analysis_schema_id).\ + one_or_none() + assert status == 'FAILED' + assert len(status_dict) == 1 + assert schema2.raw_analysis_schema_id in status_dict + assert status_dict.get(schema2.raw_analysis_schema_id) == 'FAILED' + + +def test_fetch_all_samples_for_project(db): + ## setup metadata + project1 = \ + Project( + project_igf_id='project1') + project2 = \ + Project( + project_igf_id='project2') + sample1 = \ + Sample( + sample_igf_id='IGF111', + project=project1) + sample2 = \ + Sample( + sample_igf_id='IGF112', + project=project1) + try: + db.session.add(project1) + db.session.add(project2) + db.session.add(sample1) + db.session.add(sample2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + sample_ids = \ + _fetch_all_samples_for_project( + project_igf_id='project1') + assert len(sample_ids) == 2 + assert 'IGF111' in sample_ids + assert 'IGF112' in sample_ids + sample_ids = \ + _fetch_all_samples_for_project( + project_igf_id='project2') + assert len(sample_ids) == 0 + +def test_get_analysis_template(db): + template_data = \ + _get_analysis_template(template_tag='xyz') + assert 
template_data is not None + assert 'condition: CONDITION_NAME' not in [t.strip() for t in template_data.split('\n')] + template = \ + """sample_metadata: + {% for SAMPLE_ID in SAMPLE_ID_LIST %} + {{ SAMPLE_ID }}: + condition: CONDITION_NAME + strandedness: reverse + {% endfor %}analysis_metadata: + pipeline_name: xyz + """ + template1 = \ + RawAnalysisTemplate( + template_tag="template1", + template_data=template) + try: + db.session.add(template1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + template_data = \ + _get_analysis_template(template_tag='template1') + assert template_data is not None + assert 'condition: CONDITION_NAME' in [t.strip() for t in template_data.split('\n')] + +def test_generate_analysis_template(db): + ## setup metadata + template = \ + """sample_metadata: + {% for SAMPLE_ID in SAMPLE_ID_LIST %} + {{ SAMPLE_ID }}: + condition: CONDITION_NAME + strandedness: reverse + {% endfor %}analysis_metadata: + pipeline_name: xyz""" + project1 = \ + Project( + project_igf_id='project1') + project2 = \ + Project( + project_igf_id='project2') + sample1 = \ + Sample( + sample_igf_id='IGF111', + project=project1) + sample2 = \ + Sample( + sample_igf_id='IGF112', + project=project1) + template1 = \ + RawAnalysisTemplate( + template_tag="template1", + template_data=template) + try: + db.session.add(project1) + db.session.add(project2) + db.session.add(sample1) + db.session.add(sample2) + db.session.add(template1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + ## valid project and template + formatted_template = \ + generate_analysis_template( + project_igf_id="project1", + template_tag="template1") + assert 'IGF111:' in [line.strip() for line in formatted_template.split('\n')] + assert 'condition: CONDITION_NAME' in [line.strip() for line in formatted_template.split('\n')] + ## invalid project valid template + formatted_template = \ + generate_analysis_template( + 
project_igf_id="project2", + template_tag="template1") + assert len([line.strip() for line in formatted_template.split('\n')]) == 3 + ## valid project invalid template + formatted_template = \ + generate_analysis_template( + project_igf_id="project1", + template_tag="xxx") + assert "IGF111: ''" in [line.strip() for line in formatted_template.split('\n')] + assert 'condition: CONDITION_NAME' not in [line.strip() for line in formatted_template.split('\n')] + ## invalid project invalid template + formatted_template = \ + generate_analysis_template( + project_igf_id="project2", + template_tag="xyz") + assert len([line.strip() for line in formatted_template.split('\n')]) == 2 \ No newline at end of file diff --git a/tests/test_raw_analysis_api.py b/tests/test_raw_analysis_api.py new file mode 100644 index 0000000..98deef5 --- /dev/null +++ b/tests/test_raw_analysis_api.py @@ -0,0 +1,139 @@ +import json +import os +from app.models import ( + RawAnalysis, + Project, + Pipeline) +from flask_appbuilder.const import ( + API_SECURITY_PASSWORD_KEY, + API_SECURITY_PROVIDER_KEY, + API_SECURITY_REFRESH_KEY, + API_SECURITY_USERNAME_KEY) + +def test_raw_analysis_api1(db, test_client, tmp_path): + # with test_client.session_transaction() as session: + # session['user_id'] = 1 + # session['_fresh'] = True + res = \ + test_client.post( + "/api/v1/security/login", + json={ + API_SECURITY_USERNAME_KEY: "admin", + API_SECURITY_PASSWORD_KEY: "password", + API_SECURITY_PROVIDER_KEY: "db"}) + # res = test_client.get('/raw_analysis/search_new_analysis') + # res = res.json() + assert res.status_code == 200 + token = \ + json.loads(res.data.decode("utf-8")).\ + get("access_token") + res = \ + test_client.get( + '/api/v1/raw_analysis/search_new_analysis', + headers={"Authorization": f"Bearer {token}"}) + assert res.status_code == 200 + assert json.loads(res.data.decode("utf-8")).get('new_analysis') == [] + pipeline1 = \ + Pipeline( + pipeline_name='pipeline1', + pipeline_db='test') + project1 = 
\ + Project( + project_igf_id='project1') + raw_analysis1 = \ + RawAnalysis( + analysis_name='raw_analysis1', + analysis_yaml='yaml_data', + project=project1, + pipeline=pipeline1, + status='FAILED') + analysis_yaml = """sample_metadata: + sample1: + condition: test + sample2: + condition: test + """ + raw_analysis2 = \ + RawAnalysis( + analysis_name='raw_analysis2', + analysis_yaml=analysis_yaml, + project=project1, + pipeline=pipeline1, + status='VALIDATED') + try: + db.session.add(pipeline1) + db.session.add(project1) + db.session.add(raw_analysis1) + db.session.add(raw_analysis2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + res = \ + test_client.get( + '/api/v1/raw_analysis/search_new_analysis', + headers={"Authorization": f"Bearer {token}"}) + assert res.status_code == 200 + assert json.loads(res.data.decode("utf-8")).get('new_analysis') == [2] + res = \ + test_client.post( + '/api/v1/raw_analysis/get_raw_analysis_data/2', + headers={"Authorization": f"Bearer {token}"}) + json_file = \ + os.path.join(tmp_path, 'raw_analysis1.json') + with open(json_file, 'wb') as fp: + fp.write(res.data) + with open(json_file, 'r') as fp: + json_data = json.load(fp) + assert 'analysis_name' in json_data + assert json_data.get('analysis_name') == 'raw_analysis2' + assert 'project_id' in json_data + assert json_data.get('project_id') == project1.project_id + assert 'pipeline_id' in json_data + assert json_data.get('pipeline_id') == pipeline1.pipeline_id + res = \ + test_client.post( + '/api/v1/raw_analysis/get_raw_analysis_data/1', + headers={"Authorization": f"Bearer {token}"}) + json_file = \ + os.path.join(tmp_path, 'raw_analysis2.json') + with open(json_file, 'wb') as fp: + fp.write(res.data) + with open(json_file, 'r') as fp: + json_data = json.load(fp) + assert 'analysis_name' in json_data + assert 'project_id' in json_data + assert json_data.get('analysis_name') == '' + assert json_data.get('project_id') == '' + res = \ + 
test_client.post( + '/api/v1/raw_analysis/get_raw_analysis_data/3', + headers={"Authorization": f"Bearer {token}"}) + json_file = \ + os.path.join(tmp_path, 'raw_analysis3.json') + with open(json_file, 'wb') as fp: + fp.write(res.data) + with open(json_file, 'r') as fp: + json_data = json.load(fp) + assert 'analysis_name' in json_data + assert 'project_id' in json_data + assert json_data.get('analysis_name') == '' + assert json_data.get('project_id') == '' + res = \ + test_client.post( + '/api/v1/raw_analysis/mark_analysis_synched/2', + headers={"Authorization": f"Bearer {token}"}) + assert json.loads(res.data.decode('utf-8')).get('status') == 'success' + res = \ + test_client.post( + '/api/v1/raw_analysis/mark_analysis_synched/1', + headers={"Authorization": f"Bearer {token}"}) + assert json.loads(res.data.decode('utf-8')).get('status') == 'failed' + res = \ + test_client.post( + '/api/v1/raw_analysis/mark_analysis_synched/3', + headers={"Authorization": f"Bearer {token}"}) + assert json.loads(res.data.decode('utf-8')).get('status') == 'failed' + + diff --git a/tests/test_raw_metadata_api.py b/tests/test_raw_metadata_api.py new file mode 100644 index 0000000..2ecaa00 --- /dev/null +++ b/tests/test_raw_metadata_api.py @@ -0,0 +1,135 @@ +import json +import os +from io import BytesIO +from app.models import ( + RawMetadataModel) +from flask_appbuilder.const import ( + API_SECURITY_PASSWORD_KEY, + API_SECURITY_PROVIDER_KEY, + API_SECURITY_REFRESH_KEY, + API_SECURITY_USERNAME_KEY) + +def test_raw_metadata_api1(db, test_client, tmp_path): + metadata1 = \ + RawMetadataModel( + metadata_tag='test1', + raw_csv_data='raw', + formatted_csv_data='formatted', + report='') + try: + db.session.add(metadata1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + res = \ + test_client.post( + "/api/v1/security/login", + json={ + API_SECURITY_USERNAME_KEY: "admin", + API_SECURITY_PASSWORD_KEY: "password", + API_SECURITY_PROVIDER_KEY: "db"}) + 
assert res.status_code == 200 + token = \ + json.loads(res.data.decode("utf-8")).\ + get("access_token") + res = \ + test_client.post( + '/api/v1/raw_metadata/search_new_metadata', + headers={"Authorization": f"Bearer {token}"}) + assert res.status_code == 400 + res = \ + test_client.post( + '/api/v1/raw_metadata/search_new_metadata', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(BytesIO(b'{"project_list":["test1", "test3", "test4", "test5"]}'), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code == 200 + assert 'new_projects' in res.data.decode('utf-8') + assert 'test3' in json.loads(res.data.decode('utf-8')).get('new_projects').split(",") + assert 'test4' in json.loads(res.data.decode('utf-8')).get('new_projects').split(",") + assert 'test5' in json.loads(res.data.decode('utf-8')).get('new_projects').split(",") + assert len(json.loads(res.data.decode('utf-8')).get('new_projects').split(",")) == 3 + res = \ + test_client.post( + '/api/v1/raw_metadata/search_new_metadata', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(BytesIO(b'{"project_list":["test1"]}'), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code == 200 + assert 'new_projects' in res.data.decode('utf-8') + assert json.loads(res.data.decode('utf-8')).get('new_projects') == "" + res = \ + test_client.post( + '/api/v1/raw_metadata/search_new_metadata', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(BytesIO(b'{"project_list":["test2"]}'), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code == 200 + assert 'new_projects' in res.data.decode('utf-8') + assert json.loads(res.data.decode('utf-8')).get('new_projects') == "test2" + metadata_file_data = \ + BytesIO(b'[{"metadata_tag": "test2", "raw_csv_data": [{"project_id": "c","sample_id": "d"}], "formatted_csv_data": [{"project_id": "c","sample_id": 
"d"}]}]') + res = \ + test_client.post( + '/api/v1/raw_metadata/add_metadata', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(metadata_file_data, 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code == 200 + res = \ + test_client.post( + '/api/v1/raw_metadata/search_new_metadata', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(BytesIO(b'{"project_list":["test2"]}'), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert res.status_code == 200 + assert 'new_projects' in res.data.decode('utf-8') + assert json.loads(res.data.decode('utf-8')).get('new_projects') == "" + res = \ + test_client.get( + '/api/v1/raw_metadata/download_ready_metadata', + headers={"Authorization": f"Bearer {token}"}) + assert res.status_code == 200 + assert res.data.decode('utf-8').strip() == '{}' + metadata3 = \ + RawMetadataModel( + metadata_tag='test3', + raw_csv_data='[{"project_id": "c","sample_id": "d"}]', + formatted_csv_data='[{"project_id": "c","sample_id": "d"}]', + status='READY', + report='') + try: + db.session.add(metadata3) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + res = \ + test_client.get( + '/api/v1/raw_metadata/download_ready_metadata', + headers={"Authorization": f"Bearer {token}"}) + assert res.status_code == 200 + json_data = \ + json.loads(res.data.decode('utf-8')) + assert 'test3' in json_data + assert json_data.get('test3') == '[{"project_id": "c","sample_id": "d"}]' + res = \ + test_client.get( + f'/api/v1/raw_metadata/mark_ready_metadata_as_synced', + headers={"Authorization": f"Bearer {token}"}) + assert res.status_code == 200 + records = \ + db.session.\ + query(RawMetadataModel.status).\ + filter(RawMetadataModel.raw_metadata_id==metadata3.raw_metadata_id).\ + one_or_none() + assert records is not None + assert records[0] == 'SYNCHED' diff --git a/tests/test_raw_metadata_util.py 
b/tests/test_raw_metadata_util.py index 2b8f270..19d4480 100644 --- a/tests/test_raw_metadata_util.py +++ b/tests/test_raw_metadata_util.py @@ -1,5 +1,4 @@ import unittest -from app import db from app.models import RawMetadataModel, Project, Sample from app.raw_metadata.raw_metadata_util import _run_metadata_json_validation from app.raw_metadata.raw_metadata_util import _validate_metadata_library_type @@ -8,51 +7,64 @@ from app.raw_metadata.raw_metadata_util import compare_metadata_sample_with_db from app.raw_metadata.raw_metadata_util import search_metadata_table_and_get_new_projects from app.raw_metadata.raw_metadata_util import parse_and_add_new_raw_metadata +from app.raw_metadata_view import async_validate_metadata -class TestMetaDataValidation1(unittest.TestCase): - def setUp(self): - pass +# class TestMetaDataValidation1(unittest.TestCase): +# def setUp(self): +# pass - def tearDown(self): - pass +# def tearDown(self): +# pass - def test_run_metadata_json_validation(self): +# def test_run_metadata_json_validation(self): +def test_run_metadata_json_validation(): errors = \ _run_metadata_json_validation( metadata_file="data/metadata_file1.csv", schema_json="app/raw_metadata/metadata_validation.json") - self.assertEqual(len(errors), 5) - self.assertEqual(len([err for err in errors if 'sample105799' in err]), 1) - self.assertEqual(len([err for err in errors if 'KDSC_77' in err]), 1) - self.assertEqual(len([err for err in errors if 'Project_cs_23-5-2018_SC' in err]), 1) - self.assertEqual(len([err for err in errors if 'c.s#email.ac.uk' in err]), 1) - self.assertTrue(isinstance(errors[0], str)) - - def test_validate_metadata_library_type(self): + # self.assertEqual(len(errors), 5) + # self.assertEqual(len([err for err in errors if 'sample105799' in err]), 1) + # self.assertEqual(len([err for err in errors if 'KDSC_77' in err]), 1) + # self.assertEqual(len([err for err in errors if 'Project_cs_23-5-2018_SC' in err]), 1) + # self.assertEqual(len([err for err in 
errors if 'c.s#email.ac.uk' in err]), 1) + # self.assertTrue(isinstance(errors[0], str)) + assert len(errors) == 5 + assert len([err for err in errors if 'sample105799' in err]) == 1 + assert len([err for err in errors if 'KDSC_77' in err]) == 1 + assert len([err for err in errors if 'Project_cs_23-5-2018_SC' in err]) == 1 + assert len([err for err in errors if 'c.s#email.ac.uk' in err]) == 1 + assert isinstance(errors[0], str) + + +# def test_validate_metadata_library_type(self): +def test_validate_metadata_library_type(): err = \ _validate_metadata_library_type( sample_id='test1', library_source='GENOMIC', library_strategy='CHIP-SEQ', experiment_type='TF') - self.assertTrue(err is None) + # self.assertTrue(err is None) + assert err is None err = \ _validate_metadata_library_type( sample_id='test1', library_source='GENOMIC', library_strategy='CHIP-SEQ', experiment_type='CHIP-Seq') - self.assertTrue(err is not None) + # self.assertTrue(err is not None) + assert err is not None -class TestMetaDataValidation2(unittest.TestCase): - def setUp(self): - db.create_all() +# class TestMetaDataValidation2(unittest.TestCase): +# def setUp(self): +# db.create_all() - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.drop_all() - def test_set_metadata_validation_status(self): + # def test_set_metadata_validation_status(self): +def test_set_metadata_validation_status(db): metadata = \ RawMetadataModel( raw_metadata_id=1, @@ -72,9 +84,12 @@ def test_set_metadata_validation_status(self): query(RawMetadataModel).\ filter(RawMetadataModel.raw_metadata_id==1).\ one_or_none() - self.assertTrue(result is not None) - self.assertEqual(result.metadata_tag, 'test1') - self.assertEqual(result.status, 'UNKNOWN') + # self.assertTrue(result is not None) + # self.assertEqual(result.metadata_tag, 'test1') + # self.assertEqual(result.status, 'UNKNOWN') + assert result is not None + assert result.metadata_tag == 'test1' + assert result.status == 'UNKNOWN' 
_set_metadata_validation_status( + raw_metadata_id=1, + status='failed', @@ -84,9 +99,12 @@ def test_set_metadata_validation_status(self): + query(RawMetadataModel).\ + filter(RawMetadataModel.raw_metadata_id==1).\ + one_or_none() - self.assertTrue(result is not None) - self.assertEqual(result.metadata_tag, 'test1') - self.assertEqual(result.status, 'FAILED') + # self.assertTrue(result is not None) + # self.assertEqual(result.metadata_tag, 'test1') + # self.assertEqual(result.status, 'FAILED') + assert result is not None + assert result.metadata_tag == 'test1' + assert result.status == 'FAILED' _set_metadata_validation_status( + raw_metadata_id=1, + status='validated') @@ -95,11 +113,15 @@ def test_set_metadata_validation_status(self): + query(RawMetadataModel).\ + filter(RawMetadataModel.raw_metadata_id==1).\ + one_or_none() - self.assertTrue(result is not None) - self.assertEqual(result.metadata_tag, 'test1') - self.assertEqual(result.status, 'VALIDATED') + # self.assertTrue(result is not None) + # self.assertEqual(result.metadata_tag, 'test1') + # self.assertEqual(result.status, 'VALIDATED') + assert result is not None + assert result.metadata_tag == 'test1' + assert result.status == 'VALIDATED' - def test_validate_raw_metadata_and_set_db_status(self): + # def test_validate_raw_metadata_and_set_db_status(self): +def test_validate_raw_metadata_and_set_db_status(db): with open("data/metadata_file1.csv", "r") as fp: lines = fp.readlines() metadata = \ @@ -124,12 +146,49 @@ def test_validate_raw_metadata_and_set_db_status(self): query(RawMetadataModel).\ filter(RawMetadataModel.raw_metadata_id==1).\ one_or_none() - self.assertTrue(result is not None) - self.assertEqual(result.metadata_tag, 'test1') - self.assertEqual(result.status, 'FAILED') - self.assertTrue(result.report is not None) + # self.assertTrue(result is not None) + # self.assertEqual(result.metadata_tag, 'test1') + # self.assertEqual(result.status, 'FAILED') + # self.assertTrue(result.report is not None) + assert result is 
not None + assert result.metadata_tag == 'test1' + assert result.status == 'FAILED' + assert result.report is not None + +def test_async_validate_metadata(db): + with open("data/metadata_file1.csv", "r") as fp: + lines = fp.readlines() + metadata = \ + RawMetadataModel( + raw_metadata_id=1, + metadata_tag='test1', + raw_csv_data='raw', + formatted_csv_data='\n'.join(lines), + report='') + try: + db.session.add(metadata) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + async_validate_metadata(id_list=[1,]) + result = \ + db.session.\ + query(RawMetadataModel).\ + filter(RawMetadataModel.raw_metadata_id==1).\ + one_or_none() + # self.assertTrue(result is not None) + # self.assertEqual(result.metadata_tag, 'test1') + # self.assertEqual(result.status, 'FAILED') + # self.assertTrue(result.report is not None) + assert result is not None + assert result.metadata_tag == 'test1' + assert result.status == 'FAILED' + assert result.report is not None - def test_compare_metadata_sample_with_db(self): + # def test_compare_metadata_sample_with_db(self): +def test_compare_metadata_sample_with_db(db): project = \ Project( project_id=1, @@ -150,17 +209,19 @@ def test_compare_metadata_sample_with_db(self): metadata_errors = \ compare_metadata_sample_with_db( metadata_file="data/metadata_file1.csv") - self.assertTrue("Sample sample105799 is linked to project test1, not IGFQ000001_cs_23-5-2018_SC" in metadata_errors) + # self.assertTrue("Sample sample105799 is linked to project test1, not IGFQ000001_cs_23-5-2018_SC" in metadata_errors) + assert "Sample sample105799 is linked to project test1, not IGFQ000001_cs_23-5-2018_SC" in metadata_errors -class TestMetadataApiutil1(unittest.TestCase): - def setUp(self): - db.create_all() +# class TestMetadataApiutil1(unittest.TestCase): +# def setUp(self): +# db.create_all() - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.drop_all() - def 
test_search_metadata_table_and_get_new_projects(self): + # def test_search_metadata_table_and_get_new_projects(self): +def test_search_metadata_table_and_get_new_projects(db): metadata1 = \ RawMetadataModel( metadata_tag='test1', @@ -184,23 +245,29 @@ def test_search_metadata_table_and_get_new_projects(self): new_projects = \ search_metadata_table_and_get_new_projects( data={"project_list":["test1", "test3"]}) - self.assertTrue(isinstance(new_projects, list)) - self.assertEqual(len(new_projects), 1) - self.assertTrue("test3" in new_projects) + # self.assertTrue(isinstance(new_projects, list)) + # self.assertEqual(len(new_projects), 1) + # self.assertTrue("test3" in new_projects) + assert isinstance(new_projects, list) + assert len(new_projects) == 1 + assert "test3" in new_projects new_projects = \ search_metadata_table_and_get_new_projects( data={"project_list":["test1", "test2"]}) - self.assertTrue(isinstance(new_projects, list)) - self.assertEqual(len(new_projects), 0) + # self.assertTrue(isinstance(new_projects, list)) + # self.assertEqual(len(new_projects), 0) + assert isinstance(new_projects, list) + assert len(new_projects) == 0 -class TestRawMetadataLoading(unittest.TestCase): - def setUp(self): - db.create_all() +# class TestRawMetadataLoading(unittest.TestCase): +# def setUp(self): +# db.create_all() - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.drop_all() - def test_parse_and_add_new_raw_metadata(self): + # def test_parse_and_add_new_raw_metadata(self): +def test_parse_and_add_new_raw_metadata(db): metadata_list = [{ 'metadata_tag': 'test1', 'raw_csv_data': [{'project_id','sample_id'},{'a', 'b'}], @@ -211,8 +278,10 @@ def test_parse_and_add_new_raw_metadata(self): parse_and_add_new_raw_metadata(data=metadata_list) results = db.session.query(RawMetadataModel.metadata_tag).all() results = [i[0] for i in results] - self.assertEqual(len(results), 2) - self.assertTrue('test1' in results) + # self.assertEqual(len(results), 2) + # 
self.assertTrue('test1' in results) + assert len(results) == 2 + assert 'test1' in results -if __name__ == '__main__': - unittest.main() \ No newline at end of file +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file diff --git a/tests/test_raw_seqrun_api.py b/tests/test_raw_seqrun_api.py new file mode 100644 index 0000000..3942b5b --- /dev/null +++ b/tests/test_raw_seqrun_api.py @@ -0,0 +1,63 @@ +import json +import os +from io import BytesIO +from app.models import ( + RawSeqrun, + SampleSheetModel) +from flask_appbuilder.const import ( + API_SECURITY_PASSWORD_KEY, + API_SECURITY_PROVIDER_KEY, + API_SECURITY_REFRESH_KEY, + API_SECURITY_USERNAME_KEY) + +def test_raw_seqrun_api(db, test_client, tmp_path): + # with test_client.session_transaction() as session: + # session['user_id'] = 1 + # session['_fresh'] = True + res = \ + test_client.post( + "/api/v1/security/login", + json={ + API_SECURITY_USERNAME_KEY: "admin", + API_SECURITY_PASSWORD_KEY: "password", + API_SECURITY_PROVIDER_KEY: "db"}) + # res = test_client.get('/raw_analysis/search_new_analysis') + # res = res.json() + assert res.status_code == 200 + token = \ + json.loads(res.data.decode("utf-8")).\ + get("access_token") + res = \ + test_client.post( + '/api/v1/raw_seqrun/add_new_seqrun', + headers={"Authorization": f"Bearer {token}"}) + assert res.status_code == 400 + raw_seqrun1 = \ + RawSeqrun(raw_seqrun_igf_id='run1') + try: + db.session.add(raw_seqrun1) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + records = \ + db.session.\ + query(RawSeqrun.raw_seqrun_igf_id).\ + filter(RawSeqrun.raw_seqrun_igf_id=="run3").\ + one_or_none() + assert records is None + res = \ + test_client.post( + '/api/v1/raw_seqrun/add_new_seqrun', + headers={"Authorization": f"Bearer {token}"}, + data=dict(file=(BytesIO(b'{"seqrun_id_list":["run1", "run2", "run3"]}'), 'test.json')), + content_type='multipart/form-data', + follow_redirects=True) + assert 
res.status_code == 200 + records = \ + db.session.\ + query(RawSeqrun.raw_seqrun_igf_id).\ + filter(RawSeqrun.raw_seqrun_igf_id=="run3").\ + one_or_none() + assert records is not None \ No newline at end of file diff --git a/tests/test_rawseqrun_utils.py b/tests/test_rawseqrun_utils.py index 9e44f1c..0852993 100644 --- a/tests/test_rawseqrun_utils.py +++ b/tests/test_rawseqrun_utils.py @@ -1,5 +1,5 @@ import os, unittest, tempfile -from app import db +# from app import db from datetime import datetime from app.models import SampleSheetModel, RawSeqrun from app.raw_seqrun.raw_seqrun_util import fetch_samplesheet_id_for_seqrun @@ -9,86 +9,238 @@ from app.raw_seqrun.raw_seqrun_util import check_and_filter_raw_seqruns_after_checking_samplesheet from app.raw_seqrun.raw_seqrun_util import check_and_add_new_raw_seqrun -class TestRawSeqrunA(unittest.TestCase): - def setUp(self): - db.create_all() - try: - samplesheet1 = \ - SampleSheetModel( - samplesheet_tag='samplesheet1', - csv_data='data', - status='PASS', - update_time=datetime.now(), - validation_time=datetime.now()) - db.session.add(samplesheet1) - db.session.flush() - samplesheet2 = \ - SampleSheetModel( - samplesheet_tag='samplesheet2', - csv_data='data', - status='PASS', - update_time=datetime.now(), - validation_time=datetime.now(),) - db.session.add(samplesheet2) - db.session.flush() - raw_seqrun1 = \ - RawSeqrun( - raw_seqrun_igf_id='run1', - samplesheet_id=samplesheet2.samplesheet_id, - override_cycles='Y100;I8;I8;Y100') - db.session.add(raw_seqrun1) - db.session.flush() - raw_seqrun2 = \ - RawSeqrun( - raw_seqrun_igf_id='run2') - db.session.add(raw_seqrun2) - db.session.flush() - db.session.commit() - except: - db.session.rollback() - raise +# class TestRawSeqrunA(unittest.TestCase): +# def setUp(self): +# db.create_all() +# try: +# samplesheet1 = \ +# SampleSheetModel( +# samplesheet_tag='samplesheet1', +# csv_data='data', +# status='PASS', +# update_time=datetime.now(), +# validation_time=datetime.now()) 
+# db.session.add(samplesheet1) +# db.session.flush() +# samplesheet2 = \ +# SampleSheetModel( +# samplesheet_tag='samplesheet2', +# csv_data='data', +# status='PASS', +# update_time=datetime.now(), +# validation_time=datetime.now(),) +# db.session.add(samplesheet2) +# db.session.flush() +# raw_seqrun1 = \ +# RawSeqrun( +# raw_seqrun_igf_id='run1', +# samplesheet_id=samplesheet2.samplesheet_id, +# override_cycles='Y100;I8;I8;Y100') +# db.session.add(raw_seqrun1) +# db.session.flush() +# raw_seqrun2 = \ +# RawSeqrun( +# raw_seqrun_igf_id='run2') +# db.session.add(raw_seqrun2) +# db.session.flush() +# db.session.commit() +# except: +# db.session.rollback() +# raise - def tearDown(self): - db.drop_all() +# def tearDown(self): +# db.drop_all() - def test_fetch_samplesheet_id_for_seqrun(self): - result = \ - fetch_samplesheet_id_for_seqrun('run1') - self.assertEqual(result, 2) - result = \ - fetch_samplesheet_id_for_seqrun('run2') - self.assertIsNone(result) +def test_fetch_samplesheet_id_for_seqrun(db): + # def test_fetch_samplesheet_id_for_seqrun(self): + samplesheet1 = \ + SampleSheetModel( + samplesheet_tag='samplesheet1', + csv_data='data', + status='PASS', + update_time=datetime.now(), + validation_time=datetime.now()) + samplesheet2 = \ + SampleSheetModel( + samplesheet_tag='samplesheet2', + csv_data='data', + status='PASS', + update_time=datetime.now(), + validation_time=datetime.now(),) + raw_seqrun1 = \ + RawSeqrun( + raw_seqrun_igf_id='run1', + samplesheet_id=samplesheet2.samplesheet_id, + override_cycles='Y100;I8;I8;Y100') + raw_seqrun2 = \ + RawSeqrun( + raw_seqrun_igf_id='run2') + try: + db.session.add(samplesheet1) + db.session.flush() + db.session.add(samplesheet2) + db.session.flush() + db.session.add(raw_seqrun1) + db.session.flush() + db.session.add(raw_seqrun2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + result = \ + fetch_samplesheet_id_for_seqrun(raw_seqrun1.raw_seqrun_id) + # self.assertEqual(result, 2) + 
assert result == raw_seqrun1.samplesheet_id + result = \ + fetch_samplesheet_id_for_seqrun('run2') + # self.assertIsNone(result) + assert result is None - def test_fetch_override_cycle_for_seqrun(self): - result = \ - fetch_override_cycle_for_seqrun('run1') - self.assertEqual(result, 'Y100;I8;I8;Y100') - result = \ - fetch_override_cycle_for_seqrun('run2') - self.assertIsNone(result) +def test_fetch_override_cycle_for_seqrun(db): + # def test_fetch_override_cycle_for_seqrun(self): + samplesheet1 = \ + SampleSheetModel( + samplesheet_tag='samplesheet1', + csv_data='data', + status='PASS', + update_time=datetime.now(), + validation_time=datetime.now()) + samplesheet2 = \ + SampleSheetModel( + samplesheet_tag='samplesheet2', + csv_data='data', + status='PASS', + update_time=datetime.now(), + validation_time=datetime.now(),) + raw_seqrun1 = \ + RawSeqrun( + raw_seqrun_igf_id='run1', + samplesheet_id=samplesheet2.samplesheet_id, + override_cycles='Y100;I8;I8;Y100') + raw_seqrun2 = \ + RawSeqrun( + raw_seqrun_igf_id='run2') + try: + db.session.add(samplesheet1) + db.session.flush() + db.session.add(samplesheet2) + db.session.flush() + db.session.add(raw_seqrun1) + db.session.flush() + db.session.add(raw_seqrun2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + result = \ + fetch_override_cycle_for_seqrun('run1') + # self.assertEqual(result, 'Y100;I8;I8;Y100') + assert result == 'Y100;I8;I8;Y100' + result = \ + fetch_override_cycle_for_seqrun('run2') + # self.assertIsNone(result) + assert result is None - def test_fetch_samplesheet_for_seqrun(self): - result = \ - fetch_samplesheet_for_seqrun('run1') +def test_fetch_samplesheet_for_seqrun(db): + # def test_fetch_samplesheet_for_seqrun(self): + samplesheet1 = \ + SampleSheetModel( + samplesheet_tag='samplesheet1', + csv_data='data', + status='PASS', + update_time=datetime.now(), + validation_time=datetime.now()) + samplesheet2 = \ + SampleSheetModel( + samplesheet_tag='samplesheet2', + 
csv_data='data', + status='PASS', + update_time=datetime.now(), + validation_time=datetime.now(),) + raw_seqrun1 = \ + RawSeqrun( + raw_seqrun_igf_id='run1', + samplesheet=samplesheet2, + override_cycles='Y100;I8;I8;Y100') + raw_seqrun2 = \ + RawSeqrun( + raw_seqrun_igf_id='run2') + try: + db.session.add(samplesheet1) + db.session.add(samplesheet2) + db.session.add(raw_seqrun1) + db.session.add(raw_seqrun2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + result = \ + fetch_samplesheet_for_seqrun(raw_seqrun1.raw_seqrun_igf_id) #print(db.session.query(SampleSheetModel.validation_time, SampleSheetModel.update_time).filter(SampleSheetModel.samplesheet_tag=='samplesheet2').all()) - self.assertEqual(result.samplesheet_tag, 'samplesheet2') - self.assertEqual(result.csv_data, 'data') - result = \ - fetch_samplesheet_for_seqrun('run2') - self.assertIsNone(result) + # self.assertEqual(result.samplesheet_tag, 'samplesheet2') + # self.assertEqual(result.csv_data, 'data') + assert result is not None + assert result.samplesheet_tag == samplesheet2.samplesheet_tag + assert result.csv_data == samplesheet2.csv_data + result = \ + fetch_samplesheet_for_seqrun('run2') + # self.assertIsNone(result) + assert result is None - def test_check_and_add_new_raw_seqrun(self): - seqrun_id_list = ['run1', 'run2', 'run 3'] - check_and_add_new_raw_seqrun( - seqrun_id_list=seqrun_id_list) - results = \ - db.session.\ - query(RawSeqrun.raw_seqrun_igf_id).\ - all() - self.assertEqual(len(results), 3) - results = [s[0] for s in results] - self.assertIn('run1', results) - self.assertIn('run_3', results) +def test_check_and_add_new_raw_seqrun(db): + # def test_check_and_add_new_raw_seqrun(self): + samplesheet1 = \ + SampleSheetModel( + samplesheet_tag='samplesheet1', + csv_data='data', + status='PASS', + update_time=datetime.now(), + validation_time=datetime.now()) + samplesheet2 = \ + SampleSheetModel( + samplesheet_tag='samplesheet2', + csv_data='data', + 
status='PASS', + update_time=datetime.now(), + validation_time=datetime.now(),) + raw_seqrun1 = \ + RawSeqrun( + raw_seqrun_igf_id='run1', + samplesheet_id=samplesheet2.samplesheet_id, + override_cycles='Y100;I8;I8;Y100') + raw_seqrun2 = \ + RawSeqrun( + raw_seqrun_igf_id='run2') + try: + db.session.add(samplesheet1) + db.session.flush() + db.session.add(samplesheet2) + db.session.flush() + db.session.add(raw_seqrun1) + db.session.flush() + db.session.add(raw_seqrun2) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + seqrun_id_list = ['run1', 'run2', 'run 3'] + check_and_add_new_raw_seqrun( + seqrun_id_list=seqrun_id_list) + results = \ + db.session.\ + query(RawSeqrun.raw_seqrun_igf_id).\ + all() + # self.assertEqual(len(results), 3) + assert len(results) == 3 + results = [s[0] for s in results] + # self.assertIn('run1', results) + assert 'run1' in results + # self.assertIn('run_3', results) + assert 'run_3' in results -if __name__ == '__main__': - unittest.main() \ No newline at end of file +# if __name__ == '__main__': +# unittest.main() \ No newline at end of file diff --git a/tests/test_rawseqrun_view.py b/tests/test_rawseqrun_view.py new file mode 100644 index 0000000..77e2be3 --- /dev/null +++ b/tests/test_rawseqrun_view.py @@ -0,0 +1,99 @@ +import os +import requests +from unittest.mock import MagicMock +from unittest.mock import patch +from datetime import datetime +from app.raw_seqrun_view import samplesheet_query +from app.models import RawSeqrun, SampleSheetModel +from app.raw_seqrun_view import update_trigger_date_for_seqrun +from app.raw_seqrun_view import async_trigger_airflow_pipeline + +def test_samplesheet_query(db): + samplesheets = samplesheet_query() + assert len(samplesheets) == 0 + try: + sa1 = \ + SampleSheetModel( + samplesheet_tag='test1', + csv_data='test data', + status='PASS', + validation_time=datetime.now(), + update_time=datetime.now()) + db.session.add(sa1) + db.session.flush() + 
db.session.commit() + except: + db.session.rollback() + raise + samplesheets = samplesheet_query() + assert len(samplesheets) == 1 + assert samplesheets[0].samplesheet_tag == 'test1' + assert samplesheets[0].status == 'PASS' + + +def test_update_trigger_date_for_seqrun(db): + try: + seqrun = \ + RawSeqrun(raw_seqrun_igf_id='seqrun2') + db.session.add(seqrun) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + seqrun = \ + db.session.\ + query(RawSeqrun).\ + filter(RawSeqrun.raw_seqrun_igf_id=='seqrun2').\ + one_or_none() + assert seqrun.raw_seqrun_igf_id == 'seqrun2' + assert seqrun.trigger_time is None + update_trigger_date_for_seqrun(seqrun_id='seqrun2') + seqrun = \ + db.session.\ + query(RawSeqrun).\ + filter(RawSeqrun.raw_seqrun_igf_id=='seqrun2').\ + one_or_none() + assert seqrun.raw_seqrun_igf_id == 'seqrun2' + assert seqrun.trigger_time is not None + +@patch('app.raw_seqrun_view.trigger_airflow_pipeline', return_value=requests.patch('https://httpbin.org/patch', data ={'key': 'value'}, headers={'Content-Type': 'application/json'})) +def test_async_trigger_airflow_pipeline(mock_object, db): + try: + seqrun = \ + RawSeqrun(raw_seqrun_igf_id='seqrun1') + db.session.add(seqrun) + db.session.flush() + db.session.commit() + except: + db.session.rollback() + raise + os.environ['AIRFLOW_CONF_FILE'] = '/tmp/' + result = async_trigger_airflow_pipeline('test_dag', [{'seqrun_id':'seqrun1'}], True) + assert 'seqrun1' in result + +#@patch('app.raw_seqrun_view.trigger_airflow_pipeline', return_value=requests.patch('https://httpbin.org/patch', data ={'key': 'value'}, headers={'Content-Type': 'application/json'})) +# def test_action_trigger_pre_demultiplexing(db, test_client): +# try: +# sa1 = \ +# SampleSheetModel( +# samplesheet_tag='test1', +# csv_data='test data', +# status='PASS', +# validation_time=datetime.now(), +# update_time=datetime.now()) +# seqrun = \ +# RawSeqrun(raw_seqrun_igf_id='seqrun1') +# db.session.add(seqrun) +# 
db.session.add(sa1) +# db.session.flush() +# db.session.commit() +# except: +# db.session.rollback() +# raise +# resp = test_client.post( +# "/rawseqrunview/action_post", +# data={'action':'trigger_pre_demultiplexing', 'rowid': [seqrun.raw_seqrun_id]} +# ) +# print(resp.__dict__) +# assert resp.status_code == 200 \ No newline at end of file diff --git a/tox.ini b/tox.ini index 1df2d93..8c94b52 100644 --- a/tox.ini +++ b/tox.ini @@ -1,9 +1,12 @@ [tox] -envlist = py37 +envlist = + py38 + +isolated_build = true [testenv] deps = -rrequirements.txt setenv = PYTHONPATH = {env:PYTHONPATH}{:}{toxinidir} commands = - pytest \ No newline at end of file + pytest --cov=app --log-level=ERROR tests