diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 000000000..3df43dc6d
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,20 @@
+# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node/.devcontainer/base.Dockerfile
+
+ARG VARIANT="18-bullseye"
+FROM mcr.microsoft.com/vscode/devcontainers/javascript-node:0-${VARIANT}
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y \
+    python3 \
+    git \
+    make \
+    g++ \
+    libcairo2-dev \
+    libpango1.0-dev \
+    libjpeg-dev \
+    libgif-dev \
+    librsvg2-dev \
+    && npm install -g @nrwl/cli commitizen \
+    && git config --global pull.rebase true \
+    && npx husky install
+# && su node -c "git config --global pull.rebase true" \
+# && su node -c "npm install -g @nrwl/cli commitizen"
\ No newline at end of file
diff --git a/.devcontainer/README.md b/.devcontainer/README.md
new file mode 100644
index 000000000..533e42f59
--- /dev/null
+++ b/.devcontainer/README.md
@@ -0,0 +1,7 @@
+### BioSimulations Containerized Development Environment using VS Code Dev Containers
+
+#### _Getting Started_:
+1. Open this repo in VS Code
+2. Open the command palette (CMD + SHIFT + P)
+3. Type and select: `Dev Containers: Rebuild and Reopen in Container`. A new window will open.
+4. Once the dev container builds successfully, open a new terminal window in the container and run: `npx nx run platform:serve --host 0.0.0.0 --port 4200`. The dev server will be available at `http://localhost:4200`.
\ No newline at end of file
diff --git a/.devcontainer/api.devcontainer/Dockerfile b/.devcontainer/api.devcontainer/Dockerfile
new file mode 100644
index 000000000..8006bced7
--- /dev/null
+++ b/.devcontainer/api.devcontainer/Dockerfile
@@ -0,0 +1,22 @@
+# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node/.devcontainer/base.Dockerfile
+
+ARG VARIANT="18-bullseye"
+FROM mcr.microsoft.com/vscode/devcontainers/javascript-node:0-${VARIANT}
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y \
+    python3 \
+    git \
+    make \
+    g++ \
+    libcairo2-dev \
+    libpango1.0-dev \
+    libjpeg-dev \
+    libgif-dev \
+    librsvg2-dev \
+    && npm install -g @nrwl/cli commitizen \
+    && git config --global pull.rebase true
+
+RUN npx husky install
+# && su node -c "git config --global pull.rebase true" \
+# && su node -c "npm install -g @nrwl/cli commitizen"
+# ENTRYPOINT ["npx", "nx", "run", "platform:serve", "--host", "0.0.0.0", "--port", "4200"]
\ No newline at end of file
diff --git a/.devcontainer/api.devcontainer/devcontainer.json b/.devcontainer/api.devcontainer/devcontainer.json
new file mode 100644
index 000000000..21ec5ab9b
--- /dev/null
+++ b/.devcontainer/api.devcontainer/devcontainer.json
@@ -0,0 +1,40 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
+// https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node
+{
+  "name": "Biosimulations",
+  "dockerComposeFile": "../docker-compose.yml",
+  "service": "api",
+  "workspaceFolder": "/workspace",
+
+  // Set *default* container specific settings.json values on container create.
+  "settings": {},
+
+  // Add the IDs of extensions you want installed when the container is created.
+ "extensions": [ + "mikael.angular-beastcode", + "angular.ng-template", + "aaron-bond.better-comments", + "ms-kubernetes-tools.vscode-kubernetes-tools", + "nrwl.angular-console", + "msjsdiag.debugger-for-chrome", + "firefox-devtools.vscode-firefox-debug", + "esbenp.prettier-vscode", + "dbaeumer.vscode-eslint", + "firsttris.vscode-jest-runner", + "eamodio.gitlens", + "github.copilot" + ], + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [4444], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "npm install" + + // check ports: netstat -tuln + + // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + // "remoteUser": "alexanderpatrie" + + // once container is built run: npx nx run platform:serve --host 0.0.0.0 --port 4200 +} diff --git a/.devcontainer/combine-api.devcontainer/Dockerfile b/.devcontainer/combine-api.devcontainer/Dockerfile new file mode 100644 index 000000000..90c2066a6 --- /dev/null +++ b/.devcontainer/combine-api.devcontainer/Dockerfile @@ -0,0 +1,372 @@ +############# +### base ### +############# +FROM python:3.10-buster as base + +WORKDIR /app + +################################### +# update apt package database +RUN apt-get update -y + +################################### +# AMICI +RUN apt-get install --no-install-recommends -y \ + g++ \ + libatlas-base-dev \ + swig + +################################### +# BioNetGen +ARG BIONETGEN_VERSION=2.8.0 +RUN apt-get install -y --no-install-recommends \ + perl \ + tar \ + wget \ + \ + && cd /tmp \ + && wget https://github.com/RuleWorld/bionetgen/releases/download/BioNetGen-${BIONETGEN_VERSION}/BioNetGen-${BIONETGEN_VERSION}-linux.tgz \ + && tar xvvf BioNetGen-${BIONETGEN_VERSION}-linux.tgz \ + && mv BioNetGen-${BIONETGEN_VERSION}/ /opt/ \ + \ + && rm BioNetGen-${BIONETGEN_VERSION}-linux.tgz +ENV PATH=${PATH}:/opt/BioNetGen-${BIONETGEN_VERSION}/ + +################################### +# BoolNet +#ARG BOOLNET_VERSION=2.1.8 +RUN apt-get install -y --no-install-recommends \ + r-base-dev \ + build-essential \ + gcc \ + gfortran \ + libblas-dev \ + libcurl4-openssl-dev \ + libgit2-dev \ + liblapack-dev \ + libssl-dev \ + libxml2 \ + libxml2-dev \ + \ + && Rscript \ + -e "install.packages('BoolNet')" \ + -e "require('BoolNet')" + +################################### +# CBMPy +RUN apt-get install -y --no-install-recommends \ + gcc \ + libglpk-dev + +################################### +# COBRAPY +RUN mkdir -p /.cache/cobrapy \ + && chmod ugo+rw /.cache/cobrapy + +################################### +# COPASI + +################################### +# GillesPy2 +RUN apt-get install -y --no-install-recommends \ + build-essential + +################################### +# GINsim: Set up path; needed up installation of GINsim Python package is fixed +RUN mkdir -p /usr/share/man/man1/ \ + && apt-get install -y --no-install-recommends \ + default-jre \ + wget \ + \ + && cd /tmp \ + && pip install ginsim \ + && wget https://raw.githubusercontent.com/GINsim/GINsim-python/master/ginsim_setup.py \ + && python ginsim_setup.py \ + \ + && rm ginsim_setup.py + +################################### +# libSBMLSIM +ARG LSBMLSIM_VERSION=1.4.0 +ARG LIBSBML_VERSION=5.19.0 + +# libSBML +RUN apt-get install --no-install-recommends -y \ + wget \ + libxml2 \ + \ + && wget https://master.dl.sourceforge.net/project/sbml/libsbml/${LIBSBML_VERSION}/stable/Linux/64-bit/libSBML-${LIBSBML_VERSION}-Linux-x64.deb \ + 
&& dpkg -i libSBML-${LIBSBML_VERSION}-Linux-x64.deb \ + \ + && rm libSBML-${LIBSBML_VERSION}-Linux-x64.deb +ENV LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH + +# LibSBMLSim +RUN apt-get install --no-install-recommends -y \ + wget \ + build-essential \ + cmake \ + swig \ + libbz2-dev \ + \ + && PY_EXEC_PATH=$(which python) \ + && PY_PREFIX=$(echo ${PY_EXEC_PATH} | rev | cut -d "/" -f 3- | rev) \ + && cd /tmp \ + && wget https://github.com/libsbmlsim/libsbmlsim/archive/refs/tags/v${LSBMLSIM_VERSION}.tar.gz \ + && tar xvvf v${LSBMLSIM_VERSION}.tar.gz \ + && cd libsbmlsim-${LSBMLSIM_VERSION} \ + && mkdir build \ + && cd build \ + && PYTHON_MAJOR_MINOR_VERSION=$(echo $PYTHON_VERSION | cut -d . -f 1-2) \ + && cmake \ + -D LIBSBML_INCLUDE_DIR=/usr/include \ + -D LIBSBML_LIBRARY=/usr/lib64/libsbml.so \ + -D WITH_PYTHON=ON \ + -D PYTHON_PREFIX=${PY_PREFIX} \ + -D PYTHON_EXECUTABLE:FILEPATH=${PY_EXEC_PATH} \ + -D PYTHON_INCLUDE_DIR:PATH=/usr/local/include/python${PYTHON_MAJOR_MINOR_VERSION} \ + -D PYTHON_LIBRARY:FILEPATH=/usr/local/lib/libpython${PYTHON_MAJOR_MINOR_VERSION}.so \ + .. \ + \ + && make \ + && make install \ + \ + && cd /tmp \ + && rm v${LSBMLSIM_VERSION}.tar.gz \ + && rm -r libsbmlsim-${LSBMLSIM_VERSION} + +################################### +# MASSpy +RUN apt-get install -y --no-install-recommends \ + git \ + gcc \ + build-essential \ + libfreetype6-dev \ + libfreetype6 \ + pkg-config \ + \ + && mkdir -p /.cache/cobrapy + +################################### +# NetPyNe +RUN apt-get install -y --no-install-recommends \ + gcc \ + g++ \ + make \ + libmpich-dev \ + mpi \ + mpi-default-bin \ + mpich + +################################### +# NEURON +RUN apt-get install -y --no-install-recommends \ + gcc \ + g++ \ + make +RUN pip install neuron +ENV NEURON_HOME=/usr/local + +################################### +# pyNeuroML, LEMS +RUN mkdir -p /usr/share/man/man1/ \ + && apt-get install -y --no-install-recommends \ + default-jre + +################################### +# PySCeS + +# SUNDIALS +# ignore certificate checking because certificate was expired as of 2021-11-10 +ARG SUNDIALS_VERSION=2.6.2 +RUN apt-get install -y --no-install-recommends \ + wget \ + cmake \ + make \ + g++ \ + \ + && cd /tmp \ + && wget --no-check-certificate https://computing.llnl.gov/sites/default/files/inline-files/sundials-${SUNDIALS_VERSION}.tar.gz \ + && tar xvvf sundials-${SUNDIALS_VERSION}.tar.gz \ + && cd sundials-${SUNDIALS_VERSION} \ + && mkdir build \ + && cd build \ + && cmake .. 
\ + && make \ + && make install \ + \ + && cd /tmp \ + && rm sundials-${SUNDIALS_VERSION}.tar.gz \ + && rm -r sundials-${SUNDIALS_VERSION} +ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH + +# Assimulo +RUN apt-get install -y --no-install-recommends \ + g++ \ + gfortran \ + libblas-dev \ + liblapack-dev \ + git + +# PySCeS +RUN apt-get install -y --no-install-recommends \ + git \ + gcc \ + gfortran \ + libgfortran5 + +# Configure PySCeS +COPY Dockerfile-assets/.pys_usercfg.ini /Pysces/.pys_usercfg.ini +COPY Dockerfile-assets/.pys_usercfg.ini /root/Pysces/.pys_usercfg.ini +RUN mkdir -p /Pysces \ + && mkdir -p /Pysces/psc \ + && mkdir -p /root/Pysces \ + && mkdir -p /root/Pysces/psc \ + && chmod ugo+rw -R /Pysces + +################################### +# RBApy +RUN apt-get install -y --no-install-recommends \ + gcc \ + libglpk-dev + +################################### +# Smoldyn + +################################### +# tellurium +RUN apt-get install -y --no-install-recommends \ + libxml2 \ + libncurses5 +ENV PLOTTING_ENGINE=matplotlib \ + PYTHONWARNINGS="ignore:The 'warn' parameter of use():UserWarning:tellurium.tellurium,ignore:Matplotlib is currently using agg:UserWarning:tellurium.plotting.engine_mpl" + +################################### +# XPP +RUN apt-get install -y --no-install-recommends \ + wget \ + make \ + gcc \ + libx11-dev \ + libc6-dev \ + libx11-6 \ + libc6 \ + \ + && cd /tmp \ + && wget https://web.archive.org/web/20210425172021/http://www.math.pitt.edu/~bard/bardware/xppaut_latest.tar.gz \ + && mkdir xpp \ + && tar zxvf xppaut_latest.tar.gz --directory xpp \ + && cd xpp \ + && make \ + && make install \ + \ + && cd /tmp \ + && rm xppaut_latest.tar.gz \ + && rm -r xpp + +################################### +# setup headless for NEURON, Smoldyn +RUN apt-get -y update \ + \ + && apt-get install --no-install-recommends -y \ + xvfb \ + && mkdir /tmp/.X11-unix \ + && chmod 1777 /tmp/.X11-unix \ + \ + && rm -rf /var/lib/apt/lists/* +COPY Dockerfile-assets/xvfb-startup.sh /xvfb-startup.sh +ENV XVFB_RES="1920x1080x24" \ + XVFB_ARGS="" + +################################### +# fonts for matplotlib +RUN apt-get update -y \ + && apt-get install -y --no-install-recommends libfreetype6 \ + && rm -rf /var/lib/apt/lists/* + +# configure matplotlib cache and config to avoid warnings +RUN mkdir -p /.cache/matplotlib \ + && mkdir -p /.config/matplotlib \ + && chmod ugo+rw /.config/matplotlib \ + && chmod ugo+rw /.cache/matplotlib + +# Configure default simulator options +ENV ALGORITHM_SUBSTITUTION_POLICY=SIMILAR_VARIABLES \ + VERBOSE=0 \ + MPLBACKEND=PDF \ + PLOTTING_ENGINE=matplotlib + +################################### +# setup python, ports + +# install poetry +ENV PATH="/opt/poetry/bin:$PATH" +ENV POETRY_HOME=/opt/poetry +RUN curl -sSL https://install.python-poetry.org | python3 - \ + && poetry config cache-dir "/poetry/.cache" --local \ + && chmod 755 poetry.toml + +# Copy over dependency list +# COPY ./poetry.lock /app/poetry.lock +COPY ./pyproject.toml /app/pyproject.toml +COPY ./combine_api/ /app/combine_api/ +COPY ./vendor/ /app/vendor/ +COPY ./tests/ /app/tests/ +COPY ./pytest.ini /app/pytest.ini +COPY ./README.md /app/README.md + +RUN chmod 755 poetry.toml \ + && poetry install --no-cache + +# +## set up matplotlib font manager +RUN poetry run python -c "import matplotlib.font_manager" + +## install assimulo because pipenv fails to install it +#ARG ASSIMULO_VERSION=3.2.9 +#RUN pip install git+https://github.com/modelon-community/Assimulo.git@Assimulo-${ASSIMULO_VERSION} + 
+RUN poetry run python -m compileall /app/combine_api/ \ + && PY_EXEC_PATH=$(which python) \ + && PY_PREFIX=$(echo ${PY_EXEC_PATH} | rev | cut -d "/" -f 3- | rev) \ + && PYTHON_MAJOR_MINOR_VERSION=$(echo $PYTHON_VERSION | cut -d . -f 1-2) \ + && poetry run python -m compileall ${PY_PREFIX}/lib/python${PYTHON_MAJOR_MINOR_VERSION}/site-packages \ +# && PYTHONPATH=/app python -c "from combine_api.handlers.run.utils import write_simulator_specs_cache; write_simulator_specs_cache();" \ + || exit 0 + +# in place testing - must provide env vars for S3 testing +RUN poetry run python3 -m pytest tests + +EXPOSE 3333 +CMD /bin/bash /xvfb-startup.sh \ + poetry run gunicorn \ + --bind 0.0.0.0:3333 combine_api.app:app \ + --workers 2 \ + --threads 4 \ + --worker-class gthread \ + --max-requests 1000 \ + --timeout 30 \ + --keep-alive 5 \ + --worker-tmp-dir /dev/shm \ + --log-level debug \ + --log-file=- + +############# +### build ### +############# +FROM base as build + +ARG app +ENV APP=$app + +RUN echo building ${app} + +LABEL \ + org.opencontainers.image.title="BioSimulations ${app}" \ + org.opencontainers.image.description="Docker image for BioSimulations ${app}" \ + org.opencontainers.image.url="https://biosimulations.org/" \ + org.opencontainers.image.documentation="https://docs.biosimulations.org/" \ + org.opencontainers.image.source="https://github.com/biosimulations/biosimulations" \ + org.opencontainers.image.authors="BioSimulations Team " \ + org.opencontainers.image.vendor="BioSimulations Team" \ + org.opencontainers.image.licenses="MIT" diff --git a/.devcontainer/combine-api.devcontainer/devcontainer.json b/.devcontainer/combine-api.devcontainer/devcontainer.json new file mode 100644 index 000000000..94dd81fe2 --- /dev/null +++ b/.devcontainer/combine-api.devcontainer/devcontainer.json @@ -0,0 +1,40 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node +{ + "name": "Biosimulations", + "dockerComposeFile": "../docker-compose.yml", + "service": "combine-api", + "workspaceFolder": "/workspace", + + // Set *default* container specific settings.json values on container create. + "settings": {}, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "mikael.angular-beastcode", + "angular.ng-template", + "aaron-bond.better-comments", + "ms-kubernetes-tools.vscode-kubernetes-tools", + "nrwl.angular-console", + "msjsdiag.debugger-for-chrome", + "firefox-devtools.vscode-firefox-debug", + "esbenp.prettier-vscode", + "dbaeumer.vscode-eslint", + "firsttris.vscode-jest-runner", + "eamodio.gitlens", + "github.copilot" + ], + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [4222, 3333, 4200], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "npm install", + + // check ports: netstat -tuln + + // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + "remoteUser": "node" + + // once container is built run: npx nx run platform:serve --host 0.0.0.0 --port 4200 +} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..1e503c952 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,37 @@ +// For format details, see https://aka.ms/devcontainer.json. 
For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node +{ + "name": "Biosimulations", + "dockerComposeFile": "docker-compose.yml", + "service": "app", + "workspaceFolder": "/workspace", + + // Set *default* container specific settings.json values on container create. + "settings": {}, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "mikael.angular-beastcode", + "angular.ng-template", + "aaron-bond.better-comments", + "ms-kubernetes-tools.vscode-kubernetes-tools", + "nrwl.angular-console", + "msjsdiag.debugger-for-chrome", + "firefox-devtools.vscode-firefox-debug", + "esbenp.prettier-vscode", + "dbaeumer.vscode-eslint", + "firsttris.vscode-jest-runner", + "eamodio.gitlens", + "github.copilot" + ], + + // define the ports that need to be available in the container, as per the docker-compose.yml file (api, dispatch-service, redis, nats, platform, etc) + "forwardPorts": [4444, 7777, 4200, 6379, 4222], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "npm install && npm install --platform=linux --arch=x64 sharp && npm rebuild sharp --platform=linux --arch=x64" + + // check ports: netstat -tuln + // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + // "remoteUser": "alexanderpatrie" +} diff --git a/.devcontainer/dispatch-service.devcontainer/Dockerfile b/.devcontainer/dispatch-service.devcontainer/Dockerfile new file mode 100644 index 000000000..8006bced7 --- /dev/null +++ b/.devcontainer/dispatch-service.devcontainer/Dockerfile @@ -0,0 +1,22 @@ +# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node/.devcontainer/base.Dockerfile + +ARG VARIANT="18-bullseye" +FROM mcr.microsoft.com/vscode/devcontainers/javascript-node:0-${VARIANT} +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + python3 \ + git \ + make \ + g++ \ + libcairo2-dev \ + libpango1.0-dev \ + libjpeg-dev \ + libgif-dev \ + librsvg2-dev \ + && npm install -g @nrwl/cli commitizen \ + && git config --global pull.rebase true + +RUN npx husky install +# && su node -c "git config --global pull.rebase true" \ +# && su node -c "npm install -g @nrwl/cli commitizen" +# ENTRYPOINT ["npx", "nx", "run", "platform:serve", "--host", "0.0.0.0", "--port", "4200"] \ No newline at end of file diff --git a/.devcontainer/dispatch-service.devcontainer/devcontainer.json b/.devcontainer/dispatch-service.devcontainer/devcontainer.json new file mode 100644 index 000000000..89749af84 --- /dev/null +++ b/.devcontainer/dispatch-service.devcontainer/devcontainer.json @@ -0,0 +1,40 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node +{ + "name": "Biosimulations", + "dockerComposeFile": "../docker-compose.yml", + "service": "dispatch-service", + "workspaceFolder": "/workspace", + + // Set *default* container specific settings.json values on container create. + "settings": {}, + + // Add the IDs of extensions you want installed when the container is created. 
+ "extensions": [ + "mikael.angular-beastcode", + "angular.ng-template", + "aaron-bond.better-comments", + "ms-kubernetes-tools.vscode-kubernetes-tools", + "nrwl.angular-console", + "msjsdiag.debugger-for-chrome", + "firefox-devtools.vscode-firefox-debug", + "esbenp.prettier-vscode", + "dbaeumer.vscode-eslint", + "firsttris.vscode-jest-runner", + "eamodio.gitlens", + "github.copilot" + ], + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [7777], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "npm install" + + // check ports: netstat -tuln + + // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + // "remoteUser": "alexanderpatrie" + + // once container is built run: npx nx run platform:serve --host 0.0.0.0 --port 4200 +} diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 000000000..b89c48890 --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,118 @@ +networks: + biosimulations-dev-net: + driver: bridge + +services: + # -- external services -- + nats: + image: nats:latest + restart: unless-stopped + ports: + - "4222:4222" + networks: + - biosimulations-dev-net + + redis-primary: + image: redis:latest + restart: unless-stopped + ports: + - "6379:6379" + networks: + - biosimulations-dev-net + + # mongo: + # image: mongo:latest + # restart: unless-stopped + # ports: + # - "27017:27017" + # networks: + # - biosimulations-dev-net + + # -- app service -- + app: + platform: linux/amd64 + build: + context: . + dockerfile: ./Dockerfile # ./api.devcontainer/Dockerfile + args: + VARIANT: 18-bullseye + USER_UID: 1000 # On Linux, you may need to update USER_UID and USER_GID below if not your local UID is not 1000. + USER_GID: 1000 + volumes: + - ..:/workspace:cached + user: "504" + command: sleep infinity # Overrides default command so things don't shut down after the process ends. + networks: + - biosimulations-dev-net + depends_on: + - nats + - redis-primary + # - mongo + env_file: + - ../config/config.env + + # -- multi-container strategy TODO: uncomment this -- + # api: + # platform: linux/amd64 + # build: + # context: . + # dockerfile: ./Dockerfile # ./api.devcontainer/Dockerfile + # args: + # VARIANT: 18-bullseye + # USER_UID: 1000 # On Linux, you may need to update USER_UID and USER_GID below if not your local UID is not 1000. + # USER_GID: 1000 + # volumes: + # - ..:/workspace # :cached + # ports: + # - "4444:4444" + # user: "504" + # command: sleep infinity # Overrides default command so things don't shut down after the process ends. + # networks: + # - biosimulations-dev-net + # depends_on: + # - nats + # - redis-primary + # # - mongo + # env_file: + # - ../config/config.env + # dispatch-service: + # platform: linux/amd64 + # build: + # context: . + # dockerfile: ./Dockerfile # ./dispatch-service.devcontainer/Dockerfile + # args: + # VARIANT: 18-bullseye + # USER_UID: 1000 + # USER_GID: 1000 + # volumes: + # - ..:/workspace # cached + # user: "504" + # command: sleep infinity + # networks: + # - biosimulations-dev-net + # depends_on: + # - nats + # - redis-primary + # # - mongo + # env_file: + # - ../config/config.env + # platform: + # platform: linux/amd64 + # build: + # context: . 
+ # dockerfile: ./Dockerfile # ./platform.devcontainer/Dockerfile + # args: + # VARIANT: 18-bullseye + # USER_UID: 1000 + # USER_GID: 1000 + # volumes: + # - ..:/workspace # :cached + # user: "504" + # command: sleep infinity + # networks: + # - biosimulations-dev-net + # depends_on: + # - nats + # - redis-primary + # # - mongo + \ No newline at end of file diff --git a/.devcontainer/platform.devcontainer/Dockerfile b/.devcontainer/platform.devcontainer/Dockerfile new file mode 100644 index 000000000..8006bced7 --- /dev/null +++ b/.devcontainer/platform.devcontainer/Dockerfile @@ -0,0 +1,22 @@ +# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node/.devcontainer/base.Dockerfile + +ARG VARIANT="18-bullseye" +FROM mcr.microsoft.com/vscode/devcontainers/javascript-node:0-${VARIANT} +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + python3 \ + git \ + make \ + g++ \ + libcairo2-dev \ + libpango1.0-dev \ + libjpeg-dev \ + libgif-dev \ + librsvg2-dev \ + && npm install -g @nrwl/cli commitizen \ + && git config --global pull.rebase true + +RUN npx husky install +# && su node -c "git config --global pull.rebase true" \ +# && su node -c "npm install -g @nrwl/cli commitizen" +# ENTRYPOINT ["npx", "nx", "run", "platform:serve", "--host", "0.0.0.0", "--port", "4200"] \ No newline at end of file diff --git a/.devcontainer/platform.devcontainer/devcontainer.json b/.devcontainer/platform.devcontainer/devcontainer.json new file mode 100644 index 000000000..79b32b2ea --- /dev/null +++ b/.devcontainer/platform.devcontainer/devcontainer.json @@ -0,0 +1,40 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.192.0/containers/javascript-node +{ + "name": "Biosimulations", + "dockerComposeFile": "../docker-compose.yml", + "service": "platform", + "workspaceFolder": "/workspace", + + // Set *default* container specific settings.json values on container create. + "settings": {}, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "mikael.angular-beastcode", + "angular.ng-template", + "aaron-bond.better-comments", + "ms-kubernetes-tools.vscode-kubernetes-tools", + "nrwl.angular-console", + "msjsdiag.debugger-for-chrome", + "firefox-devtools.vscode-firefox-debug", + "esbenp.prettier-vscode", + "dbaeumer.vscode-eslint", + "firsttris.vscode-jest-runner", + "eamodio.gitlens", + "github.copilot" + ], + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + "forwardPorts": [4200], + + // Use 'postCreateCommand' to run commands after the container is created. + "postCreateCommand": "npm install" + + // check ports: netstat -tuln + + // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 
+ // "remoteUser": "alexanderpatrie" + + // once container is built run: npx nx run platform:serve --host 0.0.0.0 --port 4200 +} diff --git a/.gitignore b/.gitignore index 86d6a2316..e3f0da534 100644 --- a/.gitignore +++ b/.gitignore @@ -23,5 +23,5 @@ poetry.lock *_parsetab.py condaenv.n_dn60fa.requirements.txt worker/.worker_STABLE -build +!worker/datagen_build .DS_Store diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 000000000..7ef4a1341 --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,16 @@ +{ + "recommendations": [ + "mikael.angular-beastcode", + "angular.ng-template", + "aaron-bond.better-comments", + "ms-kubernetes-tools.vscode-kubernetes-tools", + "nrwl.angular-console", + "msjsdiag.debugger-for-chrome", + "firefox-devtools.vscode-firefox-debug", + "esbenp.prettier-vscode", + "dbaeumer.vscode-eslint", + "firsttris.vscode-jest-runner", + "eamodio.gitlens", + "github.copilot" + ] +} diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..b72aa8184 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,55 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "type": "node", + "request": "launch", + "name": "Serve API", + "runtimeExecutable": "npx", + "runtimeArgs": ["nx", "run", "api:serve"], + "skipFiles": ["/**"], + "port": 9229, // ensure Nx is configured to open this port for debugging + "console": "integratedTerminal" + // "internalConsoleOptions": "neverOpen" + }, + { + "type": "node", + "request": "launch", + "name": "Serve Dispatch-Service", + "runtimeExecutable": "npx", + "runtimeArgs": ["nx", "run", "dispatch-service:serve"], + "skipFiles": ["/**"], + "port": 9229, + "console": "integratedTerminal" + // "internalConsoleOptions": "neverOpen" + }, + { + "type": "node", + "request": "launch", + "name": "Serve Platform", + "runtimeExecutable": "npx", + "runtimeArgs": ["nx", "run", "platform:serve"], + "skipFiles": ["/**"], + "port": 9229, + "console": "integratedTerminal" + // "internalConsoleOptions": "neverOpen" + }, + { + "type": "node", + "request": "attach", + "name": "Attach to Nx", + "port": 9229, + "address": "localhost", + "restart": true, + "protocol": "inspector", + "skipFiles": ["/**"], + "outFiles": ["${workspaceFolder}/dist/**/*.js"] + } + ], + "compounds": [ + { + "name": "Full Stack Debug", + "configurations": ["Serve API", "Serve Dispatch-Service", "Serve Platform"] + } + ] +} diff --git a/test_fixtures/sbml-core/archive.omex b/test_fixtures/sbml-core/archive.omex new file mode 100644 index 000000000..a4daa2778 Binary files /dev/null and b/test_fixtures/sbml-core/archive.omex differ diff --git a/worker/Dockerfile-worker b/worker/Dockerfile-worker index 50c0e14f4..a1285f5fa 100644 --- a/worker/Dockerfile-worker +++ b/worker/Dockerfile-worker @@ -8,7 +8,8 @@ FROM ghcr.io/biosimulators/bio-compose-server-base:0.0.1-test SHELL ["/usr/bin/env", "bash", "-c"] -COPY . . 
+COPY ./service ./service
+COPY ./datagen_build ./datagen_build
 
 ENV MONGO_URI=mongodb://mongodb/service_requests
diff --git a/worker/data_generator.py b/worker/datagen_src/data_generator.py
similarity index 100%
rename from worker/data_generator.py
rename to worker/datagen_src/data_generator.py
diff --git a/worker/datagen_src/data_generator.pyx b/worker/datagen_src/data_generator.pyx
new file mode 100644
index 000000000..7489609eb
--- /dev/null
+++ b/worker/datagen_src/data_generator.pyx
@@ -0,0 +1,1276 @@
+import logging
+import os
+import traceback
+from importlib import import_module
+from tempfile import mkdtemp
+import uuid
+from pprint import pformat
+from typing import Dict, List, Tuple, Any, Union
+from abc import abstractmethod
+from logging import warn, Logger
+from uuid import uuid4
+
+import libsbml
+import numpy as np
+from biosimulators_utils.config import Config
+from kisao import AlgorithmSubstitutionPolicy
+from process_bigraph import Step, Process
+from process_bigraph.composite import Emitter, ProcessTypes, Composite
+from pymongo import ASCENDING, MongoClient
+from pymongo.collection import Collection
+from pymongo.database import Database
+from simulariumio import InputFileData, UnitData, DisplayData, DISPLAY_TYPE
+from simulariumio.smoldyn import SmoldynData
+
+# TODO: make other content for this
+# from log_config import setup_logging
+# from shared_worker import handle_exception
+# from compatible import COMPATIBLE_UTC_SIMULATORS
+# from io_worker import normalize_smoldyn_output_path_in_root, get_sbml_species_mapping, read_report_outputs, read_h5_reports, make_dir
+# from simularium_utils import calculate_agent_radius, translate_data_object, write_simularium_file
+# from data_model import BiosimulationsRunOutputData
+
+# logging TODO: implement this; setup_logging is provided by the log_config import above, which is still commented out.
+logger: Logger = logging.getLogger("biochecknet.worker.data_generator.log")
+# setup_logging(logger)
+
+AMICI_ENABLED = True
+COPASI_ENABLED = True
+PYSCES_ENABLED = True
+TELLURIUM_ENABLED = True
+SMOLDYN_ENABLED = True
+READDY_ENABLED = True
+
+try:
+    from amici import SbmlImporter, import_model_module, Model, runAmiciSimulation
+except ImportError as e:
+    AMICI_ENABLED = False
+    logger.warning(str(e))
+try:
+    from basico import *
+except ImportError as e:
+    COPASI_ENABLED = False
+    logger.warning(str(e))
+try:
+    import tellurium as te
+except ImportError as e:
+    TELLURIUM_ENABLED = False
+    logger.warning(str(e))
+try:
+    from smoldyn import Simulation
+    from smoldyn._smoldyn import MolecState
+except ImportError as e:
+    SMOLDYN_ENABLED = False
+    logger.warning(str(e))
+try:
+    import readdy
+except ImportError as e:
+    READDY_ENABLED = False
+    logger.warning(str(e))
+try:
+    import pysces
+except ImportError as e:
+    PYSCES_ENABLED = False
+    logger.warning(str(e))
+
+HISTORY_INDEXES = [
+    'data.time',
+    [('experiment_id', ASCENDING),
+     ('data.time', ASCENDING),
+     ('_id', ASCENDING)],
+]
+CONFIGURATION_INDEXES = [
+    'experiment_id',
+]
+SECRETS_PATH = 'secrets.json'
+
+# TODO: Update this and finish cython implementation!
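The try/except blocks above follow an optional-dependency pattern: each simulator backend is imported once at module load, and a module-level `*_ENABLED` flag records whether it is available so the rest of the module can branch on the flag instead of importing again (the `SBML_EXECUTORS` mapping defined later in this file relies on the same idea). The sketch below is an editor's illustration of how such flag-gated dispatch might look; `dispatch_sbml_run` is a hypothetical helper, not part of the module, and it assumes the `run_sbml_copasi`/`run_sbml_tellurium` executors defined further down.

```python
# Hypothetical sketch (not part of the diff): flag-gated dispatch over the
# optional simulator backends imported above. Assumes the run_sbml_* executors
# defined later in this module.
def dispatch_sbml_run(simulator: str, sbml_fp: str, start: int, dur: int, steps: int) -> dict:
    registry = {
        "copasi": (COPASI_ENABLED, run_sbml_copasi),
        "tellurium": (TELLURIUM_ENABLED, run_sbml_tellurium),
    }
    enabled, executor = registry[simulator.lower()]
    if not enabled:
        # mirror the module's convention of returning {'error': ...} on failure
        return {"error": f"{simulator} is not installed in this environment"}
    return executor(sbml_fp=sbml_fp, start=start, dur=dur, steps=steps)
```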
+
+# -- functions related to generating time course output data (for verification and more) using the process-bigraph engine -- #
+
+cdef class NodeSpec(dict):
+    # extension (cdef) classes have no instance __dict__, so the attributes assigned in __init__ must be declared here
+    cdef public object _type, address, config, inputs, outputs, name
+
+    def __init__(self, _type: str, address: str, config: Dict[str, Any], inputs: Dict[str, List[str]], outputs: Dict[str, List[str]], name: str = None):
+        super().__init__()
+        self._type = _type
+        self.address = address
+        self.config = config
+        self.inputs = inputs
+        self.outputs = outputs
+        self.name = name
+
+
+def node_spec(_type: str, address: str, config: Dict[str, Any], inputs: Dict[str, List[str]], outputs: Dict[str, List[str]], name: str = None) -> Dict[str, Any]:
+    spec = {
+        '_type': _type,
+        'address': address,
+        'config': config,
+        'inputs': inputs,
+        'outputs': outputs
+    }
+
+    return {name: spec} if name else spec
+
+
+def step_node_spec(address: str, config: Dict[str, Any], inputs: Dict[str, Any], outputs: Dict[str, Any], name: str = None):
+    return node_spec(name=name, _type="step", address=address, config=config, inputs=inputs, outputs=outputs)
+
+
+def process_node_spec(address: str, config: Dict[str, Any], inputs: Dict[str, Any], outputs: Dict[str, Any], name: str = None):
+    return node_spec(name=name, _type="process", address=address, config=config, inputs=inputs, outputs=outputs)
+
+
+def time_course_node_spec(input_file: str, context: str, start_time: int, end_time: int, num_steps: int):
+    config = {
+        'input_file': input_file,
+        'start_time': start_time,
+        'end_time': end_time,
+        'num_steps': num_steps,
+        'context': context
+    }
+    return step_node_spec(
+        address='local:time-course-output-generator',
+        config=config,
+        inputs={
+            'parameters': [f'parameters_store_{context}']
+        },
+        outputs={
+            'output_data': [f'output_data_store_{context}']
+        }
+    )
+
+
+def generate_time_course_data(
+    input_fp: str,
+    start: int,
+    end: int,
+    steps: int,
+    core=None,
+    simulators: List[str] = None,
+    parameters: Dict[str, Any] = None,
+    expected_results_fp: str = None,
+    out_dir: str = None
+) -> Dict[str, Dict[str, List[float]]]:
+    requested_sims = simulators or ["amici", "copasi", "pysces", "tellurium"]
+    simulation_spec = {
+        simulator: time_course_node_spec(
+            input_file=input_fp,
+            context=simulator,
+            start_time=start,
+            end_time=end,
+            num_steps=steps
+        ) for simulator in requested_sims
+    }
+    simulation = Composite({'state': simulation_spec, 'emitter': {'mode': 'all'}}, core=core)
+
+    input_filename = input_fp.split("/")[-1].split(".")[0]
+    if not out_dir:
+        out_dir = mkdtemp()
+
+    if out_dir:
+        simulation.save(
+            filename=f'{input_filename}-initialization.json',
+            outdir=out_dir
+        )
+
+    # TODO: is there a better way to do this? (interval of one? Is that symbolic more than anything?)
+ if parameters: + simulation.update(parameters, 1) + else: + simulation.run(1) + + if out_dir: + simulation.save( + filename=f'{input_filename}-update.json', + outdir=out_dir + ) + + output_data = {} + raw_data = simulation.gather_results()[('emitter',)] + for data in raw_data: + for data_key, data_value in data.items(): + if data_key.startswith('output_data_store_'): + simulator = data_key.split('_')[-1] + output_data[simulator] = data_value + + # return output_data + import json + with open(f'{out_dir}/{input_filename}-update.json', 'r') as f: + state_spec = json.load(f) + + return {'output_data': output_data, 'state': state_spec} + + +def generate_composition_result_data( + state_spec: Dict[str, Any], + duration: int = None, + core: ProcessTypes = None, + out_dir: str = None +) -> Dict[str, Union[List[Dict[str, Any]], Dict[str, Any]]]: + simulation = Composite({'state': state_spec, 'emitter': {'mode': 'all'}}, core=core) + if duration is None: + duration = 10 + simulation.run(duration) + + results = simulation.gather_results()[('emitter',)] + + import json + if out_dir is None: + out_dir = mkdtemp() + with open(f'{out_dir}/update.json', 'r') as f: + state_spec = json.load(f) + + return {'results': results, 'state': state_spec} + + +# -- direct simulator API wrappers -- # + +def run_readdy( + box_size: List[float], + species_config: List[Dict[str, float]], # {SPECIES_NAME: DIFFUSION_CONSTANT} ie: {'E': 10.} + reactions_config: List[Dict[str, float]], # {REACTION_SCHEME: REACTION RATE} ie: {"fwd: E +(0.03) S -> ES": 86.551} + particles_config: List[Dict[str, Union[List[float], np.ndarray]]], # {PARTICLE_NAME: INITIAL_POSITIONS_ARRAY} ie: {'E': np.random.random(size=(n_particles_e, 3)) * edge_length - .5*edge_length} + dt: float, + duration: float, + unit_system_config: Dict[str, str] = None +) -> Dict[str, str]: + output = {} + if READDY_ENABLED: + # establish reaction network system + unit_system = unit_system_config or {"length_unit": "micrometer", "time_unit": "second"} + system = readdy.ReactionDiffusionSystem( + box_size=box_size, + unit_system=unit_system + ) + + # add species via spec + species_names = [] + for config in species_config: + species_name = config["name"] + species_difc = config["diffusion_constant"] + species_names.append(species_name) + system.add_species(species_name, diffusion_constant=float(species_difc)) + + # add reactions via spec + for config in reactions_config: + reaction_scheme = config["scheme"] + reaction_rate = config["rate"] + system.reactions.add(reaction_scheme, rate=float(reaction_rate)) + + # configure simulation outputs + simulation = system.simulation(kernel="CPU") + simulation.output_file = "out.h5" + simulation.reaction_handler = "UncontrolledApproximation" + + # set initial particle state and configure observations + for config in particles_config: + particle_name = config["name"] + particle_positions = config["initial_positions"] + if not isinstance(particle_positions, np.ndarray): + particle_positions = np.array(particle_positions) + simulation.add_particles(particle_name, particle_positions) + simulation.observe.number_of_particles( + stride=1, + types=list(set(species_names)) + ) + + # run simulation for given time parameters + n_steps = int(float(duration) / dt) + simulation.run(n_steps=n_steps, timestep=dt) + output = {"results_file": simulation.output_file} + else: + error = handle_exception("Run Readdy") + logger.error(error) + output = {'error': error} + + return output + + +# TODO: should we return the actual data from memory, or 
that reflected in a smoldyn output txt file or both? +def run_smoldyn(model_fp: str, duration: int, dt: float = None) -> Dict[str, Union[str, Dict[str, Union[float, List[float]]]]]: + """Run the simulation model found at `model_fp` for the duration + specified therein if output_files are specified in the smoldyn model file and return the aforementioned output file + or return a dictionary of an array of the `listmols` as well as `molcount` command outputs. NOTE: The model file is currently + searched for this `output_files` value, and if it exists and not commented out, it will scan the root of the model_fp + (usually where smoldyn output files are stored, which is the same dir as the model_fp) to retrieve the output file. + + Args: + model_fp:`str`: path to the smoldyn configuration. Defaults to `None`. + duration:`float`: duration in seconds to run the simulation for. + dt:`float`: time step in seconds to run the simulation for. Defaults to None, which uses the built-in simulation dt. + + For the output, we should read the model file and search for "output_files" to start one of the lines. + If it startswith that, then assume a return of the output txt file, if not: then assume a return from ram. + """ + # search for output_files in model_fp TODO: optimize this + use_file_output = False + with open(model_fp, 'r') as f: + model_content = [line.strip() for line in f.readlines()] + for content in model_content: + if content.startswith('output_files'): + use_file_output = True + f.close() + + output_data = {} + simulation = Simulation.fromFile(model_fp) + try: + # case: there is no declaration of output_files in the smoldyn config file, or it is commented out + if not use_file_output: + # write molcounts to counts dataset at every timestep (shape=(n_timesteps, 1+n_species <-- one for time)): [timestep, countSpec1, countSpec2, ...] 
+ simulation.addOutputData('species_counts') + simulation.addCommand(cmd='molcount species_counts', cmd_type='E') + + # write spatial output to molecules dataset + simulation.addOutputData('molecules') + simulation.addCommand(cmd='listmols molecules', cmd_type='E') + + # run simulation for specified time + step_size = dt or simulation.dt + simulation.run(duration, step_size, overwrite=True) + + species_count = simulation.count()['species'] + species_names: List[str] = [] + for index in range(species_count): + species_name = simulation.getSpeciesName(index) + if 'empty' not in species_name.lower(): + species_names.append(species_name) + + molecule_output = simulation.getOutputData('molecules') + counts_output = simulation.getOutputData('species_counts') + for i, output_array in enumerate(counts_output): + interval_data = {} + for j, species_count in enumerate(output_array): + interval_data[species_names[j - 1]] = species_count + counts_output.pop(i) + counts_output.insert(i, interval_data) + + # return ram data (default dimensions) + output_data = {'species_counts': counts_output, 'molecules': molecule_output} + + # case: output files are specified, and thus time parameters by which to capture/collect output + else: + # run simulation with default time params + simulation.runSim() + + # change the output filename to a standardized 'modelout.txt' name + working_dir = os.path.dirname(model_fp) + results_fp = normalize_smoldyn_output_path_in_root(working_dir) + + # return output file + output_data = {'results_file': results_fp} + except: + error = handle_exception("Run Smoldyn") + logger.error(error) + output_data = {'error': error} + + return output_data + + +def handle_sbml_exception() -> str: + tb_str = traceback.format_exc() + error_message = pformat(f"time-course-simulation-error:\n{tb_str}") + return error_message + + +def run_sbml_pysces(sbml_fp: str, start: int, dur: int, steps: int) -> Dict[str, Union[List[float], str]]: + # model compilation + sbml_filename = sbml_fp.split('/')[-1] + psc_filename = sbml_filename + '.psc' + psc_fp = os.path.join(pysces.model_dir, psc_filename) + # get output with mapping of internal species ids to external (shared) species names + sbml_species_mapping = get_sbml_species_mapping(sbml_fp) + obs_names = list(sbml_species_mapping.keys()) + obs_ids = list(sbml_species_mapping.values()) + # run the simulation with specified time params and get the data + try: + # NOTE: the below model load works only in pysces 1.2.2 which is not available on conda via mac. TODO: fix this. 
+ model = pysces.loadSBML(sbmlfile=sbml_fp, pscfile=psc_fp) + model.sim_time = np.linspace(start, dur, steps + 1) + model.Simulate(1) # specify userinit=1 to directly use model.sim_time (t) rather than the default + return { + name: model.data_sim.getSimData(obs_id)[:, 1].tolist() + for name, obs_id in sbml_species_mapping.items() + } + except: + error_message = handle_sbml_exception() + logger.error(error_message) + return {"error": error_message} + + +def run_sbml_tellurium(sbml_fp: str, start: int, dur: int, steps: int) -> Dict[str, Union[List[float], str]]: + result = None + try: + simulator = te.loadSBMLModel(sbml_fp) + if start > 0: + simulator.simulate(0, start) + result = simulator.simulate(start, dur, steps + 1) + species_mapping = get_sbml_species_mapping(sbml_fp) + if result is not None: + outputs = {} + for colname in result.colnames: + if 'time' not in colname: + for spec_name, spec_id in species_mapping.items(): + if colname.replace("[", "").replace("]", "") == spec_id: + data = result[colname] + outputs[spec_name] = data.tolist() + return outputs + else: + raise Exception('Tellurium: Could not generate results.') + except: + error_message = handle_sbml_exception() + logger.error(error_message) + return {"error": error_message} + + +def run_sbml_copasi(sbml_fp: str, start: int, dur: int, steps: int) -> Dict[str, Union[List[float], str]]: + try: + t = np.linspace(start, dur, steps + 1) + model = load_model(sbml_fp) + specs = get_species(model=model).index.tolist() + for spec in specs: + if spec == "EmptySet" or "EmptySet" in spec: + specs.remove(spec) + tc = run_time_course(model=model, update_model=True, values=t) + data = {spec: tc[spec].values.tolist() for spec in specs} + return data + except: + error_message = handle_sbml_exception() + logger.error(error_message) + return {"error": error_message} + + +def run_sbml_amici(sbml_fp: str, start: int, dur: int, steps: int) -> Dict[str, Union[List[float], str]]: + try: + sbml_reader = libsbml.SBMLReader() + sbml_doc = sbml_reader.readSBML(sbml_fp) + sbml_model_object = sbml_doc.getModel() + sbml_importer = SbmlImporter(sbml_fp) + model_id = sbml_fp.split('/')[-1].replace('.xml', '') + model_output_dir = mkdtemp() + sbml_importer.sbml2amici( + model_id, + model_output_dir, + verbose=logging.INFO, + observables=None, + sigmas=None, + constant_parameters=None + ) + # model_output_dir = model_id # mkdtemp() + model_module = import_model_module(model_id, model_output_dir) + amici_model_object: Model = model_module.getModel() + floating_species_list = list(amici_model_object.getStateIds()) + floating_species_initial = list(amici_model_object.getInitialStates()) + sbml_species_ids = [spec.getName() for spec in sbml_model_object.getListOfSpecies()] + t = np.linspace(start, dur, steps + 1) + amici_model_object.setTimepoints(t) + initial_state = dict(zip(floating_species_list, floating_species_initial)) + set_values = [] + for species_id, value in initial_state.items(): + set_values.append(value) + amici_model_object.setInitialStates(set_values) + sbml_species_mapping = get_sbml_species_mapping(sbml_fp) + method = amici_model_object.getSolver() + result_data = runAmiciSimulation(solver=method, model=amici_model_object) + results = {} + floating_results = dict(zip( + sbml_species_ids, + list(map( + lambda x: result_data.by_id(x), + floating_species_list + )) + )) + results = floating_results + return { + key: val.tolist() if isinstance(val, np.ndarray) else val + for key, val in results.items() + } + except: + error_message = 
handle_sbml_exception() + logger.error(error_message) + return {"error": error_message} + + +# TODO: add vcell and masspy here +SBML_EXECUTORS = dict(zip( + [data[0] for data in COMPATIBLE_UTC_SIMULATORS], + [run_sbml_amici, run_sbml_copasi, run_sbml_pysces, run_sbml_tellurium] +)) + + +# -- formatted observables data -- # + +def sbml_output_stack(spec_name: str, output): + # 2. in output_stack: return {simname: output} + stack = {} + for simulator_name in output.keys(): + spec_data = output[simulator_name].get(spec_name) + if isinstance(spec_data, str): + data = None + else: + data = spec_data + + stack[simulator_name] = data + + return stack + + +def get_output_stack(spec_name: str, outputs): + return sbml_output_stack(spec_name=spec_name, output=outputs) + + +def _get_report_output_stack(outputs: dict, spec_id: str): + output_stack = [] + for sim_name in outputs.keys(): + sim_data = outputs[sim_name]['data'] + for data_index, data in enumerate(sim_data): + data_id = data['dataset_label'] + if data_id == spec_id: + # print(spec_id, data_id) + output_stack.append(sim_data[data_index]['data']) + else: + pass + return np.stack(output_stack) + + +def _generate_biosimulator_utc_outputs(omex_fp: str, output_root_dir: str, simulators: List[str] = None, alg_policy="same_framework") -> Dict: + """Generate the outputs of the standard UTC simulators Copasi, Tellurium, and Amici from the + biosimulators interface (exec_sedml_docs_in_combine_archive). + """ + make_dir(output_root_dir) + + output_data = {} + sims = simulators or ['amici', 'copasi', 'tellurium'] + sim_config = Config( + LOG=False, + ALGORITHM_SUBSTITUTION_POLICY=AlgorithmSubstitutionPolicy[alg_policy.upper()], + VERBOSE=False) + for sim in sims: + sim_output_dir = os.path.join(output_root_dir, f'{sim}_outputs') + make_dir(sim_output_dir) + try: + module = import_module(name=f'biosimulators_{sim}.core') + exec_func = getattr(module, 'exec_sedml_docs_in_combine_archive') + sim_output_dir = os.path.join(output_root_dir, f'{sim}_outputs') + if not os.path.exists(sim_output_dir): + os.mkdir(sim_output_dir) + # execute simulator-specific simulation + exec_func(archive_filename=omex_fp, out_dir=sim_output_dir, config=sim_config) + report_path = os.path.join(sim_output_dir, 'reports.h5') + + sim_data = read_report_outputs(report_path) + data = sim_data.to_dict() if isinstance(sim_data, BiosimulationsRunOutputData) else sim_data + output_data[sim] = data + except Exception as e: + import traceback + tb_str = traceback.format_exc() + error_message = ( + f"An unexpected error occurred while processing your request:\n" + f"Error Type: {type(e).__name__}\n" + f"Error Details: {str(e)}\n" + f"Traceback:\n{tb_str}" + ) + output_data[sim] = error_message + + return output_data + + +def generate_biosimulator_utc_outputs(omex_fp: str, output_root_dir: str, simulators: list[str] = None, alg_policy="same_framework") -> dict: + """Generate the outputs of the standard UTC simulators Copasi, Tellurium, and Amici from the + biosimulators interface (exec_sedml_docs_in_combine_archive). 
+ """ + make_dir(output_root_dir) + + output_data = {} + sims = simulators or ['amici', 'copasi', 'tellurium'] # , 'pysces'] + sim_config = Config( + LOG=False, + ALGORITHM_SUBSTITUTION_POLICY=AlgorithmSubstitutionPolicy[alg_policy.upper()], + VERBOSE=False) + for sim in sims: + sim_output_dir = os.path.join(output_root_dir, f'{sim}_outputs') + make_dir(sim_output_dir) + try: + module = import_module(name=f'biosimulators_{sim}.core') + exec_func = getattr(module, 'exec_sedml_docs_in_combine_archive') + sim_output_dir = os.path.join(output_root_dir, f'{sim}_outputs') + if not os.path.exists(sim_output_dir): + os.mkdir(sim_output_dir) + + # execute simulator-specific simulation + exec_func( + archive_filename=omex_fp, + out_dir=sim_output_dir, + config=sim_config if not sim == "pysces" else None + ) + report_path = os.path.join(sim_output_dir, 'reports.h5') + + sim_data = read_h5_reports(report_path) + data = sim_data.to_dict() if isinstance(sim_data, BiosimulationsRunOutputData) else sim_data + output_data[sim] = data + except: + import traceback + tb_str = traceback.format_exc() + error_message = ( + f"Traceback:\n{tb_str}" + ) + output_data[sim] = {'error': error_message} + + return output_data + + +def generate_sbml_utc_outputs(sbml_fp: str, start: int, dur: int, steps: int, simulators: list[str] = None, truth: str = None) -> dict: + # TODO: add VCELL and pysces here + output = {} + sbml_species_ids = list(get_sbml_species_mapping(sbml_fp).keys()) + simulators = simulators or ['amici', 'copasi', 'tellurium', 'pysces'] + all_output_ids = [] + for simulator in simulators: + results = {} + simulator = simulator.lower() + simulation_executor = SBML_EXECUTORS[simulator] + sim_result = simulation_executor(sbml_fp=sbml_fp, start=start, dur=dur, steps=steps) + + # case: simulation execution was successful + if "error" not in sim_result.keys(): + # add to all shared names + all_output_ids.append(list(sim_result.keys())) + + # iterate over sbml_species_ids to index output data + for species_id in sbml_species_ids: + if species_id in sim_result.keys(): + output_vals = sim_result[species_id] + if isinstance(output_vals, np.ndarray): + output_vals = output_vals.tolist() + results[species_id] = output_vals + else: + # case: simulation had an error + results = sim_result + + # set the simulator output + output[simulator] = results + + # get the commonly shared output ids + final_output = {} + shared_output_ids = min(all_output_ids) + for simulator_name in output.keys(): + sim_data = {} + for spec_id in output[simulator_name].keys(): + if spec_id in shared_output_ids: + sim_data[spec_id] = output[simulator_name][spec_id] + elif spec_id == "error": + sim_data["error"] = output[simulator_name][spec_id] + + final_output[simulator_name] = sim_data + + # handle expected outputs + if truth is not None: + final_output['ground_truth'] = {} + report_results = read_report_outputs(truth) + report_data = report_results.to_dict()['data'] if isinstance(report_results, BiosimulationsRunOutputData) else {} + for datum in report_data: + spec_name = datum['dataset_label'] + if not spec_name.lower() == 'time': + spec_data = datum['data'] + final_output['ground_truth'][spec_name] = spec_data + + return final_output + + +# -- process-bigraph implementations -- # + +class MongoDatabaseEmitter(Emitter): + client_dict: Dict[int, MongoClient] = {} + config_schema = { + 'connection_uri': 'string', + 'experiment_id': 'maybe[string]', + 'emit_limit': { + '_type': 'integer', + '_default': 4000000 + }, + 'database': 'maybe[string]' 
+ } + + @classmethod + def create_indexes(cls, table: Any, columns: List[Any]) -> None: + """Create the listed column indexes for the given DB table.""" + for column in columns: + table.create_index(column) + + def __init__(self, config, core) -> None: + """Config may have 'host' and 'database' items. The config passed is expected to be: + + {'experiment_id':, + 'emit_limit':, + 'embed_path':} + + TODO: Automate this process for the user in builder + """ + super().__init__(config) + self.core = core + self.experiment_id = self.config.get('experiment_id', str(uuid.uuid4())) + # In the worst case, `breakdown_data` can underestimate the size of + # data by a factor of 4: len(str(0)) == 1 but 0 is a 4-byte int. + # Use 4 MB as the breakdown limit to stay under MongoDB's 16 MB limit. + self.emit_limit = self.config['emit_limit'] + + # create new MongoClient per OS process + curr_pid = os.getpid() + if curr_pid not in MongoDatabaseEmitter.client_dict: + MongoDatabaseEmitter.client_dict[curr_pid] = MongoClient( + config['connection_uri']) + self.client: MongoClient = MongoDatabaseEmitter.client_dict[curr_pid] + + # extract objects from current mongo client instance + self.db: Database = getattr(self.client, self.config.get('database', 'simulations')) + self.history_collection: Collection = getattr(self.db, 'history') + self.configuration: Collection = getattr(self.db, 'configuration') + + # create column indexes for the given collection objects + self.create_indexes(self.history_collection, HISTORY_INDEXES) + self.create_indexes(self.configuration, CONFIGURATION_INDEXES) + + self.fallback_serializer = make_fallback_serializer_function(self.core) + + def query(self, query): + return self.history_collection.find_one(query) + + def history(self): + return [v for v in self.history_collection.find()] + + def flush_history(self): + for v in self.history(): + self.history_collection.delete_one(v) + + def update(self, inputs): + self.history_collection.insert_one(inputs) + return {} + + +# -- simulators -- # + +class SmoldynStep(Step): + config_schema = { + 'model_filepath': 'string', + 'animate': { + '_type': 'boolean', + '_default': False + }, + 'duration': 'maybe[integer]', + 'dt': 'maybe[float]', + 'initial_species_counts': 'maybe[tree[float]]', + 'initial_mol_position': 'maybe[list[float]]', # of particles/molecules + 'initial_mol_state': 'maybe[integer]', + + # TODO: Add a more nuanced way to describe and configure dynamic difcs given species interaction patterns + } + + def __init__(self, config, core): + """A new instance of `SmoldynProcess` based on the `config` that is passed. The schema for the config to be passed in + this object's constructor is as follows: + + config_schema = { + 'model_filepath': 'string', <-- analogous to python `str` + 'animate': 'bool' <-- of type `bigraph_schema.base_types.bool` + + # TODO: It would be nice to have classes associated with this. + """ + super().__init__(config=config, core=core) + + # specify the model fp for clarity + self.model_filepath = self.config.get('model_filepath') + + # enforce model filepath passing + if not self.model_filepath: + raise ValueError( + ''' + The Process configuration requires a Smoldyn model filepath to be passed. + Please specify a 'model_filepath' in your instance configuration. + ''' + ) + + # initialize the simulator from a Smoldyn MinE.txt file. 
+ self.simulation: Simulation = Simulation.fromFile(self.model_filepath) + + # set default starting position of molecules/particles (assume all) + self.initial_mol_position = self.config.get('initial_mol_position', [0.0, 0.0, 0.0]) + self.initial_mol_state = self.config.get('initial_mol_state', 0) + + # get a list of the simulation species + species_count = self.simulation.count()['species'] + counts = self.config.get('initial_species_counts') + self.initial_species_counts = counts + self.species_names: List[str] = [] + for index in range(species_count): + species_name = self.simulation.getSpeciesName(index) + if 'empty' not in species_name.lower(): + self.species_names.append(species_name) + + self.initial_species_state = {} + self.initial_mol_state = {} + initial_mol_counts = {spec_name: self.simulation.getMoleculeCount(spec_name, MolecState.all) for spec_name in self.species_names} + for species_name, count in initial_mol_counts.items(): + self.initial_species_state[species_name] = count + for _ in range(count): + self.initial_mol_state[str(uuid4())] = { + 'coordinates': self.initial_mol_position, + 'species_id': species_name, + 'state': self.initial_mol_state + } + + # sort for logistical mapping to species names (i.e: ['a', 'b', c'] == ['0', '1', '2'] + self.species_names.sort() + + # make species counts of molecules dataset for output + self.simulation.addOutputData('species_counts') + # write molcounts to counts dataset at every timestep (shape=(n_timesteps, 1+n_species <-- one for time)): [timestep, countSpec1, countSpec2, ...] + self.simulation.addCommand(cmd='molcount species_counts', cmd_type='E') + + # make molecules dataset (molecule information) for output + self.simulation.addOutputData('molecules') + # write coords to dataset at every timestep (shape=(n_output_molecules, 7)): seven being [timestep, smol_id(species), mol_state, x, y, z, mol_serial_num] + self.simulation.addCommand(cmd='listmols molecules', cmd_type='E') + + # initialize the molecule ids based on the species names. We need this value to properly emit the schema, which expects a single value from this to be a str(int) + # the format for molecule_ids is expected to be: 'speciesId_moleculeNumber' + self.molecule_ids = list(self.initial_mol_state.keys()) + + # get the simulation boundaries, which in the case of Smoldyn denote the physical boundaries + # TODO: add a verification method to ensure that the boundaries do not change on the next step... 
+ self.boundaries: Dict[str, List[float]] = dict(zip(['low', 'high'], self.simulation.getBoundaries())) + + # create a re-usable counts and molecules type to be used by both inputs and outputs + self.counts_type = { + species_name: 'integer' + for species_name in self.species_names + } + + self.output_port_schema = { + 'species_counts': { + species_name: 'integer' + for species_name in self.species_names + }, + 'molecules': 'tree[string]', # self.molecules_type + 'results_file': 'string' + } + + # set time if applicable + self.duration = self.config.get('duration') + self.dt = self.config.get('dt', self.simulation.dt) + + # set graphics (defaults to False) + if self.config['animate']: + self.simulation.addGraphics('opengl_better') + + self._specs = [None for _ in self.species_names] + self._vals = dict(zip(self.species_names, [[] for _ in self.species_names])) + + # def initial_state(self): + # return { + # 'species_counts': self.initial_species_state, + # 'molecules': self.initial_mol_state + # } + + def inputs(self): + # schema = self.output_port_schema.copy() + # schema.pop('results_file') + # return schema + return {} + + def outputs(self): + return self.output_port_schema + + def update(self, inputs) -> Dict: + # reset the molecules, distribute the mols according to self.boundarieså + # for name in self.species_names: + # self.set_uniform( + # species_name=name, + # count=inputs['species_counts'][name], + # kill_mol=False + # ) + + # run the simulation for a given interval if specified, otherwise use builtin time + if self.duration is not None: + self.simulation.run(stop=self.duration, dt=self.simulation.dt, overwrite=True) + else: + self.simulation.runSim() + + # get the counts data, clear the buffer + counts_data = self.simulation.getOutputData('species_counts') + + # get the final counts for the update + final_count = counts_data[-1] + # remove the timestep from the list + final_count.pop(0) + + # get the data based on the commands added in the constructor, clear the buffer + molecules_data = self.simulation.getOutputData('molecules') + + # create an empty simulation state mirroring that which is specified in the schema + simulation_state = { + 'species_counts': {}, + 'molecules': {} + } + + # get and populate the species counts + for index, name in enumerate(self.species_names): + simulation_state['species_counts'][name] = counts_data[index] + # input_counts = simulatio['species_counts'][name] + # simulation_state['species_counts'][name] = int(final_count[index]) - input_counts + + # clear the list of known molecule ids and update the list of known molecule ids (convert to an intstring) + # self.molecule_ids.clear() + # for molecule in molecules_data: + # self.molecule_ids.append(str(uuid4())) + + # get and populate the output molecules + for i, single_mol_data in enumerate(molecules_data): + mol_species_index = int(single_mol_data[1]) - 1 + mol_id = str(uuid4()) + simulation_state['molecules'][mol_id] = { + 'coordinates': single_mol_data[3:6], + 'species_id': self.species_names[mol_species_index], + 'state': str(int(single_mol_data[2])) + } + + # mols = [] + # for index, mol_id in enumerate(self.molecule_ids): + # single_molecule_data = molecules_data[index] + # single_molecule_species_index = int(single_molecule_data[1]) - 1 + # mols.append(single_molecule_species_index) + # simulation_state['molecules'][mol_id] = { + # 'coordinates': single_molecule_data[3:6], + # 'species_id': self.species_names[single_molecule_species_index], + # 'state': str(int(single_molecule_data[2])) + # 
} + + # TODO -- post processing to get effective rates + + # TODO: adjust this for a more dynamic dir struct + model_dir = os.path.dirname(self.model_filepath) + for f in os.listdir(model_dir): + if f.endswith('.txt') and 'out' in f: + simulation_state['results_file'] = os.path.join(model_dir, f) + + return simulation_state + + def set_uniform( + self, + species_name: str, + count: int, + kill_mol: bool = True + ) -> None: + """Add a distribution of molecules to the solution in + the simulation memory given a higher and lower bound x,y coordinate. Smoldyn assumes + a global boundary versus individual species boundaries. Kills the molecule before dist if true. + + TODO: If pymunk expands the species compartment, account for + expanding `highpos` and `lowpos`. This method should be used within the body/logic of + the `update` class method. + + Args: + species_name:`str`: name of the given molecule. + count:`int`: number of molecules of the given `species_name` to add. + kill_mol:`bool`: kills the molecule based on the `name` argument, which effectively + removes the molecule from simulation memory. + """ + # kill the mol, effectively resetting it + if kill_mol: + self.simulation.runCommand(f'killmol {species_name}') + + # TODO: eventually allow for an expanding boundary ie in the configuration parameters (pymunk?), which is defies the methodology of smoldyn + + # redistribute the molecule according to the bounds + self.simulation.addSolutionMolecules( + species=species_name, + number=count, + highpos=self.boundaries['high'], + lowpos=self.boundaries['low'] + ) + + +class SimulariumSmoldynStep(Step): + """ + agent_data should have the following structure: + + {species_name(type): + {display_type: DISPLAY_TYPE., + (mass: `float` AND density: `float`) OR (radius: `float`) + + """ + config_schema = { + 'output_dest': 'string', + 'box_size': 'float', # as per simulariumio + 'spatial_units': { + '_default': 'nm', + '_type': 'string' + }, + 'temporal_units': { + '_default': 'ns', + '_type': 'string' + }, + 'translate_output': { + '_default': True, + '_type': 'boolean' + }, + 'write_json': { + '_default': True, + '_type': 'boolean' + }, + 'run_validation': { + '_default': True, + '_type': 'boolean' + }, + 'file_save_name': 'maybe[string]', + 'translation_magnitude': 'maybe[float]', + 'meta_data': 'maybe[tree[string]]', + 'agent_display_parameters': 'maybe[tree[string]]' # as per biosim simularium + } + + def __init__(self, config, core): + super().__init__(config=config, core=core) + + # io params + self.output_dest = self.config['output_dest'] + self.write_json = self.config['write_json'] + self.filename = self.config.get('file_save_name') + + # display params + self.box_size = self.config['box_size'] + self.translate_output = self.config['translate_output'] + self.translation_magnitude = self.config.get('translation_magnitude') + self.agent_display_parameters = self.config.get('agent_display_parameters', {}) + + # units params + self.spatial_units = self.config['spatial_units'] + self.temporal_units = self.config['temporal_units'] + + # info params + self.meta_data = self.config.get('meta_data') + self.run_validation = self.config['run_validation'] + + def inputs(self): + return {'results_file': 'string', 'species_names': 'list[string]'} + + def outputs(self): + return {'simularium_file': 'string'} + + def update(self, inputs): + # get job params + in_file = inputs['results_file'] + file_data = InputFileData(in_file) + + # get species data for display data + species_names = inputs['species_names'] 
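+        # Hedged sketch (assumes the upstream SmoldynStep has already written its results
+        # file to this path before this step runs); an explicit guard could fail fast:
+        #   if not os.path.isfile(in_file):
+        #       raise FileNotFoundError(f"No Smoldyn results file found at: {in_file}")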
+ + # generate simulariumio Smoldyn Data TODO: should display data be gen for each species type or n number of instances of that type? + display_data = self._generate_display_data(species_names) + io_data = SmoldynData( + smoldyn_file=file_data, + spatial_units=UnitData(self.spatial_units), + time_units=UnitData(self.temporal_units), + display_data=display_data, + meta_data=self.meta_data, + center=True + ) + + # translate reflections if needed + if self.translate_output: + io_data = translate_data_object(data=io_data, box_size=self.box_size, translation_magnitude=self.translation_magnitude) + # write data to simularium file + if self.filename is None: + self.filename = in_file.split('/')[-1].replace('.', '') + "-simulation" + + save_path = os.path.join(self.output_dest, self.filename) + write_simularium_file(data=io_data, simularium_fp=save_path, json=self.write_json, validate=self.run_validation) + result = {'simularium_file': save_path + '.simularium'} + + return result + + def _generate_display_data(self, species_names) -> Dict | None: + # user is specifying display data for agents + if isinstance(self.agent_display_parameters, dict) and len(self.agent_display_parameters.keys()) > 0: + display_data = {} + for name in species_names: + display_params = self.agent_display_parameters[name] + + # handle agent radius + radius_param = display_params.get('radius') + + # user has passed a mass and density for a given agent + if radius_param is None: + radius_param = calculate_agent_radius(m=display_params['mass'], rho=display_params['density']) + + # make kwargs for display data + display_data_kwargs = { + 'name': name, + 'display_type': DISPLAY_TYPE[display_params['display_type']], + 'radius': radius_param + } + + # check if self.agent_params as been passed as a mapping of species_name: {species_mass: , species_shape: } + display_data[name] = DisplayData(**display_data_kwargs) + + return display_data + + return None + + +# -- Output data generators: -- # + +class OutputGenerator(Step): + config_schema = { + 'input_file': 'string', + 'context': 'string', + } + + def __init__(self, config, core): + super().__init__(config, core) + self.input_file = self.config['input_file'] + self.context = self.config.get('context') + if self.context is None: + raise ValueError("context (i.e., simulator name) must be specified in this processes' config.") + + @abstractmethod + def generate(self, parameters: Optional[Dict[str, Any]] = None): + """Abstract method for generating output data upon which to base analysis from based on its origin. + + This can be used for logic of any scope. + NOTE: args and kwargs are not defined in this function, but rather should be defined by the + inheriting class' constructor: i,e; start_time, etc. + + Kwargs relate only to the given simulator api you are working with. + """ + pass + + def initial_state(self): + # base class method + return { + 'output_data': {} + } + + def inputs(self): + return { + 'parameters': 'tree[any]' + } + + def outputs(self): + return { + 'output_data': 'tree[any]' + } + + def update(self, state): + parameters = state.get('parameters') if isinstance(state, dict) else {} + data = self.generate(parameters) + return {'output_data': data} + + +class TimeCourseOutputGenerator(OutputGenerator): + # NOTE: we include defaults here as opposed to constructor for the purpose of deliberate declaration within .json state representation. 
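+    # e.g., the serialized defaults below would appear in the .json state as (illustrative):
+    #   {"start_time": 0, "end_time": 10, "num_steps": 100}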
+ config_schema = { + # 'input_file': 'string', + # 'context': 'string', + 'start_time': { + '_type': 'integer', + '_default': 0 + }, + 'end_time': { + '_type': 'integer', + '_default': 10 + }, + 'num_steps': { + '_type': 'integer', + '_default': 100 + }, + } + + def __init__(self, config, core): + super().__init__(config, core) + if not self.input_file.endswith('.xml'): + raise ValueError('Input file must be a valid SBML (XML) file') + + self.start_time = self.config.get('start_time') + self.end_time = self.config.get('end_time') + self.num_steps = self.config.get('num_steps') + self.species_mapping = get_sbml_species_mapping(self.input_file) + + def initial_state(self): + # TODO: implement this + pass + + def generate(self, parameters: Optional[Dict[str, Any]] = None): + # TODO: add kwargs (initial state specs) here + executor = SBML_EXECUTORS[self.context] + data = executor(self.input_file, self.start_time, self.end_time, self.num_steps) + + return data + + +# -- process implementation utils -- # + +def generate_simularium_file( + input_fp: str, + dest_dir: str, + box_size: float, + translate_output: bool = True, + write_json: bool = True, + run_validation: bool = True, + agent_parameters: Dict[str, Dict[str, Any]] = None +) -> Dict[str, str]: + species_names = [] + float_pattern = re.compile(r'^-?\d+(\.\d+)?$') + with open(input_fp, 'r') as f: + output = [l.strip() for l in f.readlines()] + for line in output: + datum = line.split(' ')[0] + # Check if the datum is not a float string + if not float_pattern.match(datum): + species_names.append(datum) + f.close() + species_names = list(set(species_names)) + + simularium = SimulariumSmoldynStep(config={ + 'output_dest': dest_dir, + 'box_size': box_size, + 'translate_output': translate_output, + 'file_save_name': None, + 'write_json': write_json, + 'run_validation': run_validation, + 'agent_display_parameters': agent_parameters, + }) + + return simularium.update(inputs={ + 'results_file': input_fp, + 'species_names': species_names + }) + + +def make_fallback_serializer_function(process_registry) -> Callable: + """Creates a fallback function that is called by orjson on data of + types that are not natively supported. Define and register instances of + :py:class:`vivarium.core.registry.Serializer()` with serialization + routines for the types in question.""" + + def default(obj: Any) -> Any: + # Try to lookup by exclusive type + serializer = process_registry.access(str(type(obj))) + if not serializer: + compatible_serializers = [] + for serializer_name in process_registry.list(): + test_serializer = process_registry.access(serializer_name) + # Subclasses with registered serializers will be caught here + if isinstance(obj, test_serializer.python_type): + compatible_serializers.append(test_serializer) + if len(compatible_serializers) > 1: + raise TypeError( + f'Multiple serializers ({compatible_serializers}) found ' + f'for {obj} of type {type(obj)}') + if not compatible_serializers: + raise TypeError( + f'No serializer found for {obj} of type {type(obj)}') + serializer = compatible_serializers[0] + if not isinstance(obj, Process): + # We don't warn for processes because since their types + # based on their subclasses, it's not possible to avoid + # searching through the serializers. + warn( + f'Searched through serializers to find {serializer} ' + f'for data of type {type(obj)}. 
This is ' + f'inefficient.') + return serializer.serialize(obj) + return default + diff --git a/worker/bigraph_processes.py b/worker/datagen_src/data_model.pyx similarity index 100% rename from worker/bigraph_processes.py rename to worker/datagen_src/data_model.pyx diff --git a/worker/datagen_src/shared_worker.pyx b/worker/datagen_src/shared_worker.pyx new file mode 100644 index 000000000..e69de29bb diff --git a/worker/environment.worker.yml b/worker/environment.worker.yml index cc31a81df..c60378d78 100644 --- a/worker/environment.worker.yml +++ b/worker/environment.worker.yml @@ -8,9 +8,10 @@ channels: dependencies: - pip - python=3.10 + - cython - pysces - # - pymem3dg - readdy + - # - pymem3dg - pip: - h5py - process-bigraph diff --git a/worker/service/__init__.py b/worker/service/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/worker/service/bigraph_processes.py b/worker/service/bigraph_processes.py new file mode 100644 index 000000000..e69de29bb diff --git a/worker/bigraph_steps b/worker/service/bigraph_steps similarity index 100% rename from worker/bigraph_steps rename to worker/service/bigraph_steps diff --git a/worker/compatible.py b/worker/service/compatible.py similarity index 100% rename from worker/compatible.py rename to worker/service/compatible.py diff --git a/worker/data_model.py b/worker/service/data_model.py similarity index 100% rename from worker/data_model.py rename to worker/service/data_model.py diff --git a/worker/shared_worker.py b/worker/service/db_connector.py similarity index 72% rename from worker/shared_worker.py rename to worker/service/db_connector.py index 0c08d30f8..f3d52897a 100644 --- a/worker/shared_worker.py +++ b/worker/service/db_connector.py @@ -1,31 +1,12 @@ -import os -import uuid from abc import abstractmethod, ABC -from asyncio import sleep -from dataclasses import dataclass, asdict from datetime import datetime -from enum import Enum from typing import * -from dotenv import load_dotenv from pymongo import MongoClient from pymongo.collection import Collection from pymongo.database import Database -# from biosimulators_processes import CORE -# -- globally-shared content-- # - - -load_dotenv('../assets/dev/config/.env_dev') - -DB_TYPE = "mongo" # ie: postgres, etc -DB_NAME = "service_requests" -BUCKET_NAME = os.getenv("BUCKET_NAME") - - -# -- shared functions -- # - def check_jobs(coll): from main import db_connector as conn from job import Supervisor @@ -43,62 +24,6 @@ def check_jobs(coll): return not_complete -def unique_id(): - return str(uuid.uuid4()) - - -def handle_exception(context: str) -> str: - import traceback - from pprint import pformat - tb_str = traceback.format_exc() - error_message = pformat(f"{context} error:\n{tb_str}") - - return error_message - - -async def load_arrows(timer): - check_timer = timer - ell = "" - bars = "" - msg = "|" - n_ellipses = timer - log_interval = check_timer / n_ellipses - for n in range(n_ellipses): - single_interval = log_interval / 3 - await sleep(single_interval) - bars += "=" - disp = bars + ">" - if n == n_ellipses - 1: - disp += "|" - print(disp) - - -# -- base python dataclass with to_dict() method -- # - -@dataclass -class BaseClass: - """Base Python Dataclass multipurpose class with custom app configuration.""" - def to_dict(self): - return asdict(self) - - -# -- jobs -- # - -class JobStatus(Enum): - PENDING = "PENDING" - IN_PROGRESS = "IN_PROGRESS" - COMPLETED = "COMPLETED" - FAILED = "FAILED" - - -class DatabaseCollections(Enum): - PENDING_JOBS = 
"PENDING_JOBS".lower() - IN_PROGRESS_JOBS = "IN_PROGRESS_JOBS".lower() - COMPLETED_JOBS = "COMPLETED_JOBS".lower() - - -# -- database connectors: currently exclusive to mongodb. TODO: create a dbconnector for a relational db -- # - class DatabaseConnector(ABC): """Abstract class that is both serializable and interacts with the database (of any type). """ def __init__(self, connection_uri: str, database_id: str, connector_id: str): @@ -217,4 +142,4 @@ def _parse_enum_input(self, _input: Any) -> str: class MultipleConnectorError(Exception): def __init__(self, message: str): - self.message = message + self.message = message \ No newline at end of file diff --git a/worker/insert_test_data.py b/worker/service/insert_test_data.py similarity index 100% rename from worker/insert_test_data.py rename to worker/service/insert_test_data.py diff --git a/worker/io_worker.py b/worker/service/io_worker.py similarity index 100% rename from worker/io_worker.py rename to worker/service/io_worker.py diff --git a/worker/job.py b/worker/service/job.py similarity index 98% rename from worker/job.py rename to worker/service/job.py index 8c6f2a6b5..31a37100a 100644 --- a/worker/job.py +++ b/worker/service/job.py @@ -1,6 +1,7 @@ +import sys +import os import logging import math -import os import tempfile from abc import ABC, abstractmethod from asyncio import sleep @@ -15,17 +16,21 @@ from shared_worker import MongoDbConnector, JobStatus, DatabaseCollections, unique_id, BUCKET_NAME, handle_exception from log_config import setup_logging from io_worker import get_sbml_species_mapping, read_h5_reports, download_file, format_smoldyn_configuration, write_uploaded_file -from data_generator import ( - generate_time_course_data, - generate_composition_result_data, - run_smoldyn, - run_readdy, - handle_sbml_exception, - generate_biosimulator_utc_outputs, - generate_sbml_utc_outputs, - get_output_stack, - sbml_output_stack -) + + +# TODO: fully implement and import this data generator content from build and cython: +sys.path.append(os.path.abspath('../datagen_build')) +# from data_generator import ( +# generate_time_course_data, +# generate_composition_result_data, +# run_smoldyn, +# run_readdy, +# handle_sbml_exception, +# generate_biosimulator_utc_outputs, +# generate_sbml_utc_outputs, +# get_output_stack, +# sbml_output_stack +# ) # TODO: Create general Worker process implementation! 
diff --git a/worker/log_config.py b/worker/service/log_config.py similarity index 100% rename from worker/log_config.py rename to worker/service/log_config.py diff --git a/worker/main.py b/worker/service/main.py similarity index 100% rename from worker/main.py rename to worker/service/main.py diff --git a/worker/service/shared_worker.py b/worker/service/shared_worker.py new file mode 100644 index 000000000..ebdb719c1 --- /dev/null +++ b/worker/service/shared_worker.py @@ -0,0 +1,81 @@ +import os +import uuid +from asyncio import sleep +from dataclasses import dataclass, asdict +from enum import Enum + +from dotenv import load_dotenv +# from biosimulators_processes import CORE + + +# -- globally-shared content-- # + + +load_dotenv('../assets/dev/config/.env_dev') + +DB_TYPE = "mongo" # ie: postgres, etc +DB_NAME = "service_requests" +BUCKET_NAME = os.getenv("BUCKET_NAME") + + +# -- shared functions -- # + + + + +def unique_id(): + return str(uuid.uuid4()) + + +def handle_exception(context: str) -> str: + import traceback + from pprint import pformat + tb_str = traceback.format_exc() + error_message = pformat(f"{context} error:\n{tb_str}") + + return error_message + + +async def load_arrows(timer): + check_timer = timer + ell = "" + bars = "" + msg = "|" + n_ellipses = timer + log_interval = check_timer / n_ellipses + for n in range(n_ellipses): + single_interval = log_interval / 3 + await sleep(single_interval) + bars += "=" + disp = bars + ">" + if n == n_ellipses - 1: + disp += "|" + print(disp) + + +# -- base python dataclass with to_dict() method -- # + +@dataclass +class BaseClass: + """Base Python Dataclass multipurpose class with custom app configuration.""" + def to_dict(self): + return asdict(self) + + +# -- jobs -- # + +class JobStatus(Enum): + PENDING = "PENDING" + IN_PROGRESS = "IN_PROGRESS" + COMPLETED = "COMPLETED" + FAILED = "FAILED" + + +class DatabaseCollections(Enum): + PENDING_JOBS = "PENDING_JOBS".lower() + IN_PROGRESS_JOBS = "IN_PROGRESS_JOBS".lower() + COMPLETED_JOBS = "COMPLETED_JOBS".lower() + + +# -- database connectors: currently exclusive to mongodb. TODO: create a dbconnector for a relational db -- # + diff --git a/worker/simularium_utils.py b/worker/service/simularium_utils.py similarity index 100% rename from worker/simularium_utils.py rename to worker/service/simularium_utils.py diff --git a/worker/test_new_simulators.py b/worker/service/test_new_simulators.py similarity index 100% rename from worker/test_new_simulators.py rename to worker/service/test_new_simulators.py diff --git a/worker/test_worker.py b/worker/service/test_worker.py similarity index 100% rename from worker/test_worker.py rename to worker/service/test_worker.py diff --git a/worker/setup.py b/worker/setup.py new file mode 100644 index 000000000..54b83fbc3 --- /dev/null +++ b/worker/setup.py @@ -0,0 +1,23 @@ +import os +from setuptools import setup, find_packages + +from Cython.Build import cythonize + + +SOURCE_DIR = "datagen_src" +BUILD_DIR = "datagen_build" + +os.makedirs(BUILD_DIR, exist_ok=True) + + +setup( + ext_modules=cythonize(f"{SOURCE_DIR}/*.pyx"), # Compile all .pyx files in src/ + options={ + "build": {"build_base": BUILD_DIR}, # Set custom build directory + "build_ext": {"build_lib": BUILD_DIR}, # Place .so files in build/ + }, + packages=["service"] +) + +# TODO: do this PRIOR to building the image: +# python setup.py build_ext
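+# Note: with the `build`/`build_ext` options above, the compiled extension modules land in
+# datagen_build/, which matches the ../datagen_build path appended to sys.path in
+# worker/service/job.py before the data generator content is imported.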