diff --git a/analysis-pipeline/Dockerfile b/analysis-pipeline/Dockerfile deleted file mode 100644 index 9935f4ae9..000000000 --- a/analysis-pipeline/Dockerfile +++ /dev/null @@ -1,83 +0,0 @@ -FROM ubuntu:jammy - -# Set environment variables -ENV DEBIAN_FRONTEND=noninteractive -ENV LANG=en_US.UTF-8 -ENV LANGUAGE=en_US:en -ENV LC_ALL=en_US.UTF-8 -ENV PATH="/root/.local/bin:/usr/local/bin:${PATH}" - -# Install system dependencies in a single layer -RUN apt-get update && apt-get upgrade -y && \ - apt-get install -y \ - python3 \ - python3-pip \ - curl \ - unzip \ - build-essential \ - nano \ - libssl-dev \ - libffi-dev \ - python3-dev \ - virtualenv \ - wget \ - gpg \ - git \ - software-properties-common \ - locales \ - apt-transport-https \ - ca-certificates \ - gnupg \ - lsb-release \ - && locale-gen en_US.UTF-8 \ - && rm -rf /var/lib/apt/lists/* - -# Install Docker -RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg && \ - echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null && \ - apt-get update && \ - apt-get install -y docker-ce docker-ce-cli containerd.io && \ - rm -rf /var/lib/apt/lists/* - -# Install Python 3.9 for conseq -RUN add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && \ - apt-get install -y \ - python3.9 \ - python3.9-distutils \ - python3.9-dev \ - && rm -rf /var/lib/apt/lists/* - -# Create virtual environment for conseq -RUN virtualenv --python=python3.9 /install/conseq - -# Install taigapy in conseq environment -RUN /install/conseq/bin/pip install --extra-index-url=https://us-central1-python.pkg.dev/cds-artifacts/public-python/simple/ taigapy - -# Install conseq -COPY conseq-2.0.2.tar.gz /tmp/ -RUN mkdir -p /tmp/conseq-install && \ - tar -xzf /tmp/conseq-2.0.2.tar.gz -C /tmp/conseq-install && \ - cd /tmp/conseq-install/conseq-2.0.2 && \ - /install/conseq/bin/pip install . && \ - ln -s /install/conseq/bin/conseq /usr/bin/conseq && \ - cp /install/conseq/lib/python3.9/site-packages/conseq/helper.py /helper.py && \ - rm -rf /tmp/conseq* - -# Create necessary directories -RUN mkdir -p /root/.taiga /work/data-prep-pipeline - -# Set up Poetry -RUN curl -sSL https://install.python-poetry.org | python3 - && \ - poetry --version - -WORKDIR /work/analysis-pipeline -COPY pyproject.toml poetry.lock ./ -RUN poetry install - -# Install gcloud -RUN apt-get update && apt-get install -y curl gnupg && \ - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add - && \ - echo "deb https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ - apt-get update && \ - apt-get install -y google-cloud-cli diff --git a/analysis-pipeline/README.md b/analysis-pipeline/README.md deleted file mode 100644 index c63d7f8fb..000000000 --- a/analysis-pipeline/README.md +++ /dev/null @@ -1,12 +0,0 @@ -This is the very first draft of predictability as part of conseq in the new analysis pipeline. - -You only need conseq installed, since everything else that is required is installed inside the us.gcr.io/broad-achilles/daintree-sparkles:v4 image where predictability is run. - -Note that there is a model-config.yaml file which has the config of all the models. -Once conseq is installed, you can run `conseq run fit.conseq` to start. - -The `fit.conseq` file works as follows: - -1. 
It first creates model input json files based on the `model-config.yaml` file. -2. Once the input json file is created, daintree is run to produce the output for predictability. There are 3 different files that are uploaded to taiga for each model: predictions.csv, ensemble.csv, and feature_metadata.csv. Running daintree also creates an `output_config.json` file which has the input config as well as the taiga ids of the 3 uploaded files. -3. The per-model `output_config.json` files are then combined into a single `combined_daintree_output_config.json`, where each screen is a key and the value is the list of output configs for the models run on that screen. diff --git a/analysis-pipeline/image-name b/analysis-pipeline/image-name deleted file mode 100644 index 3d668b57e..000000000 --- a/analysis-pipeline/image-name +++ /dev/null @@ -1 +0,0 @@ -DOCKER_IMAGE=us.gcr.io/broad-achilles/analysis-pipeline-run:v3 diff --git a/analysis-pipeline/jenkins-run-pipeline.sh b/analysis-pipeline/jenkins-run-pipeline.sh deleted file mode 100755 index d9bb670ee..000000000 --- a/analysis-pipeline/jenkins-run-pipeline.sh +++ /dev/null @@ -1,119 +0,0 @@ -#!/bin/bash -if [ "$1" == "" ]; then -# required: env name - echo "needs name of environment" - exit 1 -fi - -ENV_NAME="$1" -CONSEQ_FILE="predictability/run_${ENV_NAME}_analysis.conseq" - -if [ "$2" == "" ]; then -# required: job name - echo "needs name to use for job" - exit 1 -fi - -JOB_NAME="$2" - -if [ "$3" != "" ]; then -# optional: export path - EXPORT_PATH="$3" - echo "Using export path: $EXPORT_PATH" -else - # Default export path if not provided - EXPORT_PATH="gs://preprocessing-pipeline-outputs/analysis-pipeline/$ENV_NAME/export" - echo "Using default export path: $EXPORT_PATH" -fi - -# set DOCKER_IMAGE from pipeline-run-docker/image-name -SCRIPT_PATH=`dirname $0` -source "$SCRIPT_PATH/image-name" - -COMMIT_SHA=`git rev-parse HEAD` -if [ "${COMMIT_SHA}" == "" ]; then - COMMIT_SHA="unknown" -fi - -set -ex -GOOGLE_APPLICATION_CREDENTIALS=/etc/google/auth/application_default_credentials.json docker pull ${DOCKER_IMAGE} - -# Copy all logs. I'm copying this to a new directory because each time we run we gc the state directory and that -# causes old logs to be deleted which makes it harder to investigate what happened. -function backup_conseq_logs { - file_list=`mktemp` - if [ -e analysis-pipeline/state ] ; then - ( cd analysis-pipeline/state && \ - find . -name "std*.txt" > ${file_list} && \ - find . -name "*.sh" >> ${file_list} && \ - find . -name "*.log" >> ${file_list} ) - rsync -a analysis-pipeline/state predictability-logs --files-from=${file_list} - rm ${file_list} - fi -} - -# use /data2/depmap-pipeline-taiga as the taiga dir because -# different versions of taigapy seem to conflict in pickle format -if [ "$TAIGA_DIR" == "" ] ; then - TAIGA_DIR="/data2/depmap-pipeline-taiga" -fi - -if [ "$PIPELINE_RUNNER_CREDS_DIR" == "" ] ; then - PIPELINE_RUNNER_CREDS_DIR='/etc/depmap-pipeline-runner-creds' -fi - -if [ ! "${PIPELINE_RUNNER_CREDS_DIR}/broad-paquitas" -o !
"${PIPELINE_RUNNER_CREDS_DIR}/depmap-pipeline-runner.json" ] ; then - echo "Could not find required file" - exit 1 -fi - -function run_via_container { - COMMAND="$1" - docker run \ - --rm \ - -v "$PWD":/work \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/sparkles:/root/.sparkles-cache" \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/depmap-pipeline-runner.json":/etc/google_default_creds.json \ - -v "${TAIGA_DIR}:/root/.taiga" \ - -v /etc/google/auth/application_default_credentials.json:/etc/google/auth/application_default_credentials.json \ - -e GOOGLE_APPLICATION_CREDENTIALS=/etc/google/auth/application_default_credentials.json \ - -e HOST_WORKSPACE_PATH="$PWD" \ - -v /var/run/docker.sock:/var/run/docker.sock \ - -w /work/analysis-pipeline \ - --name "$JOB_NAME" \ - ${DOCKER_IMAGE} \ - bash -c "gcloud auth configure-docker us.gcr.io && $COMMAND" -} - -# backup logs before running GC -backup_conseq_logs - -if [ "$MANUALLY_RUN_CONSEQ" = "true" ]; then - echo "executing: conseq $CONSEQ_ARGS" - run_via_container "conseq -D is_dev=False $CONSEQ_ARGS" -else - # Clean up unused directories from past runs - run_via_container "conseq gc" - - # Kick off new run - set +e - run_via_container "conseq run --addlabel commitsha=${COMMIT_SHA} --no-reattach --maxfail 20 --remove-unknown-artifacts -D sparkles_path=/install/sparkles/bin/sparkles -D is_dev=False $CONSEQ_FILE $CONSEQ_ARGS" - RUN_EXIT_STATUS=$? - set -e - - # Generate export - # run_via_container "conseq export $CONSEQ_FILE $EXPORT_PATH" - - # Generate report - # run_via_container "conseq report html" - - # copy the latest logs - backup_conseq_logs -fi - -echo "Pipeline run complete" - -# docker container is writing files as root. Fix up permissions after job completes -sudo chown -R ubuntu . - -exit $RUN_EXIT_STATUS diff --git a/analysis-pipeline/poetry.lock b/analysis-pipeline/poetry.lock deleted file mode 100644 index 5f42056b1..000000000 --- a/analysis-pipeline/poetry.lock +++ /dev/null @@ -1,1279 +0,0 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. - -[[package]] -name = "black" -version = "24.10.0" -description = "The uncompromising code formatter." 
-optional = false -python-versions = ">=3.9" -files = [ - {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"}, - {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"}, - {file = "black-24.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:649fff99a20bd06c6f727d2a27f401331dc0cc861fb69cde910fe95b01b5928f"}, - {file = "black-24.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe4d6476887de70546212c99ac9bd803d90b42fc4767f058a0baa895013fbb3e"}, - {file = "black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad"}, - {file = "black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50"}, - {file = "black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392"}, - {file = "black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175"}, - {file = "black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3"}, - {file = "black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65"}, - {file = "black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f"}, - {file = "black-24.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:30d2c30dc5139211dda799758559d1b049f7f14c580c409d6ad925b74a4208a8"}, - {file = "black-24.10.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cbacacb19e922a1d75ef2b6ccaefcd6e93a2c05ede32f06a21386a04cedb981"}, - {file = "black-24.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1f93102e0c5bb3907451063e08b9876dbeac810e7da5a8bfb7aeb5a9ef89066b"}, - {file = "black-24.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ddacb691cdcdf77b96f549cf9591701d8db36b2f19519373d60d31746068dbf2"}, - {file = "black-24.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:680359d932801c76d2e9c9068d05c6b107f2584b2a5b88831c83962eb9984c1b"}, - {file = "black-24.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:17374989640fbca88b6a448129cd1745c5eb8d9547b464f281b251dd00155ccd"}, - {file = "black-24.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:63f626344343083322233f175aaf372d326de8436f5928c042639a4afbbf1d3f"}, - {file = "black-24.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfa1d0cb6200857f1923b602f978386a3a2758a65b52e0950299ea014be6800"}, - {file = "black-24.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cd9c95431d94adc56600710f8813ee27eea544dd118d45896bb734e9d7a0dc7"}, - {file = "black-24.10.0-py3-none-any.whl", hash = "sha256:3bb2b7a1f7b685f85b11fed1ef10f8a9148bceb49853e47a294a3dd963c1dd7d"}, - {file = "black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875"}, -] - -[package.dependencies] -click = ">=8.0.0" -mypy-extensions = ">=0.4.3" -packaging = ">=22.0" -pathspec = ">=0.9.0" -platformdirs = ">=2" -tomli = {version = ">=1.1.0", 
markers = "python_version < \"3.11\""} -typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} - -[package.extras] -colorama = ["colorama (>=0.4.3)"] -d = ["aiohttp (>=3.10)"] -jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] -uvloop = ["uvloop (>=0.15.2)"] - -[[package]] -name = "boto3" -version = "1.37.8" -description = "The AWS SDK for Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "boto3-1.37.8-py3-none-any.whl", hash = "sha256:b9f506e08c9f54687d6c073ef1c550a24a62cc2d1e0bc7cda9f13112a38818bf"}, - {file = "boto3-1.37.8.tar.gz", hash = "sha256:9448f4a079189e19c3253cfdc5b8ef6dc51a3b82431e8347a51f4c1b2d9dab42"}, -] - -[package.dependencies] -botocore = ">=1.37.8,<1.38.0" -jmespath = ">=0.7.1,<2.0.0" -s3transfer = ">=0.11.0,<0.12.0" - -[package.extras] -crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] - -[[package]] -name = "botocore" -version = "1.37.8" -description = "Low-level, data-driven core of boto 3." -optional = false -python-versions = ">=3.8" -files = [ - {file = "botocore-1.37.8-py3-none-any.whl", hash = "sha256:a6c94f33de12f4b10b10684019e554c980469b8394c6d82448a738cbd8452cef"}, - {file = "botocore-1.37.8.tar.gz", hash = "sha256:b5825e08dd3e25642aa22a0d7d92bf81fef1ef857117e4155f923bbccf5aba63"}, -] - -[package.dependencies] -jmespath = ">=0.7.1,<2.0.0" -python-dateutil = ">=2.1,<3.0.0" -urllib3 = [ - {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, - {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, -] - -[package.extras] -crt = ["awscrt (==0.23.8)"] - -[[package]] -name = "cachetools" -version = "5.5.2" -description = "Extensible memoizing collections and decorators" -optional = false -python-versions = ">=3.7" -files = [ - {file = "cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a"}, - {file = "cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4"}, -] - -[[package]] -name = "certifi" -version = "2025.1.31" -description = "Python package for providing Mozilla's CA Bundle." -optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"}, - {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"}, -] - -[[package]] -name = "cfgv" -version = "3.4.0" -description = "Validate configuration and produce human readable error messages." -optional = false -python-versions = ">=3.8" -files = [ - {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, - {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, -] - -[[package]] -name = "charset-normalizer" -version = "3.4.1" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"}, - {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", 
hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"}, - {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, - {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"}, - {file = 
"charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"}, - {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"}, - {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", 
hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"}, - {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = 
"sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"}, - {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"}, - {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, - {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, -] - -[[package]] -name = "click" -version = "8.1.8" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -files = [ - {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, - {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "colorful" -version = "0.5.6" -description = "Terminal string styling done right, in Python." -optional = false -python-versions = "*" -files = [ - {file = "colorful-0.5.6-py2.py3-none-any.whl", hash = "sha256:eab8c1c809f5025ad2b5238a50bd691e26850da8cac8f90d660ede6ea1af9f1e"}, - {file = "colorful-0.5.6.tar.gz", hash = "sha256:b56d5c01db1dac4898308ea889edcb113fbee3e6ec5df4bacffd61d5241b5b8d"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "distlib" -version = "0.3.9" -description = "Distribution utilities" -optional = false -python-versions = "*" -files = [ - {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, - {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, -] - -[[package]] -name = "filelock" -version = "3.17.0" -description = "A platform independent file lock." 
-optional = false -python-versions = ">=3.9" -files = [ - {file = "filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338"}, - {file = "filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e"}, -] - -[package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] -typing = ["typing-extensions (>=4.12.2)"] - -[[package]] -name = "google-api-core" -version = "2.24.1" -description = "Google API client core library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_api_core-2.24.1-py3-none-any.whl", hash = "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1"}, - {file = "google_api_core-2.24.1.tar.gz", hash = "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"}, -] - -[package.dependencies] -google-auth = ">=2.14.1,<3.0.dev0" -googleapis-common-protos = ">=1.56.2,<2.0.dev0" -proto-plus = [ - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, -] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" -requests = ">=2.18.0,<3.0.0.dev0" - -[package.extras] -async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"] -grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] -grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] - -[[package]] -name = "google-auth" -version = "2.38.0" -description = "Google Authentication Library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a"}, - {file = "google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4"}, -] - -[package.dependencies] -cachetools = ">=2.0.0,<6.0" -pyasn1-modules = ">=0.2.1" -rsa = ">=3.1.4,<5" - -[package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] -enterprise-cert = ["cryptography", "pyopenssl"] -pyjwt = ["cryptography (>=38.0.3)", "pyjwt (>=2.0)"] -pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] -reauth = ["pyu2f (>=0.1.5)"] -requests = ["requests (>=2.20.0,<3.0.0.dev0)"] - -[[package]] -name = "google-cloud-core" -version = "2.4.2" -description = "Google Cloud API client core library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_cloud_core-2.4.2-py2.py3-none-any.whl", hash = "sha256:7459c3e83de7cb8b9ecfec9babc910efb4314030c56dd798eaad12c426f7d180"}, - {file = "google_cloud_core-2.4.2.tar.gz", hash = "sha256:a4fcb0e2fcfd4bfe963837fad6d10943754fd79c1a50097d68540b6eb3d67f35"}, -] - -[package.dependencies] -google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" -google-auth = ">=1.25.0,<3.0dev" - -[package.extras] -grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] - -[[package]] -name = "google-cloud-storage" -version = "2.19.0" -description = "Google 
Cloud Storage API client library" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_cloud_storage-2.19.0-py2.py3-none-any.whl", hash = "sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba"}, - {file = "google_cloud_storage-2.19.0.tar.gz", hash = "sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2"}, -] - -[package.dependencies] -google-api-core = ">=2.15.0,<3.0.0dev" -google-auth = ">=2.26.1,<3.0dev" -google-cloud-core = ">=2.3.0,<3.0dev" -google-crc32c = ">=1.0,<2.0dev" -google-resumable-media = ">=2.7.2" -requests = ">=2.18.0,<3.0.0dev" - -[package.extras] -protobuf = ["protobuf (<6.0.0dev)"] -tracing = ["opentelemetry-api (>=1.1.0)"] - -[[package]] -name = "google-crc32c" -version = "1.6.0" -description = "A python wrapper of the C library 'Google CRC32C'" -optional = false -python-versions = ">=3.9" -files = [ - {file = "google_crc32c-1.6.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa"}, - {file = "google_crc32c-1.6.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9"}, - {file = "google_crc32c-1.6.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7"}, - {file = "google_crc32c-1.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e"}, - {file = "google_crc32c-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc"}, - {file = "google_crc32c-1.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42"}, - {file = "google_crc32c-1.6.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4"}, - {file = "google_crc32c-1.6.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8"}, - {file = "google_crc32c-1.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d"}, - {file = "google_crc32c-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f"}, - {file = "google_crc32c-1.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3"}, - {file = "google_crc32c-1.6.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d"}, - {file = "google_crc32c-1.6.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b"}, - {file = "google_crc32c-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00"}, - {file = "google_crc32c-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3"}, - {file = "google_crc32c-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760"}, - {file = "google_crc32c-1.6.0-cp39-cp39-macosx_12_0_arm64.whl", hash = 
"sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205"}, - {file = "google_crc32c-1.6.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0"}, - {file = "google_crc32c-1.6.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2"}, - {file = "google_crc32c-1.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871"}, - {file = "google_crc32c-1.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57"}, - {file = "google_crc32c-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c"}, - {file = "google_crc32c-1.6.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc"}, - {file = "google_crc32c-1.6.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d"}, - {file = "google_crc32c-1.6.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24"}, - {file = "google_crc32c-1.6.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d"}, - {file = "google_crc32c-1.6.0.tar.gz", hash = "sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc"}, -] - -[package.extras] -testing = ["pytest"] - -[[package]] -name = "google-resumable-media" -version = "2.7.2" -description = "Utilities for Google Media Downloads and Resumable Uploads" -optional = false -python-versions = ">=3.7" -files = [ - {file = "google_resumable_media-2.7.2-py2.py3-none-any.whl", hash = "sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa"}, - {file = "google_resumable_media-2.7.2.tar.gz", hash = "sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0"}, -] - -[package.dependencies] -google-crc32c = ">=1.0,<2.0dev" - -[package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"] -requests = ["requests (>=2.18.0,<3.0.0dev)"] - -[[package]] -name = "googleapis-common-protos" -version = "1.69.1" -description = "Common protobufs used in Google APIs" -optional = false -python-versions = ">=3.7" -files = [ - {file = "googleapis_common_protos-1.69.1-py2.py3-none-any.whl", hash = "sha256:4077f27a6900d5946ee5a369fab9c8ded4c0ef1c6e880458ea2f70c14f7b70d5"}, - {file = "googleapis_common_protos-1.69.1.tar.gz", hash = "sha256:e20d2d8dda87da6fe7340afbbdf4f0bcb4c8fae7e6cadf55926c31f946b0b9b1"}, -] - -[package.dependencies] -protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" - -[package.extras] -grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] - -[[package]] -name = "h5py" -version = "3.13.0" -description = "Read and write HDF5 files from Python" -optional = false -python-versions = ">=3.9" -files = [ - {file = "h5py-3.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5540daee2b236d9569c950b417f13fd112d51d78b4c43012de05774908dff3f5"}, - {file = "h5py-3.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:10894c55d46df502d82a7a4ed38f9c3fdbcb93efb42e25d275193e093071fade"}, - {file = "h5py-3.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb267ce4b83f9c42560e9ff4d30f60f7ae492eacf9c7ede849edf8c1b860e16b"}, - {file = "h5py-3.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2cf6a231a07c14acd504a945a6e9ec115e0007f675bde5e0de30a4dc8d86a31"}, - {file = "h5py-3.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:851ae3a8563d87a5a0dc49c2e2529c75b8842582ccaefbf84297d2cfceeacd61"}, - {file = "h5py-3.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8a8e38ef4ceb969f832cc230c0cf808c613cc47e31e768fd7b1106c55afa1cb8"}, - {file = "h5py-3.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f35640e81b03c02a88b8bf99fb6a9d3023cc52f7c627694db2f379e0028f2868"}, - {file = "h5py-3.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:337af114616f3656da0c83b68fcf53ecd9ce9989a700b0883a6e7c483c3235d4"}, - {file = "h5py-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:782ff0ac39f455f21fd1c8ebc007328f65f43d56718a89327eec76677ebf238a"}, - {file = "h5py-3.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:22ffe2a25770a2d67213a1b94f58006c14dce06933a42d2aaa0318c5868d1508"}, - {file = "h5py-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:477c58307b6b9a2509c59c57811afb9f598aedede24a67da808262dfa0ee37b4"}, - {file = "h5py-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57c4c74f627c616f02b7aec608a8c706fe08cb5b0ba7c08555a4eb1dde20805a"}, - {file = "h5py-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:357e6dc20b101a805ccfd0024731fbaf6e8718c18c09baf3b5e4e9d198d13fca"}, - {file = "h5py-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6f13f9b5ce549448c01e4dfe08ea8d1772e6078799af2c1c8d09e941230a90d"}, - {file = "h5py-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:21daf38171753899b5905f3d82c99b0b1ec2cbbe282a037cad431feb620e62ec"}, - {file = "h5py-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e520ec76de00943dd017c8ea3f354fa1d2f542eac994811943a8faedf2a7d5cb"}, - {file = "h5py-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e79d8368cd9295045956bfb436656bea3f915beaa11d342e9f79f129f5178763"}, - {file = "h5py-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56dd172d862e850823c4af02dc4ddbc308f042b85472ffdaca67f1598dff4a57"}, - {file = "h5py-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be949b46b7388074c5acae017fbbe3e5ba303fd9daaa52157fdfef30bbdacadd"}, - {file = "h5py-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:4f97ecde7ac6513b21cd95efdfc38dc6d19f96f6ca6f2a30550e94e551458e0a"}, - {file = "h5py-3.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:82690e89c72b85addf4fc4d5058fb1e387b6c14eb063b0b879bf3f42c3b93c35"}, - {file = "h5py-3.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d571644958c5e19a61c793d8d23cd02479572da828e333498c9acc463f4a3997"}, - {file = "h5py-3.13.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:560e71220dc92dfa254b10a4dcb12d56b574d2d87e095db20466b32a93fec3f9"}, - {file = "h5py-3.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c10f061764d8dce0a9592ce08bfd5f243a00703325c388f1086037e5d619c5f1"}, - {file = "h5py-3.13.0-cp39-cp39-win_amd64.whl", hash = 
"sha256:9c82ece71ed1c2b807b6628e3933bc6eae57ea21dac207dca3470e3ceaaf437c"}, - {file = "h5py-3.13.0.tar.gz", hash = "sha256:1870e46518720023da85d0895a1960ff2ce398c5671eac3b1a41ec696b7105c3"}, -] - -[package.dependencies] -numpy = ">=1.19.3" - -[[package]] -name = "identify" -version = "2.6.8" -description = "File identification library for Python" -optional = false -python-versions = ">=3.9" -files = [ - {file = "identify-2.6.8-py2.py3-none-any.whl", hash = "sha256:83657f0f766a3c8d0eaea16d4ef42494b39b34629a4b3192a9d020d349b3e255"}, - {file = "identify-2.6.8.tar.gz", hash = "sha256:61491417ea2c0c5c670484fd8abbb34de34cdae1e5f39a73ee65e48e4bb663fc"}, -] - -[package.extras] -license = ["ukkonen"] - -[[package]] -name = "idna" -version = "3.10" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.6" -files = [ - {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, - {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, -] - -[package.extras] -all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] - -[[package]] -name = "jmespath" -version = "1.0.1" -description = "JSON Matching Expressions" -optional = false -python-versions = ">=3.7" -files = [ - {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, - {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, -] - -[[package]] -name = "mypy-extensions" -version = "1.0.0" -description = "Type system extensions for programs checked with the mypy type checker." -optional = false -python-versions = ">=3.5" -files = [ - {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, - {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, -] - -[[package]] -name = "nodeenv" -version = "1.9.1" -description = "Node.js virtual environment builder" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, - {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, -] - -[[package]] -name = "numpy" -version = "2.0.2" -description = "Fundamental package for array computing in Python" -optional = false -python-versions = ">=3.9" -files = [ - {file = "numpy-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:51129a29dbe56f9ca83438b706e2e69a39892b5eda6cedcb6b0c9fdc9b0d3ece"}, - {file = "numpy-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f15975dfec0cf2239224d80e32c3170b1d168335eaedee69da84fbe9f1f9cd04"}, - {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8c5713284ce4e282544c68d1c3b2c7161d38c256d2eefc93c1d683cf47683e66"}, - {file = "numpy-2.0.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:becfae3ddd30736fe1889a37f1f580e245ba79a5855bff5f2a29cb3ccc22dd7b"}, - {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2da5960c3cf0df7eafefd806d4e612c5e19358de82cb3c343631188991566ccd"}, - {file = "numpy-2.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:496f71341824ed9f3d2fd36cf3ac57ae2e0165c143b55c3a035ee219413f3318"}, - {file = "numpy-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a61ec659f68ae254e4d237816e33171497e978140353c0c2038d46e63282d0c8"}, - {file = "numpy-2.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d731a1c6116ba289c1e9ee714b08a8ff882944d4ad631fd411106a30f083c326"}, - {file = "numpy-2.0.2-cp310-cp310-win32.whl", hash = "sha256:984d96121c9f9616cd33fbd0618b7f08e0cfc9600a7ee1d6fd9b239186d19d97"}, - {file = "numpy-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:c7b0be4ef08607dd04da4092faee0b86607f111d5ae68036f16cc787e250a131"}, - {file = "numpy-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:49ca4decb342d66018b01932139c0961a8f9ddc7589611158cb3c27cbcf76448"}, - {file = "numpy-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:11a76c372d1d37437857280aa142086476136a8c0f373b2e648ab2c8f18fb195"}, - {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:807ec44583fd708a21d4a11d94aedf2f4f3c3719035c76a2bbe1fe8e217bdc57"}, - {file = "numpy-2.0.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8cafab480740e22f8d833acefed5cc87ce276f4ece12fdaa2e8903db2f82897a"}, - {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15f476a45e6e5a3a79d8a14e62161d27ad897381fecfa4a09ed5322f2085669"}, - {file = "numpy-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13e689d772146140a252c3a28501da66dfecd77490b498b168b501835041f951"}, - {file = "numpy-2.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9ea91dfb7c3d1c56a0e55657c0afb38cf1eeae4544c208dc465c3c9f3a7c09f9"}, - {file = "numpy-2.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c1c9307701fec8f3f7a1e6711f9089c06e6284b3afbbcd259f7791282d660a15"}, - {file = "numpy-2.0.2-cp311-cp311-win32.whl", hash = "sha256:a392a68bd329eafac5817e5aefeb39038c48b671afd242710b451e76090e81f4"}, - {file = "numpy-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:286cd40ce2b7d652a6f22efdfc6d1edf879440e53e76a75955bc0c826c7e64dc"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:df55d490dea7934f330006d0f81e8551ba6010a5bf035a249ef61a94f21c500b"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8df823f570d9adf0978347d1f926b2a867d5608f434a7cff7f7908c6570dcf5e"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9a92ae5c14811e390f3767053ff54eaee3bf84576d99a2456391401323f4ec2c"}, - {file = "numpy-2.0.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a842d573724391493a97a62ebbb8e731f8a5dcc5d285dfc99141ca15a3302d0c"}, - {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05e238064fc0610c840d1cf6a13bf63d7e391717d247f1bf0318172e759e692"}, - {file = "numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0123ffdaa88fa4ab64835dcbde75dcdf89c453c922f18dced6e27c90d1d0ec5a"}, - {file = "numpy-2.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:96a55f64139912d61de9137f11bf39a55ec8faec288c75a54f93dfd39f7eb40c"}, - {file = "numpy-2.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec9852fb39354b5a45a80bdab5ac02dd02b15f44b3804e9f00c556bf24b4bded"}, - {file = "numpy-2.0.2-cp312-cp312-win32.whl", hash = "sha256:671bec6496f83202ed2d3c8fdc486a8fc86942f2e69ff0e986140339a63bcbe5"}, - {file = "numpy-2.0.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:cfd41e13fdc257aa5778496b8caa5e856dc4896d4ccf01841daee1d96465467a"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9059e10581ce4093f735ed23f3b9d283b9d517ff46009ddd485f1747eb22653c"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:423e89b23490805d2a5a96fe40ec507407b8ee786d66f7328be214f9679df6dd"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:2b2955fa6f11907cf7a70dab0d0755159bca87755e831e47932367fc8f2f2d0b"}, - {file = "numpy-2.0.2-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:97032a27bd9d8988b9a97a8c4d2c9f2c15a81f61e2f21404d7e8ef00cb5be729"}, - {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e795a8be3ddbac43274f18588329c72939870a16cae810c2b73461c40718ab1"}, - {file = "numpy-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b258c385842546006213344c50655ff1555a9338e2e5e02a0756dc3e803dd"}, - {file = "numpy-2.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fec9451a7789926bcf7c2b8d187292c9f93ea30284802a0ab3f5be8ab36865d"}, - {file = "numpy-2.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9189427407d88ff25ecf8f12469d4d39d35bee1db5d39fc5c168c6f088a6956d"}, - {file = "numpy-2.0.2-cp39-cp39-win32.whl", hash = "sha256:905d16e0c60200656500c95b6b8dca5d109e23cb24abc701d41c02d74c6b3afa"}, - {file = "numpy-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:a3f4ab0caa7f053f6797fcd4e1e25caee367db3112ef2b6ef82d749530768c73"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7f0a0c6f12e07fa94133c8a67404322845220c06a9e80e85999afe727f7438b8"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:312950fdd060354350ed123c0e25a71327d3711584beaef30cdaa93320c392d4"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26df23238872200f63518dd2aa984cfca675d82469535dc7162dc2ee52d9dd5c"}, - {file = "numpy-2.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a46288ec55ebbd58947d31d72be2c63cbf839f0a63b49cb755022310792a3385"}, - {file = "numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78"}, -] - -[[package]] -name = "numpy" -version = "2.2.3" -description = "Fundamental package for array computing in Python" -optional = false -python-versions = ">=3.10" -files = [ - {file = "numpy-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cbc6472e01952d3d1b2772b720428f8b90e2deea8344e854df22b0618e9cce71"}, - {file = "numpy-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdfe0c22692a30cd830c0755746473ae66c4a8f2e7bd508b35fb3b6a0813d787"}, - {file = "numpy-2.2.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:e37242f5324ffd9f7ba5acf96d774f9276aa62a966c0bad8dae692deebec7716"}, - {file = "numpy-2.2.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:95172a21038c9b423e68be78fd0be6e1b97674cde269b76fe269a5dfa6fadf0b"}, - {file = "numpy-2.2.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5b47c440210c5d1d67e1cf434124e0b5c395eee1f5806fdd89b553ed1acd0a3"}, - {file = "numpy-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0391ea3622f5c51a2e29708877d56e3d276827ac5447d7f45e9bc4ade8923c52"}, - {file = "numpy-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f6b3dfc7661f8842babd8ea07e9897fe3d9b69a1d7e5fbb743e4160f9387833b"}, - {file = "numpy-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:1ad78ce7f18ce4e7df1b2ea4019b5817a2f6a8a16e34ff2775f646adce0a5027"}, - {file = "numpy-2.2.3-cp310-cp310-win32.whl", hash = "sha256:5ebeb7ef54a7be11044c33a17b2624abe4307a75893c001a4800857956b41094"}, - {file = "numpy-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:596140185c7fa113563c67c2e894eabe0daea18cf8e33851738c19f70ce86aeb"}, - {file = "numpy-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:16372619ee728ed67a2a606a614f56d3eabc5b86f8b615c79d01957062826ca8"}, - {file = "numpy-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5521a06a3148686d9269c53b09f7d399a5725c47bbb5b35747e1cb76326b714b"}, - {file = "numpy-2.2.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:7c8dde0ca2f77828815fd1aedfdf52e59071a5bae30dac3b4da2a335c672149a"}, - {file = "numpy-2.2.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:77974aba6c1bc26e3c205c2214f0d5b4305bdc719268b93e768ddb17e3fdd636"}, - {file = "numpy-2.2.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d42f9c36d06440e34226e8bd65ff065ca0963aeecada587b937011efa02cdc9d"}, - {file = "numpy-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2712c5179f40af9ddc8f6727f2bd910ea0eb50206daea75f58ddd9fa3f715bb"}, - {file = "numpy-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c8b0451d2ec95010d1db8ca733afc41f659f425b7f608af569711097fd6014e2"}, - {file = "numpy-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d9b4a8148c57ecac25a16b0e11798cbe88edf5237b0df99973687dd866f05e1b"}, - {file = "numpy-2.2.3-cp311-cp311-win32.whl", hash = "sha256:1f45315b2dc58d8a3e7754fe4e38b6fce132dab284a92851e41b2b344f6441c5"}, - {file = "numpy-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:9f48ba6f6c13e5e49f3d3efb1b51c8193215c42ac82610a04624906a9270be6f"}, - {file = "numpy-2.2.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:12c045f43b1d2915eca6b880a7f4a256f59d62df4f044788c8ba67709412128d"}, - {file = "numpy-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:87eed225fd415bbae787f93a457af7f5990b92a334e346f72070bf569b9c9c95"}, - {file = "numpy-2.2.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:712a64103d97c404e87d4d7c47fb0c7ff9acccc625ca2002848e0d53288b90ea"}, - {file = "numpy-2.2.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a5ae282abe60a2db0fd407072aff4599c279bcd6e9a2475500fc35b00a57c532"}, - {file = "numpy-2.2.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5266de33d4c3420973cf9ae3b98b54a2a6d53a559310e3236c4b2b06b9c07d4e"}, - {file = "numpy-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b787adbf04b0db1967798dba8da1af07e387908ed1553a0d6e74c084d1ceafe"}, - {file = "numpy-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:34c1b7e83f94f3b564b35f480f5652a47007dd91f7c839f404d03279cc8dd021"}, - {file = "numpy-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4d8335b5f1b6e2bce120d55fb17064b0262ff29b459e8493d1785c18ae2553b8"}, - {file = "numpy-2.2.3-cp312-cp312-win32.whl", hash = "sha256:4d9828d25fb246bedd31e04c9e75714a4087211ac348cb39c8c5f99dbb6683fe"}, - {file = "numpy-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:83807d445817326b4bcdaaaf8e8e9f1753da04341eceec705c001ff342002e5d"}, - {file = "numpy-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bfdb06b395385ea9b91bf55c1adf1b297c9fdb531552845ff1d3ea6e40d5aba"}, - {file = "numpy-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:23c9f4edbf4c065fddb10a4f6e8b6a244342d95966a48820c614891e5059bb50"}, - 
{file = "numpy-2.2.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:a0c03b6be48aaf92525cccf393265e02773be8fd9551a2f9adbe7db1fa2b60f1"}, - {file = "numpy-2.2.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:2376e317111daa0a6739e50f7ee2a6353f768489102308b0d98fcf4a04f7f3b5"}, - {file = "numpy-2.2.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fb62fe3d206d72fe1cfe31c4a1106ad2b136fcc1606093aeab314f02930fdf2"}, - {file = "numpy-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52659ad2534427dffcc36aac76bebdd02b67e3b7a619ac67543bc9bfe6b7cdb1"}, - {file = "numpy-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1b416af7d0ed3271cad0f0a0d0bee0911ed7eba23e66f8424d9f3dfcdcae1304"}, - {file = "numpy-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1402da8e0f435991983d0a9708b779f95a8c98c6b18a171b9f1be09005e64d9d"}, - {file = "numpy-2.2.3-cp313-cp313-win32.whl", hash = "sha256:136553f123ee2951bfcfbc264acd34a2fc2f29d7cdf610ce7daf672b6fbaa693"}, - {file = "numpy-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5b732c8beef1d7bc2d9e476dbba20aaff6167bf205ad9aa8d30913859e82884b"}, - {file = "numpy-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:435e7a933b9fda8126130b046975a968cc2d833b505475e588339e09f7672890"}, - {file = "numpy-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7678556eeb0152cbd1522b684dcd215250885993dd00adb93679ec3c0e6e091c"}, - {file = "numpy-2.2.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2e8da03bd561504d9b20e7a12340870dfc206c64ea59b4cfee9fceb95070ee94"}, - {file = "numpy-2.2.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:c9aa4496fd0e17e3843399f533d62857cef5900facf93e735ef65aa4bbc90ef0"}, - {file = "numpy-2.2.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4ca91d61a4bf61b0f2228f24bbfa6a9facd5f8af03759fe2a655c50ae2c6610"}, - {file = "numpy-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:deaa09cd492e24fd9b15296844c0ad1b3c976da7907e1c1ed3a0ad21dded6f76"}, - {file = "numpy-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:246535e2f7496b7ac85deffe932896a3577be7af8fb7eebe7146444680297e9a"}, - {file = "numpy-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:daf43a3d1ea699402c5a850e5313680ac355b4adc9770cd5cfc2940e7861f1bf"}, - {file = "numpy-2.2.3-cp313-cp313t-win32.whl", hash = "sha256:cf802eef1f0134afb81fef94020351be4fe1d6681aadf9c5e862af6602af64ef"}, - {file = "numpy-2.2.3-cp313-cp313t-win_amd64.whl", hash = "sha256:aee2512827ceb6d7f517c8b85aa5d3923afe8fc7a57d028cffcd522f1c6fd082"}, - {file = "numpy-2.2.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3c2ec8a0f51d60f1e9c0c5ab116b7fc104b165ada3f6c58abf881cb2eb16044d"}, - {file = "numpy-2.2.3-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:ed2cf9ed4e8ebc3b754d398cba12f24359f018b416c380f577bbae112ca52fc9"}, - {file = "numpy-2.2.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39261798d208c3095ae4f7bc8eaeb3481ea8c6e03dc48028057d3cbdbdb8937e"}, - {file = "numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4"}, - {file = "numpy-2.2.3.tar.gz", hash = "sha256:dbdc15f0c81611925f382dfa97b3bd0bc2c1ce19d4fe50482cb0ddc12ba30020"}, -] - -[[package]] -name = "packaging" -version = "24.2" -description = "Core utilities for Python packages" -optional = false -python-versions = ">=3.8" -files = [ - {file = 
"packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, - {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, -] - -[[package]] -name = "pandas" -version = "2.2.3" -description = "Powerful data structures for data analysis, time series, and statistics" -optional = false -python-versions = ">=3.9" -files = [ - {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, - {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, - {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, - {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, - {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, - {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, - {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, - {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, - {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, - {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, - {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, - {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, - {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, - {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, - {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, - {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, - {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, - {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, - {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, - {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, - {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, - {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, - {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, - {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, - {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, - {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, - {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, - {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, - {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, - {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, - {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, - {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, - {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, - {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, - {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, - {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, - {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, - {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, - {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, - {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, - {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, - {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, -] - -[package.dependencies] -numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, -] -python-dateutil = ">=2.8.2" -pytz = ">=2020.1" -tzdata = ">=2022.7" - -[package.extras] -all = ["PyQt5 
(>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] -aws = ["s3fs (>=2022.11.0)"] -clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] -compression = ["zstandard (>=0.19.0)"] -computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] -consortium-standard = ["dataframe-api-compat (>=0.1.7)"] -excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] -feather = ["pyarrow (>=10.0.1)"] -fss = ["fsspec (>=2022.11.0)"] -gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] -hdf5 = ["tables (>=3.8.0)"] -html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] -mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] -output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] -parquet = ["pyarrow (>=10.0.1)"] -performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] -plot = ["matplotlib (>=3.6.3)"] -postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] -pyarrow = ["pyarrow (>=10.0.1)"] -spss = ["pyreadstat (>=1.2.0)"] -sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] -test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] -xml = ["lxml (>=4.9.2)"] - -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.8" -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - -[[package]] -name = "platformdirs" -version = "4.3.6" -description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." -optional = false -python-versions = ">=3.8" -files = [ - {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, - {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, -] - -[package.extras] -docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] -type = ["mypy (>=1.11.2)"] - -[[package]] -name = "pre-commit" -version = "3.8.0" -description = "A framework for managing and maintaining multi-language pre-commit hooks." 
-optional = false -python-versions = ">=3.9" -files = [ - {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, - {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, -] - -[package.dependencies] -cfgv = ">=2.0.0" -identify = ">=1.0.0" -nodeenv = ">=0.11.1" -pyyaml = ">=5.1" -virtualenv = ">=20.10.0" - -[[package]] -name = "progressbar2" -version = "3.55.0" -description = "A Python Progressbar library to provide visual (yet text based) progress to long running operations." -optional = false -python-versions = "*" -files = [ - {file = "progressbar2-3.55.0-py2.py3-none-any.whl", hash = "sha256:e98fee031da31ab9138fd8dd838ac80eafba82764eb75a43d25e3ca622f47d14"}, - {file = "progressbar2-3.55.0.tar.gz", hash = "sha256:86835d1f1a9317ab41aeb1da5e4184975e2306586839d66daf63067c102f8f04"}, -] - -[package.dependencies] -python-utils = ">=2.3.0" -six = "*" - -[package.extras] -docs = ["sphinx (>=1.7.4)"] -tests = ["flake8 (>=3.7.7)", "freezegun (>=0.3.11)", "pytest (>=4.6.9)", "pytest-cov (>=2.6.1)", "sphinx (>=1.8.5)"] - -[[package]] -name = "proto-plus" -version = "1.26.0" -description = "Beautiful, Pythonic protocol buffers" -optional = false -python-versions = ">=3.7" -files = [ - {file = "proto_plus-1.26.0-py3-none-any.whl", hash = "sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"}, - {file = "proto_plus-1.26.0.tar.gz", hash = "sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22"}, -] - -[package.dependencies] -protobuf = ">=3.19.0,<6.0.0dev" - -[package.extras] -testing = ["google-api-core (>=1.31.5)"] - -[[package]] -name = "protobuf" -version = "5.29.3" -description = "" -optional = false -python-versions = ">=3.8" -files = [ - {file = "protobuf-5.29.3-cp310-abi3-win32.whl", hash = "sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888"}, - {file = "protobuf-5.29.3-cp310-abi3-win_amd64.whl", hash = "sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a"}, - {file = "protobuf-5.29.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e"}, - {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84"}, - {file = "protobuf-5.29.3-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f"}, - {file = "protobuf-5.29.3-cp38-cp38-win32.whl", hash = "sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252"}, - {file = "protobuf-5.29.3-cp38-cp38-win_amd64.whl", hash = "sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107"}, - {file = "protobuf-5.29.3-cp39-cp39-win32.whl", hash = "sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7"}, - {file = "protobuf-5.29.3-cp39-cp39-win_amd64.whl", hash = "sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da"}, - {file = "protobuf-5.29.3-py3-none-any.whl", hash = "sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f"}, - {file = "protobuf-5.29.3.tar.gz", hash = "sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620"}, -] - -[[package]] -name = "pyarrow" -version = "19.0.1" -description = "Python library for Apache Arrow" -optional = false -python-versions = ">=3.9" -files = [ - {file = 
"pyarrow-19.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fc28912a2dc924dddc2087679cc8b7263accc71b9ff025a1362b004711661a69"}, - {file = "pyarrow-19.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:fca15aabbe9b8355800d923cc2e82c8ef514af321e18b437c3d782aa884eaeec"}, - {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad76aef7f5f7e4a757fddcdcf010a8290958f09e3470ea458c80d26f4316ae89"}, - {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d03c9d6f2a3dffbd62671ca070f13fc527bb1867b4ec2b98c7eeed381d4f389a"}, - {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:65cf9feebab489b19cdfcfe4aa82f62147218558d8d3f0fc1e9dea0ab8e7905a"}, - {file = "pyarrow-19.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:41f9706fbe505e0abc10e84bf3a906a1338905cbbcf1177b71486b03e6ea6608"}, - {file = "pyarrow-19.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6cb2335a411b713fdf1e82a752162f72d4a7b5dbc588e32aa18383318b05866"}, - {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:cc55d71898ea30dc95900297d191377caba257612f384207fe9f8293b5850f90"}, - {file = "pyarrow-19.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7a544ec12de66769612b2d6988c36adc96fb9767ecc8ee0a4d270b10b1c51e00"}, - {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0148bb4fc158bfbc3d6dfe5001d93ebeed253793fff4435167f6ce1dc4bddeae"}, - {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f24faab6ed18f216a37870d8c5623f9c044566d75ec586ef884e13a02a9d62c5"}, - {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:4982f8e2b7afd6dae8608d70ba5bd91699077323f812a0448d8b7abdff6cb5d3"}, - {file = "pyarrow-19.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:49a3aecb62c1be1d822f8bf629226d4a96418228a42f5b40835c1f10d42e4db6"}, - {file = "pyarrow-19.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:008a4009efdb4ea3d2e18f05cd31f9d43c388aad29c636112c2966605ba33466"}, - {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:80b2ad2b193e7d19e81008a96e313fbd53157945c7be9ac65f44f8937a55427b"}, - {file = "pyarrow-19.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:ee8dec072569f43835932a3b10c55973593abc00936c202707a4ad06af7cb294"}, - {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d5d1ec7ec5324b98887bdc006f4d2ce534e10e60f7ad995e7875ffa0ff9cb14"}, - {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3ad4c0eb4e2a9aeb990af6c09e6fa0b195c8c0e7b272ecc8d4d2b6574809d34"}, - {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d383591f3dcbe545f6cc62daaef9c7cdfe0dff0fb9e1c8121101cabe9098cfa6"}, - {file = "pyarrow-19.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b4c4156a625f1e35d6c0b2132635a237708944eb41df5fbe7d50f20d20c17832"}, - {file = "pyarrow-19.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:5bd1618ae5e5476b7654c7b55a6364ae87686d4724538c24185bbb2952679960"}, - {file = "pyarrow-19.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e45274b20e524ae5c39d7fc1ca2aa923aab494776d2d4b316b49ec7572ca324c"}, - {file = "pyarrow-19.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d9dedeaf19097a143ed6da37f04f4051aba353c95ef507764d344229b2b740ae"}, - {file = 
"pyarrow-19.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ebfb5171bb5f4a52319344ebbbecc731af3f021e49318c74f33d520d31ae0c4"}, - {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a21d39fbdb948857f67eacb5bbaaf36802de044ec36fbef7a1c8f0dd3a4ab2"}, - {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:99bc1bec6d234359743b01e70d4310d0ab240c3d6b0da7e2a93663b0158616f6"}, - {file = "pyarrow-19.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1b93ef2c93e77c442c979b0d596af45e4665d8b96da598db145b0fec014b9136"}, - {file = "pyarrow-19.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:d9d46e06846a41ba906ab25302cf0fd522f81aa2a85a71021826f34639ad31ef"}, - {file = "pyarrow-19.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:c0fe3dbbf054a00d1f162fda94ce236a899ca01123a798c561ba307ca38af5f0"}, - {file = "pyarrow-19.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:96606c3ba57944d128e8a8399da4812f56c7f61de8c647e3470b417f795d0ef9"}, - {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f04d49a6b64cf24719c080b3c2029a3a5b16417fd5fd7c4041f94233af732f3"}, - {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a9137cf7e1640dce4c190551ee69d478f7121b5c6f323553b319cac936395f6"}, - {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7c1bca1897c28013db5e4c83944a2ab53231f541b9e0c3f4791206d0c0de389a"}, - {file = "pyarrow-19.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:58d9397b2e273ef76264b45531e9d552d8ec8a6688b7390b5be44c02a37aade8"}, - {file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:b9766a47a9cb56fefe95cb27f535038b5a195707a08bf61b180e642324963b46"}, - {file = "pyarrow-19.0.1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:6c5941c1aac89a6c2f2b16cd64fe76bcdb94b2b1e99ca6459de4e6f07638d755"}, - {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd44d66093a239358d07c42a91eebf5015aa54fccba959db899f932218ac9cc8"}, - {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:335d170e050bcc7da867a1ed8ffb8b44c57aaa6e0843b156a501298657b1e972"}, - {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:1c7556165bd38cf0cd992df2636f8bcdd2d4b26916c6b7e646101aff3c16f76f"}, - {file = "pyarrow-19.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:699799f9c80bebcf1da0983ba86d7f289c5a2a5c04b945e2f2bcf7e874a91911"}, - {file = "pyarrow-19.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8464c9fbe6d94a7fe1599e7e8965f350fd233532868232ab2596a71586c5a429"}, - {file = "pyarrow-19.0.1.tar.gz", hash = "sha256:3bf266b485df66a400f282ac0b6d1b500b9d2ae73314a153dbe97d6d5cc8a99e"}, -] - -[package.extras] -test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"] - -[[package]] -name = "pyasn1" -version = "0.6.1" -description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"}, - {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"}, -] - -[[package]] -name = "pyasn1-modules" -version = "0.4.1" -description = "A collection of ASN.1-based protocols modules" -optional = false 
-python-versions = ">=3.8" -files = [ - {file = "pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd"}, - {file = "pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c"}, -] - -[package.dependencies] -pyasn1 = ">=0.4.6,<0.7.0" - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -description = "Extensions to the standard Python datetime module" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, - {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, -] - -[package.dependencies] -six = ">=1.5" - -[[package]] -name = "python-utils" -version = "3.9.1" -description = "Python Utils is a module with some convenient utilities not included with the standard Python install" -optional = false -python-versions = ">=3.9.0" -files = [ - {file = "python_utils-3.9.1-py2.py3-none-any.whl", hash = "sha256:0273d7363c7ad4b70999b2791d5ba6b55333d6f7a4e4c8b6b39fb82b5fab4613"}, - {file = "python_utils-3.9.1.tar.gz", hash = "sha256:eb574b4292415eb230f094cbf50ab5ef36e3579b8f09e9f2ba74af70891449a0"}, -] - -[package.dependencies] -typing_extensions = ">3.10.0.2" - -[package.extras] -docs = ["mock", "python-utils", "sphinx"] -loguru = ["loguru"] -tests = ["blessings", "loguru", "loguru-mypy", "mypy-ipython", "pyright", "pytest", "pytest-asyncio", "pytest-cov", "pytest-mypy", "ruff", "sphinx", "types-setuptools"] - -[[package]] -name = "pytz" -version = "2025.1" -description = "World timezone definitions, modern and historical" -optional = false -python-versions = "*" -files = [ - {file = "pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57"}, - {file = "pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e"}, -] - -[[package]] -name = "pyyaml" -version = "6.0.2" -description = "YAML parser and emitter for Python" -optional = false -python-versions = ">=3.8" -files = [ - {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, - {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, - {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, - {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, - {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, - {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, - {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, - {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, - {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, - {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, - {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, - {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, - {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, - {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, - {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, - {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, - {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, -] - -[[package]] -name = "requests" -version = "2.32.3" -description = "Python HTTP for Humans." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, - {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "rsa" -version = "4.9" -description = "Pure-Python RSA implementation" -optional = false -python-versions = ">=3.6,<4" -files = [ - {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, - {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, -] - -[package.dependencies] -pyasn1 = ">=0.1.3" - -[[package]] -name = "s3transfer" -version = "0.11.4" -description = "An Amazon S3 Transfer Manager" -optional = false -python-versions = ">=3.8" -files = [ - {file = "s3transfer-0.11.4-py3-none-any.whl", hash = "sha256:ac265fa68318763a03bf2dc4f39d5cbd6a9e178d81cc9483ad27da33637e320d"}, - {file = "s3transfer-0.11.4.tar.gz", hash = "sha256:559f161658e1cf0a911f45940552c696735f5c74e64362e515f333ebed87d679"}, -] - -[package.dependencies] -botocore = ">=1.37.4,<2.0a.0" - -[package.extras] -crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"] - -[[package]] -name = "six" -version = "1.17.0" -description = "Python 2 and 3 compatibility utilities" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -files = [ - {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, - {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, -] - -[[package]] -name = "sqlite-shelve" -version = "2.2.1" -description = "A SQLite implementation of the Python Shelf interface" -optional = false -python-versions = ">=3.6" -files = [ - {file = "sqlite-shelve-2.2.1.tar.gz", hash = "sha256:49bfd073a014a0e89aa6c58567a347f33bec70d20397598c5fc5fb831ae8bd82"}, - {file = "sqlite_shelve-2.2.1-py3-none-any.whl", hash = "sha256:c09e08139bc858d8970cd0325433da80b1ebf72706280096dbcfddf071c73ab9"}, -] - -[[package]] -name = "taigapy" -version = "3.13.0" -description = "Client library for fetching data from Taiga" -optional = false -python-versions = ">=3.9" -files = [ - {file = "taigapy-3.13.0-py3-none-any.whl", hash = "sha256:f8005d1e830555d3f6132a507daa33ec9772c4de6ca55b2ff81be6dadd3b8689"}, - {file = "taigapy-3.13.0.tar.gz", hash = "sha256:d0a07824a9db34a7008768bb9b9fcfc3e3dc9ee4f33a3f347771bc3b5b02db21"}, -] - -[package.dependencies] -black = ">=24.3.0,<25.0.0" -boto3 = ">=1.26.0,<2.0.0" -colorful = ">=0.5.5,<0.6.0" -google-cloud-storage = ">=2.2.0,<3.0.0" -h5py = ">=3.10.0,<4.0.0" -pandas = ">=1.0.0" -pre-commit = ">=3.7.0,<4.0.0" -progressbar2 = ">=3.3.0,<4.0.0" -pyarrow = ">3.0.0" -requests = ">=2.28.2,<3.0.0" -sqlite-shelve = ">=2.0.1,<3.0.0" -typing-extensions = ">=4.8.0,<5.0.0" - -[package.source] -type = "legacy" -url = "https://us-central1-python.pkg.dev/cds-artifacts/public-python/simple" -reference = "gcp-artifact-registry" - -[[package]] -name = "tomli" -version = "2.2.1" -description = "A lil' TOML parser" -optional = false -python-versions = ">=3.8" -files = [ - {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, - {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, - {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, - {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, - {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, - {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, - {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, - {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, - {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, - {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, - {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, - {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, - {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, - {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, - {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, - {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, - {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, - {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, - {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, - {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[[package]] -name = "tzdata" -version = "2025.1" -description = "Provider of IANA time zone data" -optional = false -python-versions = ">=2" -files = [ - {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, - {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, -] - -[[package]] -name = "urllib3" -version = "1.26.20" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -files = [ - {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, - {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, -] - -[package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] -socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] - -[[package]] -name = "urllib3" -version = "2.3.0" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = false -python-versions = ">=3.9" -files = [ - {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, - {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "virtualenv" -version = "20.29.3" -description = "Virtual Python Environment builder" -optional = false -python-versions = ">=3.8" -files = [ - {file = "virtualenv-20.29.3-py3-none-any.whl", hash = "sha256:3e3d00f5807e83b234dfb6122bf37cfadf4be216c53a49ac059d02414f819170"}, - {file = "virtualenv-20.29.3.tar.gz", hash = "sha256:95e39403fcf3940ac45bc717597dba16110b74506131845d9b687d5e73d947ac"}, -] - -[package.dependencies] -distlib = ">=0.3.7,<1" -filelock = ">=3.12.2,<4" -platformdirs = ">=3.9.1,<5" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] - -[metadata] -lock-version = "2.0" -python-versions = "^3.9" -content-hash = "7d865a5e1efc7647a38a11f35a403c43f525de9b02e522c4cb7204c67ed8fe06" diff --git a/analysis-pipeline/predictability/fit.conseq b/analysis-pipeline/predictability/fit.conseq deleted file mode 100644 index 04c032fed..000000000 --- a/analysis-pipeline/predictability/fit.conseq +++ /dev/null @@ -1,181 +0,0 @@ -# Three Steps: -# 1. Generate a daintree input config file for each model and screen -# 2. Run the model fitting -# 3. 
Combine the output config files - -rule process_model_config: - inputs: - # Model config yaml file - model_config=fileref("model-config.yaml"), - - # Target matrices - crispr_gene_effect={"type": "target_matrix", "label": "crispr_gene_effect"}, - rnai={"type": "target_matrix", "label": "rnai"}, - oncref={"type": "target_matrix", "label": "oncref"}, - - # Features - lineage={"type": "feature", "label": "lineage"}, - crispr_confounder={"type": "feature", "label": "crispr_confounder"}, - rnai_confounder={"type": "feature", "label": "rnai_confounder"}, - oncref_confounder={"type": "feature", "label": "oncref_confounder"}, - driver_events={"type": "feature", "label": "driver_events"}, - armlevel_cna={"type": "feature", "label": "armlevel_cna"}, - cytoband_cn={"type": "feature", "label": "cytoband_cn"}, - genetic_signature={"type": "feature", "label": "genetic_signature"}, - mutations_hotspot={"type": "feature", "label": "mutations_hotspot"}, - mutations_damaging={"type": "feature", "label": "mutations_damaging"}, - gene_cn={"type": "feature", "label": "gene_cn"}, - loh={"type": "feature", "label": "loh"}, - rnaseq={"type": "feature", "label": "rnaseq"}, - - # Script to generate daintree input config file - script=fileref("scripts/generate_daintree_input_configs.py"), - - run "python" with """ - import json - - config_dict = {{inputs}} - with open("daintree_input_config.json", 'w') as f: - json.dump(config_dict, f, indent=2) - - """ - run "python {{ inputs.script.filename }} --model_config {{ inputs.model_config.filename }} --input_config 'daintree_input_config.json'" - - -rule run_fit_models: - resources: {'slots': "0.5"} # let up to 2 of these run in parallel - inputs: - daintree_input_config={ - "type": "daintree_input_config" - }, - release_taiga_id={ - "type": "release_taiga_id" - }, - sparkles_config=fileref("sparkles-config", copy_to="sparkles-config") - outputs: - { - "type": "daintree_output_config", - "name": "{{ inputs.daintree_input_config.label }}", - "filename": {"$filename": "daintree_output_config.json"} - } - run "python" with """ - import subprocess - import os - import glob - import json - import shutil - - # Get the input config file and copy it to current directory - input_config_filepath = "{{ inputs.daintree_input_config.filename }}" - local_input_config = "{{ inputs.daintree_input_config.label }}.json" - shutil.copy(input_config_filepath, local_input_config) - - relative_path = os.path.relpath(os.getcwd(), '/work') - host_work_dir = os.environ.get('HOST_WORKSPACE_PATH', '/data1/jenkins/workspace/Analysis_Pipeline/analysis-pipeline') - host_current_dir = os.path.join(host_work_dir, relative_path) - - print(f"relative_path: {relative_path}") - print(f"host_work_dir: {host_work_dir}") - print(f"host_current_dir: {host_current_dir}") - - docker_command = [ - "docker", "run", - "--rm", - "-v", f"{host_current_dir}:/daintree", - "-v", f"/home/ubuntu/.taiga/token:/root/.taiga/token", - "-v", f"/home/ubuntu/.sparkles-cache/service-keys/broad-achilles.json:/root/.sparkles-cache/service-keys/broad-achilles.json", - "--entrypoint", "/bin/bash", - "us.gcr.io/broad-achilles/daintree:v1", - "-c", f"mkdir -p /daintree/daintree_scripts/ && cp -r /daintree_scripts/* /daintree/daintree_scripts/ && /install/depmap-py/bin/python3.9 -u /daintree/daintree_scripts/run_fit_models.py collect-and-fit \ - --input-config /daintree/{local_input_config} \ - --sparkles-config /daintree/sparkles-config \ - --out /daintree/output_data \ - --test \"True\" \ - --skipfit \"False\" \ - --upload-to-taiga \"{{ 
inputs.release_taiga_id.dataset_id }}\"" - ] - - subprocess.run( - docker_command, - check=True - ) - - # Find the output config file using glob - output_config_files = glob.glob(os.path.join(os.getcwd(), "output_data", "output_config_files", "*.json")) - if not output_config_files: - raise FileNotFoundError("No output config files found") - - # Use the first, there should only be one matching file - output_config_file = output_config_files[0] - - try: - with open(output_config_file, 'r') as f: - output_config = json.load(f) - with open("daintree_output_config.json", 'w') as f: - json.dump(output_config, f, indent=2) - except json.JSONDecodeError as e: - logger.error(f"Invalid JSON in output config: {e}") - raise - """ - - -rule combine_output_configs: - inputs: - daintree_output_config = all{ - "type": "daintree_output_config" - } - outputs: - { - "type": "combined_daintree_output_config", - "filename": {"$filename": "combined_daintree_output_config.json"} - } - run "python" with """ - import json - import os - - def merge_json_files(json_files): - combined = {} - - for file_path in json_files: - with open(file_path, 'r') as f: - data = json.load(f) - - model_name = list(data.keys())[0] - screen_name = data[model_name]["input"]["screen_name"] - - # Initialize the screen in combined if it doesn't exist - if screen_name not in combined: - combined[screen_name] = {} - - # Add the model data to the appropriate screen - combined[screen_name][model_name] = data[model_name] - - return combined - - artifacts = {{ inputs.daintree_output_config }} - list_of_files = [artifact['filename'] for artifact in artifacts] - - combined_output_config = merge_json_files(list_of_files) - - try: - with open("combined_daintree_output_config.json", 'w') as f: - json.dump(combined_output_config, f, indent=2) - except json.JSONDecodeError as e: - logger.error(f"Invalid JSON in combined output config: {e}") - raise - - # Publish the combined output config file to gcp bucket - import subprocess - import datetime - - os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/etc/google_default_creds.json" - bucket_name = "preprocessing-pipeline-outputs" - timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') - - subprocess.run([ - "gsutil", "-o", f"Credentials:gs_service_key_file=/etc/google_default_creds.json", - "cp", "combined_daintree_output_config.json", - f"gs://{bucket_name}/analysis-pipeline/combined_daintree_output_config-{timestamp}.json" - ]) - - """ diff --git a/analysis-pipeline/predictability/predictability_inputs.conseq b/analysis-pipeline/predictability/predictability_inputs.conseq deleted file mode 100644 index dd15e788e..000000000 --- a/analysis-pipeline/predictability/predictability_inputs.conseq +++ /dev/null @@ -1,110 +0,0 @@ -# Target Matrices -add-if-missing { - "type": "target_matrix", - "label": "crispr_gene_effect", - "source_dataset_id": "internal-24q2-3719.82/CRISPRGeneEffect" -} - -add-if-missing { - "type": "target_matrix", - "label": "rnai", - "source_dataset_id": "predictability-legacy-datasets-8c54.14/RNAiDep" -} - -add-if-missing { - "type": "target_matrix", - "label": "oncref", - "source_dataset_id": "oncref-24q4-target-b029.1/OncRef-24Q4-Target-Matrix" -} - -# Feature Matrices -add-if-missing { - "type": "feature", - "label": "lineage", - "category": "lineage", - "source_dataset_id": "predictability-76d5.94/PredictabilityLineageTransformed" -} - -add-if-missing { - "type": "feature", - "label": "crispr_confounder", - "category": "confounder", - "source_dataset_id": 
"predictability-76d5.111/PredictabilityCRISPRConfoundersTransformed" -} - -add-if-missing { - "type": "feature", - "label": "rnai_confounder", - "category": "confounder", - "source_dataset_id": "predictability-legacy-datasets-8c54.14/RNAiConfounders" -} - -add-if-missing { - "type": "feature", - "label": "oncref_confounder", - "category": "confounder", - "source_dataset_id": "prism-oncology-reference-set-24q4-c0d0.3/PRISMOncologyReferenceConfounderMatrix" -} - -add-if-missing { - "type": "feature", - "label": "driver_events", - "category": "driverevents", - "source_dataset_id": "predictability-76d5.99/DriverEvents" -} - -add-if-missing { - "type": "feature", - "label": "armlevel_cna", - "category": "armlevel", - "source_dataset_id": "internal-24q2-3719.82/OmicsArmLevelCNA" -} - -add-if-missing { - "type": "feature", - "label": "cytoband_cn", - "category": "cytoband", - "source_dataset_id": "predictability-76d5.99/PredictabilityGenticDerangementTransformed" -} - -add-if-missing { - "type": "feature", - "label": "genetic_signature", - "category": "geneticsignature", - "source_dataset_id": "internal-24q2-3719.82/OmicsSignatures" -} - -add-if-missing { - "type": "feature", - "label": "mutations_hotspot", - "category": "gene", - "source_dataset_id": "internal-24q2-3719.82/OmicsSomaticMutationsMatrixHotspot" -} - -add-if-missing { - "type": "feature", - "label": "mutations_damaging", - "category": "gene", - "source_dataset_id": "internal-24q2-3719.82/OmicsSomaticMutationsMatrixDamaging" -} - -add-if-missing { - "type": "feature", - "label": "gene_cn", - "category": "gene", - "source_dataset_id": "internal-24q2-3719.82/OmicsCNGene" -} - -add-if-missing { - "type": "feature", - "label": "loh", - "category": "gene", - "source_dataset_id": "internal-24q2-3719.82/OmicsLoH" -} - -add-if-missing { - "type": "feature", - "label": "rnaseq", - "category": "gene", - "source_dataset_id": "internal-24q2-3719.82/OmicsExpressionProteinCodingGenesTPMLogp1" -} diff --git a/analysis-pipeline/predictability/predictability_inputs_internal.template b/analysis-pipeline/predictability/predictability_inputs_internal.template deleted file mode 100644 index e5096e16b..000000000 --- a/analysis-pipeline/predictability/predictability_inputs_internal.template +++ /dev/null @@ -1,185 +0,0 @@ -# SET_TAIGA_PREPROCESSOR release_taiga_id "nayeem-test-internal-24q4-8352" - -# Destination Taiga ID -add-if-missing { - "type": "release_taiga_id", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id) -} - -# CN gene expression data -add-if-missing { - "type": "cngene", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsCNGene") -} - -# Model -add-if-missing { - "type": "model", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "Model") -} - -# Portal Compounds -add-if-missing { - "type": "repsdrug_matrix", - "dataset_id": "repurposing-public-24q2-875f.4/Repurposing_Public_24Q2_Extended_Primary_Data_Matrix" -} - -add-if-missing { - "type": "repsdrug_auc_matrix", - "dataset_id": "public-non-quarterly-processed-files-8e90.64/repsdrug-auc-matrix" -} - -add-if-missing { - "type": "portal_compounds", - "dataset_id": "compound-metadata-de37.32/PortalCompounds" -} - -add-if-missing { - "type": "prism_oncology_reference_auc_matrix", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "PRISMOncologyReferenceLog2AUCMatrix") -} - -# HGNC Gene Table -add-if-missing { - "type": "hgnc_gene_table", - "dataset_id": "hgnc-gene-table-e250.3/hgnc_complete_set" -} - -# Driver Events -add-if-missing { - "type": "mutations", - "dataset_id": 
PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsSomaticMutations") -} - -add-if-missing { - "type": "oncokb_annotated", - "dataset_id": "oncokb-annotated-mutations-7e2e.17/oncokb_annotated" -} - -# Fusion -add-if-missing { - "type": "fusion", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsFusionFiltered") -} - -# CRISPR Confounders -add-if-missing { - "type": "achilles_screen_qc_report", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "AchillesScreenQCReport") -} - -add-if-missing { - "type": "crispr_screen_map", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRScreenMap") -} - -# Target Matrices -add-if-missing { - "type": "target_matrix", - "label": "crispr_gene_effect", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRGeneEffect") -} - -add-if-missing { - "type": "target_matrix", - "label": "rnai", - "source_dataset_id": "predictability-legacy-datasets-8c54.14/RNAiDep" -} - -add-if-missing { - "type": "target_matrix", - "label": "oncref", - "source_dataset_id": "oncref-24q4-target-b029.1/OncRef-24Q4-Target-Matrix" -} - -# Feature Matrices -add-if-missing { - "type": "feature", - "label": "lineage", - "category": "lineage", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "PredictabilityLineageTransformed") -} - -add-if-missing { - "type": "feature", - "label": "crispr_confounder", - "category": "confounder", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "PredictabilityCRISPRConfoundersTransformed") -} - -add-if-missing { - "type": "feature", - "label": "rnai_confounder", - "category": "confounder", - "source_dataset_id": "predictability-legacy-datasets-8c54.14/RNAiConfounders" -} - -add-if-missing { - "type": "feature", - "label": "oncref_confounder", - "category": "confounder", - "source_dataset_id": "prism-oncology-reference-set-24q4-c0d0.3/PRISMOncologyReferenceConfounderMatrix" -} - -add-if-missing { - "type": "feature", - "label": "driver_events", - "category": "driverevents", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "PredictabilityDriverEventsTransformed") -} - -add-if-missing { - "type": "feature", - "label": "armlevel_cna", - "category": "armlevel", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsArmLevelCNA") -} - -add-if-missing { - "type": "feature", - "label": "cytoband_cn", - "category": "cytoband", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "PredictabilityGeneticDerangementTransformed") -} - -add-if-missing { - "type": "feature", - "label": "genetic_signature", - "category": "geneticsignature", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsSignatures") -} - -add-if-missing { - "type": "feature", - "label": "mutations_hotspot", - "category": "gene", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsSomaticMutationsMatrixHotspot") -} - -add-if-missing { - "type": "feature", - "label": "mutations_damaging", - "category": "gene", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsSomaticMutationsMatrixDamaging") -} - -add-if-missing { - "type": "feature", - "label": "gene_cn", - "category": "gene", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsCNGene") -} - -add-if-missing { - "type": "feature", - "label": "loh", - "category": "gene", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsLoH") -} - -add-if-missing { - "type": "feature", - "label": "rnaseq", - "category": "gene", - "source_dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, 
"OmicsExpressionProteinCodingGenesTPMLogp1") -} diff --git a/analysis-pipeline/predictability/preprocess_taiga_ids.py b/analysis-pipeline/predictability/preprocess_taiga_ids.py deleted file mode 100644 index 33b969cc0..000000000 --- a/analysis-pipeline/predictability/preprocess_taiga_ids.py +++ /dev/null @@ -1,77 +0,0 @@ -import sys -import re -from taigapy import create_taiga_client_v3 -import os - -# this script exists to rewrite any Taiga IDs into their canonical form. (This allows conseq to recognize when data files are the same by just comparing taiga IDs) -# -# as a secondary concern, all these taiga IDs must exist in a file that this processes, so this also handles a "TAIGA_PREPROCESSOR_INCLUDE" statement to merge multiple files -# into one while the taiga IDs are being processed - -tc = create_taiga_client_v3() - - -def _rewrite_stream(vars, in_name, in_lines, out_fd): - fd = out_fd - for line in in_lines: - m = re.match('#\\s*TAIGA_PREPROCESSOR_INCLUDE\\s+"([^"]+)"\\s*', line) - if m is not None: - filename = m.group(1) - filename = os.path.join(os.path.dirname(in_name), filename) - with open(filename, "rt") as fd_in: - included_lines = fd_in.readlines() - _rewrite_stream(vars, filename, included_lines, fd) - continue - - m = re.match('#\\s*SET_TAIGA_PREPROCESSOR\\s+(\\S+)\\s+"([^"]+)"\\s*', line) - if m is not None: - variable_name = m.group(1) - value = m.group(2) - vars[variable_name] = value - - m = re.match("(.*)PREPROCESS_TAIGA_ID\\(([^ ,]+)\\)(.*)", line, re.DOTALL) - if m is not None: - line_prefix = m.group(1) - orig_taiga_dataset_var_name = m.group(2) - line_suffix = m.group(3) - line = ( - line_prefix - + '"' - + vars[orig_taiga_dataset_var_name] - + '"' - + line_suffix - ) - - m = re.match( - '(.*)PREPROCESS_TAIGA_ID\\(([^ ,]+), "([^"]+)"\\)(.*)', line, re.DOTALL - ) - if m is not None: - orig_taiga_dataset_var_name = m.group(2) - line_prefix = m.group(1) - line_suffix = m.group(4) - - taiga_filename = m.group(3) - taiga_permaname = vars[orig_taiga_dataset_var_name] - taiga_dataset_id_with_latest_version = tc.get_latest_version_id( - taiga_permaname - ) - taiga_id = taiga_dataset_id_with_latest_version + "/" + taiga_filename - try: - tc.get_canonical_id(taiga_id) - except: - print(f"failed to get data from canonical taiga id for {taiga_id}") - line = line_prefix + '"' + tc.get_canonical_id(taiga_id) + '"' + line_suffix - fd.write(line) - - -def rewrite_file(in_name, out_name): - with open(in_name, "rt") as fd: - lines = fd.readlines() - - vars = {} - with open(out_name, "wt") as out_fd: - _rewrite_stream(vars, in_name, lines, out_fd) - - -if __name__ == "__main__": - rewrite_file(sys.argv[1], sys.argv[2]) diff --git a/analysis-pipeline/predictability/run_internal_analysis.conseq b/analysis-pipeline/predictability/run_internal_analysis.conseq deleted file mode 100644 index 2c8f0147c..000000000 --- a/analysis-pipeline/predictability/run_internal_analysis.conseq +++ /dev/null @@ -1,10 +0,0 @@ -eval """ -import subprocess -subprocess.check_call(["python", "predictability/preprocess_taiga_ids.py", - "predictability/predictability_inputs_internal.template", - "predictability/predictability_inputs_internal-DO-NOT-EDIT-ME"]) -""" - -include "predictability/predictability_inputs_internal-DO-NOT-EDIT-ME" - -include "predictability/fit.conseq" diff --git a/analysis-pipeline/predictability/scripts/generate_daintree_input_configs.py b/analysis-pipeline/predictability/scripts/generate_daintree_input_configs.py deleted file mode 100644 index 199ccaca2..000000000 --- 
a/analysis-pipeline/predictability/scripts/generate_daintree_input_configs.py +++ /dev/null @@ -1,127 +0,0 @@ -import argparse -import json -import yaml -import os -from typing import List, Dict, Any - -screens = ["crispr", "rnai", "oncref"] - - -def generate_daintree_configs( - model_config_path: str, input_config_path: str -) -> List[Dict[str, Any]]: - """ - Generate Daintree input configs for each model and screen - Args: - model_config_path: A yaml file that contains the configuration for the models - input_config_path: A json file that contains the configuration for the input data - Returns: - List of artifacts containing the generated config information - """ - # add a check to make sure the files exist - if not os.path.exists(model_config_path): - raise FileNotFoundError(f"Model config file not found: {model_config_path}") - if not os.path.exists(input_config_path): - raise FileNotFoundError(f"Input config file not found: {input_config_path}") - - # add a check to make sure the files are yaml and json - if not model_config_path.endswith(".yaml"): - raise ValueError(f"Model config file must be a yaml file: {model_config_path}") - if not input_config_path.endswith(".json"): - raise ValueError(f"Input config file must be a json file: {input_config_path}") - - artifacts = [] - - # Load model config - with open(model_config_path, "r") as file: - config = yaml.safe_load(file) - - assert len(config) > 0, "Model config cannot be empty" - - # Load input config - with open(input_config_path, "r") as file: - input_config = json.load(file) - - assert len(input_config) > 0, "Input config cannot be empty" - - # Process each model for both CRISPR and RNAi screens - for model_name, model_config in config.items(): - for screen in screens: - output_json = {"model_name": model_name, "screen_name": screen, "data": {}} - - # Set target based on screen type - target = screen - if screen == "crispr": - target_key = "crispr_gene_effect" - elif screen == "rnai": - target_key = "rnai" - elif screen == "oncref": - target_key = "oncref" - target_input = input_config[target_key] - - output_json["data"][target] = { - "taiga_id": target_input["source_dataset_id"], - "table_type": "target_matrix", - "relation": model_config["Relation"], - } - - # Map features to their corresponding inputs - feature_mapping = { - "lineage": "lineage", - "confounder": f"{screen}_confounder", - "driver_events": "driver_events", - "armlevel_cn": "armlevel_cna", - "cytoband_cn": "cytoband_cn", - "genetic_signature": "genetic_signature", - "mutations_hotspot": "mutations_hotspot", - "mutations_damaging": "mutations_damaging", - "gene_cn": "gene_cn", - "loh": "loh", - "rnaseq": "rnaseq", - } - - for feature in model_config["Features"]: - input_key = feature_mapping[feature] - feature_input = input_config[input_key] - - # Special handling for confounder naming in output - feature_name = feature - if feature == "confounder": - feature_name = f"{screen}_confounder" - - output_json["data"][feature_name] = { - "taiga_id": feature_input["source_dataset_id"], - "table_type": feature_input["type"], - "dim_type": feature_input["category"], - "required": feature in model_config["Required"], - "exempt": False, - } - - # Generate output filename - model_and_screen = f"{model_name}{screen}" - output_filename = f"DaintreeInputConfig{model_and_screen}.json" - - with open(output_filename, "w") as f: - json.dump(output_json, f, indent=2) - artifacts.append( - { - "type": "daintree_input_config", - "model_and_screen": model_and_screen, - "label": 
f"DaintreeInputConfig{model_and_screen}", - "filename": {"$filename": output_filename}, - } - ) - - # Write results - with open("results.json", "w") as f: - json.dump({"outputs": artifacts}, f, indent=2) - - return artifacts - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--model_config", type=str, required=True) - parser.add_argument("--input_config", type=str, required=True) - args = parser.parse_args() - generate_daintree_configs(args.model_config, args.input_config) diff --git a/analysis-pipeline/predictability/sparkles-config b/analysis-pipeline/predictability/sparkles-config deleted file mode 100644 index 50ad08a3e..000000000 --- a/analysis-pipeline/predictability/sparkles-config +++ /dev/null @@ -1,12 +0,0 @@ -[config] -default_url_prefix=gs://broad-achilles-kubeque/depmap-pipeline -project=broad-achilles -default_image=us.gcr.io/broad-achilles/tda-pipeline:v2 -region=us-central1 -zones=us-central1-b -machine_type=n2-highmem-2 -account=856306709302-compute@developer.gserviceaccount.com -boot_volume_in_gb=40 -mount_1_type=pd-standard -mount_1_size_in_gb=50 -#max_preemptable_attempts_scale=10 diff --git a/analysis-pipeline/pyproject.toml b/analysis-pipeline/pyproject.toml deleted file mode 100644 index 42b688001..000000000 --- a/analysis-pipeline/pyproject.toml +++ /dev/null @@ -1,27 +0,0 @@ -[tool.poetry] -name = "analysis-pipeline" -version = "0.1.1" -description = "Scripts for running the analysis pipeline" -authors = ["Nayeem Aquib "] -packages = [{include = "analysis_pipeline"}] -package-mode = false - - -[tool.poetry.dependencies] -python = "^3.9" -taigapy = {version = "3.13.0", source = "gcp-artifact-registry"} - - -[[tool.poetry.source]] -name = "gcp-artifact-registry" -url = "https://us-central1-python.pkg.dev/cds-artifacts/public-python/simple" -priority = "explicit" - -[[tool.poetry.source]] -name = "public-python" -url = "https://us-central1-python.pkg.dev/cds-artifacts/public-python/simple/" -priority = "supplemental" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/data-prep-pipeline/Dockerfile b/data-prep-pipeline/Dockerfile deleted file mode 100644 index 54ee7dbd2..000000000 --- a/data-prep-pipeline/Dockerfile +++ /dev/null @@ -1,75 +0,0 @@ -FROM ubuntu:jammy - -# Set environment variables -ENV DEBIAN_FRONTEND=noninteractive -ENV LANG=en_US.UTF-8 -ENV LANGUAGE=en_US:en -ENV LC_ALL=en_US.UTF-8 -ENV PATH="/root/.local/bin:/install/gsutil/bin:${PATH}" - -# Install system dependencies in a single layer -RUN apt-get update && apt-get upgrade -y && \ - apt-get install -y \ - python3 \ - python3-pip \ - curl \ - unzip \ - build-essential \ - nano \ - libssl-dev \ - libffi-dev \ - python3-dev \ - libcurl4-openssl-dev \ - libxml2-dev \ - virtualenv \ - wget \ - gpg \ - git \ - software-properties-common \ - locales \ - && locale-gen en_US.UTF-8 \ - && rm -rf /var/lib/apt/lists/* - -# Install Python versions -RUN add-apt-repository ppa:deadsnakes/ppa && \ - apt-get update && \ - apt-get install -y \ - python3.9 \ - python3.9-distutils \ - python3.9-dev \ - && rm -rf /var/lib/apt/lists/* - -# Create virtual environments and install tools -RUN virtualenv --python=python3.9 /install/conseq && \ - virtualenv --python=python3.10 /install/sparkles && \ - virtualenv /install/gsutil && \ - virtualenv /install/dsub - -RUN /install/dsub/bin/pip install git+https://github.com/pgm/dsub.git@patched-0.4.13 && \ - ln -s /install/dsub/bin/dsub /usr/local/bin/dsub && \ - ln -s /install/dsub/bin/dstat 
/usr/local/bin/dstat && \ - /install/sparkles/bin/pip install https://github.com/broadinstitute/sparklespray/releases/download/v4.0.2/sparklespray-4.0.2.tar.gz && \ - /install/gsutil/bin/pip install gsutil - -RUN pip install boto google.cloud.storage - -# Install conseq -COPY conseq-2.0.2.tar.gz /tmp/ -RUN mkdir -p /tmp/conseq-install && \ - tar -xzf /tmp/conseq-2.0.2.tar.gz -C /tmp/conseq-install && \ - cd /tmp/conseq-install/conseq-2.0.2 && \ - /install/conseq/bin/pip install . && \ - ln -s /install/conseq/bin/conseq /usr/bin/conseq && \ - cp /install/conseq/lib/python3.9/site-packages/conseq/helper.py /helper.py && \ - rm -rf /tmp/conseq* - -# Create necessary directories -RUN mkdir -p /root/.taiga /work/data-prep-pipeline - -# Set up Poetry -RUN curl -sSL https://install.python-poetry.org | python3 - && \ - ln -s /opt/poetry/bin/poetry /usr/local/bin/poetry - -WORKDIR /work/data-prep-pipeline -COPY pyproject.toml poetry.lock ./ -RUN poetry install diff --git a/data-prep-pipeline/data_prep_pipeline/preprocess_taiga_ids.py b/data-prep-pipeline/data_prep_pipeline/preprocess_taiga_ids.py deleted file mode 100644 index fd98610f3..000000000 --- a/data-prep-pipeline/data_prep_pipeline/preprocess_taiga_ids.py +++ /dev/null @@ -1,78 +0,0 @@ -import sys -import re -from taigapy import create_taiga_client_v3 -import requests -import os - -# this script exists to rewrite any Taiga IDs into their canonical form. (This allows conseq to recognize when data files are the same by just comparing taiga IDs) -# -# as a secondary concern, all these taiga IDs must exist in a file that this processes, so this also handles a "TAIGA_PREPROCESSOR_INCLUDE" statement to merge multiple files -# into one while the taiga IDs are being processed - -tc = create_taiga_client_v3() - - -def _rewrite_stream(vars, in_name, in_lines, out_fd): - fd = out_fd - for line in in_lines: - m = re.match('#\\s*TAIGA_PREPROCESSOR_INCLUDE\\s+"([^"]+)"\\s*', line) - if m is not None: - filename = m.group(1) - filename = os.path.join(os.path.dirname(in_name), filename) - with open(filename, "rt") as fd_in: - included_lines = fd_in.readlines() - _rewrite_stream(vars, filename, included_lines, fd) - continue - - m = re.match('#\\s*SET_TAIGA_PREPROCESSOR\\s+(\\S+)\\s+"([^"]+)"\\s*', line) - if m is not None: - variable_name = m.group(1) - value = m.group(2) - vars[variable_name] = value - - m = re.match("(.*)PREPROCESS_TAIGA_ID\\(([^ ,]+)\\)(.*)", line, re.DOTALL) - if m is not None: - line_prefix = m.group(1) - orig_taiga_dataset_var_name = m.group(2) - line_suffix = m.group(3) - line = ( - line_prefix - + '"' - + vars[orig_taiga_dataset_var_name] - + '"' - + line_suffix - ) - - m = re.match( - '(.*)PREPROCESS_TAIGA_ID\\(([^ ,]+), "([^"]+)"\\)(.*)', line, re.DOTALL - ) - if m is not None: - orig_taiga_dataset_var_name = m.group(2) - line_prefix = m.group(1) - line_suffix = m.group(4) - - taiga_filename = m.group(3) - taiga_permaname = vars[orig_taiga_dataset_var_name] - taiga_dataset_id_with_latest_version = tc.get_latest_version_id( - taiga_permaname - ) - taiga_id = taiga_dataset_id_with_latest_version + "/" + taiga_filename - try: - tc.get_canonical_id(taiga_id) - except: - print(f"failed to get data from canonical taiga id for {taiga_id}") - line = line_prefix + '"' + tc.get_canonical_id(taiga_id) + '"' + line_suffix - fd.write(line) - - -def rewrite_file(in_name, out_name): - with open(in_name, "rt") as fd: - lines = fd.readlines() - - vars = {} - with open(out_name, "wt") as out_fd: - _rewrite_stream(vars, in_name, lines, out_fd) - - 
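# A minimal sketch of the rewrite this script performs. The directive lines below are
# copied from the release_inputs templates in this repo; the resolved version number in
# the rewritten output is hypothetical:
#
#   # SET_TAIGA_PREPROCESSOR release_taiga_id "internal-25q3-b129"
#   "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsCNGeneWGS")
#
# After rewrite_file() resolves the permaname via get_latest_version_id() and
# get_canonical_id(), the second line becomes something like:
#
#   "dataset_id": "internal-25q3-b129.3/OmicsCNGeneWGS"
#
# The bare PREPROCESS_TAIGA_ID(release_taiga_id) form is simply substituted with the
# permaname string itself ("internal-25q3-b129").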
-if __name__ == "__main__": - rewrite_file(sys.argv[1], sys.argv[2]) diff --git a/data-prep-pipeline/data_prep_pipeline/publish.conseq b/data-prep-pipeline/data_prep_pipeline/publish.conseq deleted file mode 100644 index 93b891fd5..000000000 --- a/data-prep-pipeline/data_prep_pipeline/publish.conseq +++ /dev/null @@ -1,98 +0,0 @@ -rule publish_hgnc_gene_table: - inputs: - hgnc_gene_table={"type": "gene"}, - release_taiga_id={"type": "release_taiga_id"}, - update_taiga_script=fileref('upload_to_taiga.py') - outputs:{"type": "release_hgnc_gene_table_published"} - - run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Updated HGNC gene table for release from data-prep-pipeline' 'Gene' {{inputs.hgnc_gene_table.filename}} 'csv_table'" - - -rule publish_cngene_log2: - inputs: - cngene_log2={"type": "cngene_log2"}, - release_taiga_id={"type": "release_taiga_id"}, - update_taiga_script=fileref('upload_to_taiga.py') - outputs:{"type": "release_cngene_log2_published"} - - run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Updated cngene log2 data for release from data-prep-pipeline' 'PortalOmicsCNGeneLog2' {{inputs.cngene_log2.filename}} 'csv_matrix'" - - -rule publish_filtered_portal_compounds: - inputs: - filtered_portal_compounds={"type": "filtered_portal_compounds"}, - release_taiga_id={"type": "release_taiga_id"}, - update_taiga_script=fileref('upload_to_taiga.py') - outputs:{"type": "release_filtered_portal_compounds_published"} - - run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Filtered portal compounds data for release from data-prep-pipeline' 'PortalCompounds' {{inputs.filtered_portal_compounds.filename}} 'csv_table'" - - -rule publish_subtype_tree: - inputs: - subtype_tree={"type": "subtype_tree"}, - release_taiga_id={"type": "release_taiga_id"}, - update_taiga_script=fileref('upload_to_taiga.py') - outputs:{"type": "release_subtype_tree_published"} - - run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Created SubtypeTree for release from data-prep-pipeline' 'SubtypeTree' {{inputs.subtype_tree.filename}} 'csv_table'" - - -rule publish_context_matrix: - inputs: - context_matrix={"type": "context_matrix"}, - release_taiga_id={"type": "release_taiga_id"}, - update_taiga_script=fileref('upload_to_taiga.py') - outputs:{"type": "release_context_matrix_published"} - - run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Created SubtypeMatrix for release from data-prep-pipeline' 'SubtypeMatrix' {{inputs.context_matrix.filename}} 'csv_matrix'" - - -# rule publish_driver_events: -# inputs: -# driver_events_data={"type": "predictability_driver_events"}, -# release_taiga_id={"type": "release_taiga_id"}, -# update_taiga_script=fileref('upload_to_taiga.py') -# outputs:{"type": "predictability_driver_events_published"} - -# run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Generated driver events data for predictability' 'PredictabilityDriverEventsTransformed' {{inputs.driver_events_data.filename}} 'csv_matrix'" - - -# rule publish_genetic_derangement: -# inputs: -# genetic_derangement_data={"type": "predictability_genetic_derangement"}, -# release_taiga_id={"type": "release_taiga_id"}, -# update_taiga_script=fileref('upload_to_taiga.py') -# outputs:{"type": "predictability_genetic_derangement_published"} - -# run "python3 
{{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Generated genetic derangement data for predictability' 'PredictabilityGeneticDerangementTransformed' {{inputs.genetic_derangement_data.filename}} 'csv_matrix'" - - -# rule publish_fusion: -# inputs: -# fusion_data={"type": "predictability_fusion"}, -# release_taiga_id={"type": "release_taiga_id"}, -# update_taiga_script=fileref('upload_to_taiga.py') -# outputs:{"type": "predictability_fusion_published"} - -# run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Generated fusion data for predictability' 'PredictabilityFusionTransformed' {{inputs.fusion_data.filename}} 'csv_matrix'" - - -# rule publish_lineage: -# inputs: -# lineage_data={"type": "predictability_lineage"}, -# release_taiga_id={"type": "release_taiga_id"}, -# update_taiga_script=fileref('upload_to_taiga.py') -# outputs:{"type": "predictability_lineage_published"} - -# run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Generated lineage data for predictability' 'PredictabilityLineageTransformed' {{inputs.lineage_data.filename}} 'csv_matrix'" - - -# rule publish_crispr_confounders: -# inputs: -# crispr_confounders_data={"type": "predictability_crispr_confounders"}, -# release_taiga_id={"type": "release_taiga_id"}, -# update_taiga_script=fileref('upload_to_taiga.py') -# outputs:{"type": "predictability_crispr_confounders_published"} - -# run "python3 {{inputs.update_taiga_script.filename}} {{inputs.release_taiga_id.dataset_id}} 'Generated CRISPR confounders data for predictability' 'PredictabilityCRISPRConfoundersTransformed' {{inputs.crispr_confounders_data.filename}} 'csv_matrix'" diff --git a/data-prep-pipeline/data_prep_pipeline/release_inputs_external.template b/data-prep-pipeline/data_prep_pipeline/release_inputs_external.template deleted file mode 100644 index 1b85a13ff..000000000 --- a/data-prep-pipeline/data_prep_pipeline/release_inputs_external.template +++ /dev/null @@ -1,137 +0,0 @@ -# SET_TAIGA_PREPROCESSOR release_taiga_id "public-25q3-b56c" - -# Destination Taiga ID -add-if-missing { - "type": "release_taiga_id", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id) -} - -# CN gene expression data -add-if-missing { - "type": "cngene", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsCNGeneWGS") -} - -# Model -add-if-missing { - "type": "model", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "Model") -} - -# Portal Compounds -add-if-missing { - "type": "repsdrug_matrix", - "dataset_id": "repurposing-public-24q2-875f.4/Repurposing_Public_24Q2_Extended_Primary_Data_Matrix" -} - -add-if-missing { - "type": "repsdrug_auc_matrix", - "dataset_id": "public-non-quarterly-processed-files-8e90.64/repsdrug-auc-matrix" -} - -add-if-missing { - "type": "portal_compounds", - "dataset_id": "compound-metadata-de37.40/PortalCompounds" -} - -# HGNC Gene Table -add-if-missing { - "type": "hgnc_gene_table", - "dataset_id": "hgnc-gene-table-e250.4/hgnc_complete_set" -} - -# Driver Events -add-if-missing { - "type": "mutations", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsSomaticMutations") -} - -add-if-missing { - "type": "oncokb_annotated", - "dataset_id": "oncokb-annotated-mutations-7e2e.17/oncokb_annotated" -} - -# Fusion -add-if-missing { - "type": "fusion", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsFusionFiltered") -} - -# CRISPR Confounders -add-if-missing { - "type": "achilles_screen_qc_report", - "dataset_id": 
PREPROCESS_TAIGA_ID(release_taiga_id, "AchillesScreenQCReport") -} - -add-if-missing { - "type": "crispr_screen_map", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRScreenMap") -} - -# Legacy Data - -add-if-missing { - "type": "rnai_data", - "dataset_id": "demeter2-combined-dc9c.19/gene_means_proc" -} - -add-if-missing { - "type": "rnai_confounders", - "dataset_id": "confounders-f38f.2/demeter2-combined-v12-confounders" -} - -add-if-missing { - "type": "oncref_confounders", - "dataset_id": "prism-oncology-reference-set-23q4-1a7c.11/PRISM_Oncology_Reference_23Q4_Confounders" -} - -add-if-missing { - "type": "rep_single_pt_confounders", - "dataset_id": "repurposing-public-23q2-341f.10/Repurposing_Public_23Q2_Extended_Matrix_Confounders" -} - -add-if-missing { - "type": "metabolomics", - "dataset_id": "metabolomics-cd0c.4/CCLE_metabolomics_20190502" -} - -add-if-missing { - "type": "oncotree", - "dataset_id": "subtypetree-919e.7/oncotree" -} - -add-if-missing { - "type": "lineage_tree_genetic_subtype_whitelist", - "dataset_id": "subtypetree-919e.9/lineage_tree_genetic_subtype_whitelist", -} - -add-if-missing { - "type": "omics_inferred_molecular_subtype", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsInferredMolecularSubtypes") -} - -# Dummy artifact for filter portal compounds -add-if-missing { - "type": "stub-artifact", -} - -add-if-missing { - "type": "raw-dep-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRGeneEffect"), - "label": "Chronos_Combined", - "rows": "cell-lines", - "confounders_label": "crispr-confounders", -} - -add-if-missing { - "type": "raw-dep-prob-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRGeneDependency"), - "label": "Chronos_Combined", - "rows": "cell-lines" -} - -add-if-missing { - "type": "crispr-inferred-common-essentials", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRInferredCommonEssentials"), - "label": "Chronos_Combined" -} diff --git a/data-prep-pipeline/data_prep_pipeline/release_inputs_internal.template b/data-prep-pipeline/data_prep_pipeline/release_inputs_internal.template deleted file mode 100644 index 16e162ade..000000000 --- a/data-prep-pipeline/data_prep_pipeline/release_inputs_internal.template +++ /dev/null @@ -1,145 +0,0 @@ -# SET_TAIGA_PREPROCESSOR release_taiga_id "internal-25q3-b129" - -# Destination Taiga ID -add-if-missing { - "type": "release_taiga_id", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id) -} - -# CN gene expression data -add-if-missing { - "type": "cngene", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsCNGeneWGS") -} - -# Model -add-if-missing { - "type": "model", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "Model") -} - -# Portal Compounds -add-if-missing { - "type": "repsdrug_matrix", - "dataset_id": "repurposing-public-24q2-875f.4/Repurposing_Public_24Q2_Extended_Primary_Data_Matrix" -} - -add-if-missing { - "type": "repsdrug_auc_matrix", - "dataset_id": "public-non-quarterly-processed-files-8e90.64/repsdrug-auc-matrix" -} - -add-if-missing { - "type": "portal_compounds", - "dataset_id": "compound-metadata-de37.42/PortalCompounds" -} - -# HGNC Gene Table -add-if-missing { - "type": "hgnc_gene_table", - "dataset_id": "hgnc-gene-table-e250.4/hgnc_complete_set" -} - -# Driver Events -add-if-missing { - "type": "mutations", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsSomaticMutations") -} - -add-if-missing { - "type": "oncokb_annotated", - "dataset_id": 
"oncokb-annotated-mutations-7e2e.17/oncokb_annotated" -} - -# Fusion -add-if-missing { - "type": "fusion", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsFusionFiltered") -} - -# CRISPR Confounders -add-if-missing { - "type": "achilles_screen_qc_report", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "AchillesScreenQCReport") -} - -add-if-missing { - "type": "crispr_screen_map", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRScreenMap") -} - -# Legacy Data - -add-if-missing { - "type": "rnai_data", - "dataset_id": "demeter2-combined-dc9c.19/gene_means_proc" -} - -add-if-missing { - "type": "rnai_confounders", - "dataset_id": "confounders-f38f.2/demeter2-combined-v12-confounders" -} - -add-if-missing { - "type": "oncref_confounders", - "dataset_id": "prism-oncology-reference-set-23q4-1a7c.11/PRISM_Oncology_Reference_23Q4_Confounders" -} - -add-if-missing { - "type": "rep_single_pt_confounders", - "dataset_id": "repurposing-public-23q2-341f.10/Repurposing_Public_23Q2_Extended_Matrix_Confounders" -} - -add-if-missing { - "type": "metabolomics", - "dataset_id": "metabolomics-cd0c.4/CCLE_metabolomics_20190502" -} - -add-if-missing { - "type": "oncotree", - "dataset_id": "subtypetree-919e.7/oncotree" -} - -add-if-missing { - "type": "lineage_tree_genetic_subtype_whitelist", - "dataset_id": "subtypetree-919e.9/lineage_tree_genetic_subtype_whitelist", -} - -add-if-missing { - "type": "omics_inferred_molecular_subtype", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "OmicsInferredMolecularSubtypes") -} - -# Dummy artifact for filter portal compounds. Read the details in filter_portal_compounds.conseq. -add-if-missing { - "type": "stub-artifact" -} - -add-if-missing { - "type": "prism_oncology_reference_auc_matrix", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "PRISMOncologyReferenceLog2AUCMatrix") -} - -# Dummy artifact for filter portal compounds -add-if-missing { - "type": "stub-artifact" -} - -add-if-missing { - "type": "raw-dep-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRGeneEffect"), - "label": "Chronos_Combined", - "confounders_label": "crispr-confounders", -} - -add-if-missing { - "type": "raw-dep-prob-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRGeneDependency"), - "label": "Chronos_Combined", -} - -add-if-missing { - "type": "crispr-inferred-common-essentials", - "dataset_id": PREPROCESS_TAIGA_ID(release_taiga_id, "CRISPRInferredCommonEssentials"), - "label": "Chronos_Combined" -} diff --git a/data-prep-pipeline/data_prep_pipeline/run_external.conseq b/data-prep-pipeline/data_prep_pipeline/run_external.conseq deleted file mode 100644 index c12aef376..000000000 --- a/data-prep-pipeline/data_prep_pipeline/run_external.conseq +++ /dev/null @@ -1,19 +0,0 @@ -eval """ -import subprocess -subprocess.check_call(["python", "data_prep_pipeline/preprocess_taiga_ids.py", - "data_prep_pipeline/release_inputs_external.template", - "data_prep_pipeline/release_inputs_external-DO-NOT-EDIT-ME"]) -""" - -include "data_prep_pipeline/release_inputs_external-DO-NOT-EDIT-ME" - -include "data_prep_pipeline/update_hgnc_gene_table.conseq" -include "data_prep_pipeline/cngene_log_2_transformation.conseq" -include "data_prep_pipeline/filter_portal_compounds.conseq" -# include "data_prep_pipeline/predictability.conseq" -include "data_prep_pipeline/subtype_tree.conseq" - - -if "config.get('is_dev', 'True') != 'True'": - include "data_prep_pipeline/publish.conseq" -endif diff --git 
a/data-prep-pipeline/data_prep_pipeline/run_internal.conseq b/data-prep-pipeline/data_prep_pipeline/run_internal.conseq deleted file mode 100644 index 5250a5aa5..000000000 --- a/data-prep-pipeline/data_prep_pipeline/run_internal.conseq +++ /dev/null @@ -1,18 +0,0 @@ -eval """ -import subprocess -subprocess.check_call(["python", "data_prep_pipeline/preprocess_taiga_ids.py", - "data_prep_pipeline/release_inputs_internal.template", - "data_prep_pipeline/release_inputs_internal-DO-NOT-EDIT-ME"]) -""" - -include "data_prep_pipeline/release_inputs_internal-DO-NOT-EDIT-ME" - -include "data_prep_pipeline/update_hgnc_gene_table.conseq" -include "data_prep_pipeline/cngene_log_2_transformation.conseq" -include "data_prep_pipeline/filter_portal_compounds.conseq" -# include "data_prep_pipeline/predictability.conseq" -include "data_prep_pipeline/subtype_tree.conseq" - -if "config.get('is_dev', 'True') != 'True'": - include "data_prep_pipeline/publish.conseq" -endif diff --git a/data-prep-pipeline/data_prep_pipeline/upload_to_taiga.py b/data-prep-pipeline/data_prep_pipeline/upload_to_taiga.py deleted file mode 100644 index ca9d43116..000000000 --- a/data-prep-pipeline/data_prep_pipeline/upload_to_taiga.py +++ /dev/null @@ -1,119 +0,0 @@ -import argparse -import hashlib -from pathlib import Path - -from taigapy.client_v3 import UploadedFile, LocalFormat -from taigapy import create_taiga_client_v3 - - -def get_sha256(file_path: Path) -> str: - """Calculate the SHA256 hash of a file.""" - try: - chunk_size = 1 * 1024 * 1024 # 1 MB - sha256_hash = hashlib.sha256() - with open(file_path, "rb") as f: - while chunk := f.read(chunk_size): - sha256_hash.update(chunk) - return sha256_hash.hexdigest() - - except FileNotFoundError as e: - print(f"File not found: {e}") - raise - - -def update_taiga( - dataset_id: str, - description_of_changes: str, - matrix_name_in_taiga: str, - file_local_path: Path, - file_format: str, -) -> None: - """Update a dataset in Taiga with transformed data.""" - assert dataset_id, "Dataset ID cannot be empty" - assert description_of_changes, "Description of changes cannot be empty" - assert matrix_name_in_taiga, "Matrix name in Taiga cannot be empty" - assert file_local_path, "File path cannot be empty" - assert file_format, "File format cannot be empty" - - if file_format == "csv_table": - file_format = LocalFormat.CSV_TABLE - elif file_format == "csv_matrix": - file_format = LocalFormat.CSV_MATRIX - try: - tc = create_taiga_client_v3() - # Update the dataset with the transformed data - version = tc.update_dataset( - dataset_id, - description_of_changes, - additions=[ - UploadedFile( - matrix_name_in_taiga, - local_path=file_local_path, - format=file_format, - ) - ], - ) - print( - f"Updated dataset: {version.permaname} to version number: {version.version_number}" - ) - except Exception as e: - print(f"Error updating Taiga: {e}") - raise - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Update Taiga dataset with transformed data." 
- ) - parser.add_argument("dataset_id", help="Taiga ID of the dataset to update") - parser.add_argument("description_of_changes", help="Description of the changes") - parser.add_argument("matrix_name_in_taiga", help="Name of the matrix in Taiga") - parser.add_argument( - "file_local_path", help="Path to the file that will be uploaded" - ) - parser.add_argument("file_format", help="Format of the file to upload") - args = parser.parse_args() - - tc = create_taiga_client_v3() - - dataset_id_with_latest_version = tc.get_latest_version_id(args.dataset_id) - existing_file_taiga_id = ( - f"{dataset_id_with_latest_version}/{args.matrix_name_in_taiga}" - ) - print(f"Taiga ID of the existing file: {existing_file_taiga_id}") - - # Check if the file with the same name already exists in the dataset - if tc.get_datafile_metadata(existing_file_taiga_id) is None: - print( - f"File with Taiga ID {existing_file_taiga_id} does not exist. Uploading a new file." - ) - update_taiga( - args.dataset_id, - args.description_of_changes, - args.matrix_name_in_taiga, - args.file_local_path, - args.file_format, - ) - else: - # Check if the file to upload is the same as the existing file in Taiga - file_to_upload_sha256 = get_sha256(args.file_local_path) - print(f"SHA256 hash of the file to upload: {file_to_upload_sha256}") - - existing_file_sha256 = tc.get_datafile_metadata( - existing_file_taiga_id - ).original_file_sha256 - print(f"SHA256 hash of the existing file in Taiga: {existing_file_sha256}") - - if file_to_upload_sha256 == existing_file_sha256: - print( - "The file to upload is the same as the existing file in Taiga. Skipping the update." - ) - else: - # If the file to upload is different from the existing file in Taiga, update the dataset - update_taiga( - args.dataset_id, - args.description_of_changes, - args.matrix_name_in_taiga, - args.file_local_path, - args.file_format, - ) diff --git a/data-prep-pipeline/image-name b/data-prep-pipeline/image-name deleted file mode 100644 index 131557179..000000000 --- a/data-prep-pipeline/image-name +++ /dev/null @@ -1 +0,0 @@ -DOCKER_IMAGE=us.gcr.io/broad-achilles/data-prep-pipeline-run:v1 diff --git a/data-prep-pipeline/jenkins-run-pipeline-external.sh b/data-prep-pipeline/jenkins-run-pipeline-external.sh deleted file mode 100755 index ae51efc3a..000000000 --- a/data-prep-pipeline/jenkins-run-pipeline-external.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/bin/bash - -if [ "$1" == "" ]; then -# required: env name - echo "needs name of environment" - exit 1 -fi - -ENV_NAME="$1" -CONSEQ_FILE="data_prep_pipeline/run_external.conseq" -# CONSEQ_FILE="run_$ENV_NAME.conseq" - -if [ "$2" == "" ]; then -# required: job name - echo "needs name to use for job" - exit 1 -fi - -JOB_NAME="$2" - -# if [ "$3" != "" ]; then -# # required: s3 path override -# PUBLISH_DEST="$3" -# echo "let publish_dest = \"$PUBLISH_DEST\"" > "pipeline/overriden-$CONSEQ_FILE" -# # append the result of the conseq file, except for the previous assignment of publish_dest -# grep -v 'let publish_dest' "pipeline/$CONSEQ_FILE" >> "pipeline/overriden-$CONSEQ_FILE" -# CONSEQ_FILE="overriden-$CONSEQ_FILE" -# else -# echo "No s3 path override specified" -# fi - -# set DOCKER_IMAGE from pipeline-run-docker/image-name -SCRIPT_PATH=`dirname $0` -source "$SCRIPT_PATH/image-name" - -COMMIT_SHA=`git rev-parse HEAD` -if [ "${COMMIT_SHA}" == "" ]; then - COMMIT_SHA="unknown" -fi - -set -ex -GOOGLE_APPLICATION_CREDENTIALS=/etc/google/auth/application_default_credentials.json docker pull ${DOCKER_IMAGE} - -# Copy all logs. 
I'm copying this to a new directory because each time we run we gc the state directory and that -# causes old logs to be deleted which makes it harder to investigate what happened. -function backup_conseq_logs { - file_list=`mktemp` - if [ -e data-prep-pipeline/state ] ; then - ( cd data-prep-pipeline/state && \ - find . -name "std*.txt" > ${file_list} && \ - find . -name "*.sh" >> ${file_list} && \ - find . -name "*.log" >> ${file_list} ) - rsync -a data-prep-pipeline/state preprocess-logs --files-from=${file_list} - rm ${file_list} - fi -} - -if [ "$TAIGA_DIR" == "" ] ; then - TAIGA_DIR="/data2/depmap-pipeline-taiga" -fi - -if [ "$PIPELINE_RUNNER_CREDS_DIR" == "" ] ; then - PIPELINE_RUNNER_CREDS_DIR='/etc/depmap-pipeline-runner-creds' -fi - -if [ ! "${PIPELINE_RUNNER_CREDS_DIR}/broad-paquitas" -o ! "${PIPELINE_RUNNER_CREDS_DIR}/sparkles" -o ! "${PIPELINE_RUNNER_CREDS_DIR}/depmap-pipeline-runner.json" ] ; then - echo "Could not find required file" - exit 1 -fi - -function run_via_container { - COMMAND="$1" - docker run \ - --rm \ - -v "$PWD":/work \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/broad-paquitas:/aws-keys/broad-paquitas" \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/sparkles:/root/.sparkles-cache" \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/depmap-pipeline-runner.json":/etc/google_default_creds.json \ - -v "${TAIGA_DIR}:/root/.taiga" \ - -e GOOGLE_APPLICATION_CREDENTIALS=/etc/google_default_creds.json \ - -w /work/data-prep-pipeline \ - --name "$JOB_NAME" \ - ${DOCKER_IMAGE} \ - bash -c "source /aws-keys/broad-paquitas && poetry run $COMMAND" -} - -# use /data2/depmap-pipeline-taiga as the taiga dir because -# different versions of taigapy seem to conflict in pickle format - - -# backup logs before running GC -backup_conseq_logs - -if [ "$START_WITH" != "" ]; then - # clean out old invocation - sudo chown -R ubuntu data-prep-pipeline - rm -rf data-prep-pipeline/state - bash -c "source ${PIPELINE_RUNNER_CREDS_DIR}/broad-paquitas && gsutil cp $START_WITH data-prep-pipeline/data_prep_pipeline/downloaded-export.conseq" - run_via_container "conseq run downloaded-export.conseq" - # forget all the executions of "publish" rules because the publish location has changed - run_via_container "conseq forget --regex publish.*" -fi - -if [ "$MANUALLY_RUN_CONSEQ" = "true" ]; then - echo "executing: conseq $CONSEQ_ARGS" - run_via_container "conseq -D is_dev=False $CONSEQ_ARGS" -else - # Clean up unused directories from past runs - run_via_container "conseq gc" - - # Kick off new run - set +e - run_via_container "conseq run --addlabel commitsha=${COMMIT_SHA} --no-reattach --maxfail 20 --remove-unknown-artifacts -D sparkles_path=/install/sparkles/bin/sparkles -D is_dev=False $CONSEQ_FILE $CONSEQ_ARGS" - RUN_EXIT_STATUS=$? - set -e - - # Generate export - # run_via_container "conseq export $CONSEQ_FILE $EXPORT_PATH" - - # Generate report - # run_via_container "conseq report html" - - # copy the latest logs - backup_conseq_logs -fi - -echo "Pipeline run complete" - -# docker container is writing files as root. Fix up permissions after job completes -sudo chown -R ubuntu . 
- -exit $RUN_EXIT_STATUS diff --git a/data-prep-pipeline/jenkins-run-pipeline.sh b/data-prep-pipeline/jenkins-run-pipeline.sh deleted file mode 100755 index a026214d6..000000000 --- a/data-prep-pipeline/jenkins-run-pipeline.sh +++ /dev/null @@ -1,132 +0,0 @@ -#!/bin/bash - -if [ "$1" == "" ]; then -# required: env name - echo "needs name of environment" - exit 1 -fi - -ENV_NAME="$1" -CONSEQ_FILE="data_prep_pipeline/run_internal.conseq" -# CONSEQ_FILE="run_$ENV_NAME.conseq" - -if [ "$2" == "" ]; then -# required: job name - echo "needs name to use for job" - exit 1 -fi - -JOB_NAME="$2" - -# if [ "$3" != "" ]; then -# # required: s3 path override -# PUBLISH_DEST="$3" -# echo "let publish_dest = \"$PUBLISH_DEST\"" > "pipeline/overriden-$CONSEQ_FILE" -# # append the result of the conseq file, except for the previous assignment of publish_dest -# grep -v 'let publish_dest' "pipeline/$CONSEQ_FILE" >> "pipeline/overriden-$CONSEQ_FILE" -# CONSEQ_FILE="overriden-$CONSEQ_FILE" -# else -# echo "No s3 path override specified" -# fi - -# set DOCKER_IMAGE from pipeline-run-docker/image-name -SCRIPT_PATH=`dirname $0` -source "$SCRIPT_PATH/image-name" - -COMMIT_SHA=`git rev-parse HEAD` -if [ "${COMMIT_SHA}" == "" ]; then - COMMIT_SHA="unknown" -fi - -set -ex -GOOGLE_APPLICATION_CREDENTIALS=/etc/google/auth/application_default_credentials.json docker pull ${DOCKER_IMAGE} - -# Copy all logs. I'm copying this to a new directory because each time we run we gc the state directory and that -# causes old logs to be deleted which makes it harder to investigate what happened. -function backup_conseq_logs { - file_list=`mktemp` - if [ -e data-prep-pipeline/state ] ; then - ( cd data-prep-pipeline/state && \ - find . -name "std*.txt" > ${file_list} && \ - find . -name "*.sh" >> ${file_list} && \ - find . -name "*.log" >> ${file_list} ) - rsync -a data-prep-pipeline/state preprocess-logs --files-from=${file_list} - rm ${file_list} - fi -} - -if [ "$TAIGA_DIR" == "" ] ; then - TAIGA_DIR="/data2/depmap-pipeline-taiga" -fi - -if [ "$PIPELINE_RUNNER_CREDS_DIR" == "" ] ; then - PIPELINE_RUNNER_CREDS_DIR='/etc/depmap-pipeline-runner-creds' -fi - -if [ ! "${PIPELINE_RUNNER_CREDS_DIR}/broad-paquitas" -o ! "${PIPELINE_RUNNER_CREDS_DIR}/sparkles" -o ! 
"${PIPELINE_RUNNER_CREDS_DIR}/depmap-pipeline-runner.json" ] ; then - echo "Could not find required file" - exit 1 -fi - -function run_via_container { - COMMAND="$1" - docker run \ - --rm \ - -v "$PWD":/work \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/broad-paquitas:/aws-keys/broad-paquitas" \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/sparkles:/root/.sparkles-cache" \ - -v "${PIPELINE_RUNNER_CREDS_DIR}/depmap-pipeline-runner.json":/etc/google_default_creds.json \ - -v "${TAIGA_DIR}:/root/.taiga" \ - -e GOOGLE_APPLICATION_CREDENTIALS=/etc/google_default_creds.json \ - -w /work/data-prep-pipeline \ - --name "$JOB_NAME" \ - ${DOCKER_IMAGE} \ - bash -c "source /aws-keys/broad-paquitas && poetry run $COMMAND" -} - -# use /data2/depmap-pipeline-taiga as the taiga dir because -# different versions of taigapy seem to conflict in pickle format - - -# backup logs before running GC -backup_conseq_logs - -if [ "$START_WITH" != "" ]; then - # clean out old invocation - sudo chown -R ubuntu data-prep-pipeline - rm -rf data-prep-pipeline/state - bash -c "source ${PIPELINE_RUNNER_CREDS_DIR}/broad-paquitas && gsutil cp $START_WITH data-prep-pipeline/data_prep_pipeline/downloaded-export.conseq" - run_via_container "conseq run downloaded-export.conseq" - # forget all the executions of "publish" rules because the publish location has changed - run_via_container "conseq forget --regex publish.*" -fi - -if [ "$MANUALLY_RUN_CONSEQ" = "true" ]; then - echo "executing: conseq $CONSEQ_ARGS" - run_via_container "conseq -D is_dev=False $CONSEQ_ARGS" -else - # Clean up unused directories from past runs - run_via_container "conseq gc" - - # Kick off new run - set +e - run_via_container "conseq run --addlabel commitsha=${COMMIT_SHA} --no-reattach --maxfail 20 --remove-unknown-artifacts -D sparkles_path=/install/sparkles/bin/sparkles -D is_dev=False $CONSEQ_FILE $CONSEQ_ARGS" - RUN_EXIT_STATUS=$? - set -e - - # Generate export - # run_via_container "conseq export $CONSEQ_FILE $EXPORT_PATH" - - # Generate report - # run_via_container "conseq report html" - - # copy the latest logs - backup_conseq_logs -fi - -echo "Pipeline run complete" - -# docker container is writing files as root. Fix up permissions after job completes -sudo chown -R ubuntu . - -exit $RUN_EXIT_STATUS diff --git a/data-prep-pipeline/scripts/hgnc_gene_table/add_ce_and_selectivity.py b/data-prep-pipeline/scripts/hgnc_gene_table/add_ce_and_selectivity.py deleted file mode 100644 index 0c5e4510c..000000000 --- a/data-prep-pipeline/scripts/hgnc_gene_table/add_ce_and_selectivity.py +++ /dev/null @@ -1,162 +0,0 @@ -import argparse -import pandas as pd -from typing import Tuple - -from taigapy import create_taiga_client_v3 - -# Constants for calculating selectivity -DEPENDENCY_THRESHOLD = 0.5 -SELECTIVITY_THRESHOLD = -0.86 - - -def parse_gene_series(gene_series): - """ - Parse pandas Series with 'GENE_SYMBOL (entrez_id)' format. - """ - pattern = r'^(?P[^\(]+?)\s*\(\s*(?P\d+)\s*\)$' - return gene_series.str.extract(pattern) - -def parse_and_validate_genes(gene_series, name): - """ - Parse and validate gene series, converting entrez_id to numeric. 
- """ - parsed = parse_gene_series(gene_series) - parsed['entrez_id'] = pd.to_numeric(parsed['entrez_id'], errors='coerce') - - if not parsed.notnull().all().all(): - raise ValueError(f"Failed to parse {name}") - - return parsed - - -def calculate_gene_selectivity(crispr_gene_dependency: pd.DataFrame, crispr_gene_effect: pd.DataFrame, - dependency_threshold: float = DEPENDENCY_THRESHOLD, - selectivity_threshold: float = SELECTIVITY_THRESHOLD) -> pd.Series: - """ - Calculate gene selectivity based on gene dependency and effect data. - """ - - # Count dependent lines for each gene - dep_lines = (crispr_gene_dependency > dependency_threshold).sum(axis=0) - - # Calculate statistical moments - skewness = crispr_gene_effect.skew(axis=0) - kurtosis = crispr_gene_effect.kurtosis(axis=0) - - # Calculate is_strongly_selective - is_strongly_selective = (skewness * kurtosis < selectivity_threshold) & (dep_lines > 0) - - return is_strongly_selective - - -def load_taiga_data(tc, hgnc_id: str, ce_id: str, effect_id: str, dependency_id: str) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: - """ - Load all required datasets from Taiga. - """ - hgnc_gene_table = tc.get(hgnc_id) - crispr_inferred_common_essentials = tc.get(ce_id) - crispr_gene_effect = tc.get(effect_id) - crispr_gene_dependency = tc.get(dependency_id) - - assert 'entrez_id' in hgnc_gene_table.columns, "HGNC table must have 'entrez_id' column" - assert 'Essentials' in crispr_inferred_common_essentials.columns, "Common essentials must have 'Essentials' column" - - return hgnc_gene_table, crispr_inferred_common_essentials, crispr_gene_effect, crispr_gene_dependency - - -def add_essentiality_to_hgnc(hgnc_gene_table: pd.DataFrame, - common_essentials: pd.DataFrame, - gene_effect: pd.DataFrame) -> pd.DataFrame: - """Add essentiality information to HGNC gene table.""" - print("Number of rows in crispr_inferred_common_essentials: ", len(common_essentials)) - - parsed_ce = parse_and_validate_genes(common_essentials['Essentials'], "common essential genes") - parsed_gene_effect = parse_and_validate_genes(pd.Series(gene_effect.columns), "gene effect column names") - - # Check that all common essentials are in the gene effect matrix - missing_essentials = ~parsed_ce['entrez_id'].isin(parsed_gene_effect['entrez_id']) - if missing_essentials.any(): - raise ValueError(f"Found {parsed_ce[missing_essentials]['entrez_id'].values} common essential genes that are not in the gene effect matrix") - - # Add a common essential column so that we now have a full gene list with common essentials (the crispr_inferred_common_essentials matrix only contains True values) - full_gene_list_with_common_essentials = parsed_gene_effect.copy() - full_gene_list_with_common_essentials['is_common_essential'] = ( - full_gene_list_with_common_essentials['entrez_id'] - .isin(parsed_ce['entrez_id']) - ) - - print(f"Total genes in gene_effect: {len(parsed_gene_effect)}") - print(f"Common essentials found in gene_effect: {full_gene_list_with_common_essentials['is_common_essential'].sum()}") - - # Add essentiality column to HGNC gene table - hgnc_gene_table = hgnc_gene_table.copy() - hgnc_gene_table['entrez_id'] = pd.to_numeric(hgnc_gene_table['entrez_id'], errors='coerce') - essentiality_mapping = full_gene_list_with_common_essentials.set_index('entrez_id')['is_common_essential'].to_dict() - - hgnc_gene_table['essentiality'] = ( - hgnc_gene_table['entrez_id'] - .map(essentiality_mapping) - .replace({True: "common essential", False: "not common essential"}) - ) - - 
print(f"Essentiality column value counts: {hgnc_gene_table['essentiality'].value_counts(dropna=False)}") - - return hgnc_gene_table - - -def add_selectivity_to_hgnc(hgnc_gene_table: pd.DataFrame, - gene_dependency: pd.DataFrame, - gene_effect: pd.DataFrame) -> pd.DataFrame: - """Add selectivity information to HGNC gene table.""" - # Validate that gene dependency and gene effect matrices have the same number of genes - if len(gene_dependency.columns) != len(gene_effect.columns): - raise ValueError( - f"Gene dependency matrix has {len(gene_dependency.columns)} genes " - f"but gene effect matrix has {len(gene_effect.columns)} genes" - ) - - is_strongly_selective = calculate_gene_selectivity(gene_dependency, gene_effect) - - parsed_strongly_selective = parse_and_validate_genes(pd.Series(is_strongly_selective.index), "selectivity gene names") - parsed_strongly_selective['selectivity'] = is_strongly_selective.values - - selectivity_mapping = parsed_strongly_selective.set_index('entrez_id')['selectivity'].to_dict() - - hgnc_gene_table = hgnc_gene_table.copy() - hgnc_gene_table['selectivity'] = ( - hgnc_gene_table['entrez_id'] - .map(selectivity_mapping) - .replace({True: "strongly selective", False: "not strongly selective"}) - ) - - print(f"Selectivity column value counts: {hgnc_gene_table['selectivity'].value_counts(dropna=False)}") - - return hgnc_gene_table - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Add common essential and selectivity to HGNC gene table." - ) - parser.add_argument("hgnc_gene_table_taiga_id", help="Taiga ID of HGNC gene table") - parser.add_argument("crispr_inferred_common_essentials_taiga_id", help="Taiga ID of CRISPR inferred common essentials") - parser.add_argument("crispr_gene_effect_taiga_id", help="Taiga ID of CRISPR gene effect") - parser.add_argument("crispr_gene_dependency_taiga_id", help="Taiga ID of CRISPR gene dependency") - parser.add_argument("output", help="Path to write the output") - args = parser.parse_args() - - tc = create_taiga_client_v3() - - # Load all data - hgnc_gene_table, crispr_inferred_common_essentials, crispr_gene_effect, crispr_gene_dependency = load_taiga_data( - tc, args.hgnc_gene_table_taiga_id, args.crispr_inferred_common_essentials_taiga_id, - args.crispr_gene_effect_taiga_id, args.crispr_gene_dependency_taiga_id - ) - - # Add essentiality - hgnc_gene_table = add_essentiality_to_hgnc(hgnc_gene_table, crispr_inferred_common_essentials, crispr_gene_effect) - - # Add selectivity - hgnc_gene_table = add_selectivity_to_hgnc(hgnc_gene_table, crispr_gene_dependency, crispr_gene_effect) - - hgnc_gene_table.to_csv(args.output, index=False) diff --git a/data-prep-pipeline/scripts/predictability/transform_fusion.py b/data-prep-pipeline/scripts/predictability/transform_fusion.py deleted file mode 100644 index 85c799817..000000000 --- a/data-prep-pipeline/scripts/predictability/transform_fusion.py +++ /dev/null @@ -1,101 +0,0 @@ -import argparse -import re -import pandas as pd -from typing import Dict -from taigapy import create_taiga_client_v3 -from pathlib import Path - - -def extract_id(x: str) -> str: - """Extract the Ensembl gene ID from a given string.""" - - m = re.match(r"\S+ \(([^.]+)\.\d+\)", x) - if m is None: - print("Warning: Could not find ensemble ID in:", x) - return None - return m.group(1) - - -def make_fusion_name(left: str, right: str, symbol_by_ensembl) -> str: - """Create a fusion name from two gene names by combining their symbols.""" - - left_ensembl = extract_id(left) - right_ensembl = 
extract_id(right) - left_symbol = symbol_by_ensembl.get(left_ensembl) - right_symbol = symbol_by_ensembl.get(right_ensembl) - if ( - left_ensembl is None - or right_ensembl is None - or left_symbol is None - or right_symbol is None - ): - # fall back to the symbols in the orignal left and right gene names if we can't figure it out - left_symbol = left.split(" ")[0] - right_symbol = right.split(" ")[0] - return f"{left_symbol}_{right_symbol}" - - -def generate_fusion_matrix( - df: pd.DataFrame, symbol_by_ensembl: Dict[str, str] -) -> pd.DataFrame: - """Transform a DataFrame containing gene fusion information into a one-hot encoded DataFrame.""" - - df["fusion_name"] = [ - make_fusion_name(rec["Gene1"], rec["Gene2"], symbol_by_ensembl) - for rec in df.to_records() - ] - df["one"] = 1 - one_hot = pd.pivot_table( - df, - values="one", - columns="ModelID", - index="fusion_name", - aggfunc=lambda x: 1, - fill_value=0, - ) - - # This is done twice to get rid of the index column names, ModelID and fusion_name - one_hot.reset_index(inplace=True) - one_hot.columns.name = None - one_hot.set_index("fusion_name", inplace=True) - one_hot = one_hot.transpose() - one_hot.columns.name = None - - return one_hot - - -def process_and_transform_fusion( - fusion_taiga_id: str, hgnc_gene_table_csv: Path -) -> pd.DataFrame: - - """Transform fusion data for predictability and upload it to Taiga.""" - - tc = create_taiga_client_v3() - - print("Getting fusion data...") - fusion_filtered_data = tc.get(fusion_taiga_id) - sym_map_df = pd.read_csv(hgnc_gene_table_csv) - symbol_by_ensembl = sym_map_df.set_index("ensembl_gene_id")["symbol"].to_dict() - - print("Transforming fusion data...") - fusion_matrix = generate_fusion_matrix(fusion_filtered_data, symbol_by_ensembl) - print("Transformed fusion data") - - return fusion_matrix - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Generate fusion matrix for predictability" - ) - parser.add_argument("fusion_taiga_id", help="Taiga ID of fusion data") - parser.add_argument("hgnc_gene_table_csv", help="Path to HGNC gene table") - parser.add_argument("output", help="Path to write the output") - args = parser.parse_args() - - fusion_matrix = process_and_transform_fusion( - args.fusion_taiga_id, args.hgnc_gene_table_csv - ) - - if fusion_matrix is not None: - fusion_matrix.to_csv(args.output) diff --git a/pipeline/analysis-pipeline/analysis_pipeline_local_run.sh b/pipeline/analysis-pipeline/analysis_pipeline_local_run.sh new file mode 100755 index 000000000..4e4318b23 --- /dev/null +++ b/pipeline/analysis-pipeline/analysis_pipeline_local_run.sh @@ -0,0 +1,13 @@ +#!/usr/bin/bash + +set -ex + +# all files that the pipeline uses must be accessible under the current working directory +# in order for the docker container to access it. So, copy things that are from outside of this tree +# before starting. Maybe this should be moved into run_pipeline.py ? 
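For context, a minimal, self-contained sketch of the one-hot pivot that the removed transform_fusion.py above performs in generate_fusion_matrix. The ModelIDs and fusion names below are made-up toy values, and the replacement script (make_fusions_matrix.py, invoked with --rows-per-model in predictability.conseq) is assumed to produce an equivalent model-by-fusion matrix, though its internals are not shown here:

```python
import pandas as pd

# Toy fusion calls (hypothetical ModelIDs and fusion names, for illustration only).
calls = pd.DataFrame(
    {
        "ModelID": ["ACH-000001", "ACH-000002", "ACH-000002"],
        "fusion_name": ["BCR_ABL1", "BCR_ABL1", "EML4_ALK"],
    }
)
calls["one"] = 1

# Pivot to a fusion-by-model indicator table, then transpose to model-by-fusion,
# mirroring generate_fusion_matrix above.
one_hot = pd.pivot_table(
    calls,
    values="one",
    columns="ModelID",
    index="fusion_name",
    aggfunc=lambda x: 1,
    fill_value=0,
).transpose()

print(one_hot)
# Roughly:
#             BCR_ABL1  EML4_ALK
# ACH-000001         1         0
# ACH-000002         1         1
```

Rows are models, columns are fusion names, a 1 marks a fusion observed in that model, and missing combinations are filled with 0.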
+#mkdir -p extern +#cp ../pipeline/preprocess_taiga_ids.py extern + +GOOGLE_APPLICATION_CREDENTIALS=$HOME/.secrets/depmap-pipeline-runner.json exec ./run_analysis_pipeline.py \ + --publish-dest gs://preprocessing-pipeline-outputs/depmap-pipeline/test-pred/metadata --env internal "$@" + diff --git a/data-prep-pipeline/data_prep_pipeline/__init__.py b/pipeline/analysis-pipeline/analysis_pipeline_runner.py similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/__init__.py rename to pipeline/analysis-pipeline/analysis_pipeline_runner.py diff --git a/pipeline/analysis-pipeline/predictability/fit.conseq b/pipeline/analysis-pipeline/predictability/fit.conseq new file mode 100644 index 000000000..0a0d6305f --- /dev/null +++ b/pipeline/analysis-pipeline/predictability/fit.conseq @@ -0,0 +1,4 @@ +# Three Steps: +# 1. Generate a daintree input config file for each model and screen +# 2. Run the model fitting +# 3. Combine the output config files diff --git a/analysis-pipeline/predictability/model-config.yaml b/pipeline/analysis-pipeline/predictability/model-config.yaml similarity index 90% rename from analysis-pipeline/predictability/model-config.yaml rename to pipeline/analysis-pipeline/predictability/model-config.yaml index 3ec16ab00..d43ee8b4e 100644 --- a/analysis-pipeline/predictability/model-config.yaml +++ b/pipeline/analysis-pipeline/predictability/model-config.yaml @@ -6,6 +6,7 @@ CellContext: - lineage - confounder Relation: All + EstimatedSecondsPerModel: 80 DriverEvents: Features: @@ -17,6 +18,7 @@ DriverEvents: - confounder - driver_events Relation: All + EstimatedSecondsPerModel: 95 GeneticDerangement: Features: @@ -34,6 +36,7 @@ GeneticDerangement: - cytoband_cn - genetic_signature Relation: All + EstimatedSecondsPerModel: 1000 DNA: Features: @@ -58,6 +61,7 @@ DNA: - mutations_damaging - gene_cn Relation: All + EstimatedSecondsPerModel: 600 RNASeq: Features: @@ -84,3 +88,4 @@ RNASeq: - gene_cn - rnaseq Relation: All + EstimatedSecondsPerModel: 900 diff --git a/pipeline/analysis-pipeline/predictability/sparkles-config b/pipeline/analysis-pipeline/predictability/sparkles-config new file mode 100644 index 000000000..019999198 --- /dev/null +++ b/pipeline/analysis-pipeline/predictability/sparkles-config @@ -0,0 +1,9 @@ +[config] +default_url_prefix=gs://dpp-sparkles/depmap-pipeline +project=depmap-portal-pipeline +region=us-central1 +machine_type=n1-highmem-2 +account=856306709302-compute@developer.gserviceaccount.com +boot_volume_in_gb=40 +default_image=ubuntu +sparklesworker_image=us-central1-docker.pkg.dev/cds-docker-containers/docker/sparklesworker:5.0.0-alpha3 diff --git a/pipeline/context_explorer/parallelized_get_context_analysis.json b/pipeline/analysis-pipeline/publish.conseq similarity index 100% rename from pipeline/context_explorer/parallelized_get_context_analysis.json rename to pipeline/analysis-pipeline/publish.conseq diff --git a/pipeline/base_pipeline_runner.py b/pipeline/base_pipeline_runner.py new file mode 100644 index 000000000..96909c4d5 --- /dev/null +++ b/pipeline/base_pipeline_runner.py @@ -0,0 +1,336 @@ +import json +import os +import subprocess +import sys +import tempfile +import uuid +import yaml +from abc import ABC, abstractmethod +from pathlib import Path +from datetime import datetime + + +class PipelineRunner(ABC): + """Base class for all pipeline runners.""" + + def __init__(self): + self.script_path = None + self.pipeline_name = None + self.pipeline_run_id = str(uuid.uuid4()) + self.config_data = self._load_config() + + def 
_load_config(self): + """Load pipeline configuration from YAML file.""" + config_path = Path(__file__).parent / "pipeline_config.yaml" + assert config_path.exists(), f"Config file not found: {config_path}" + + with open(config_path, "r") as f: + config = yaml.safe_load(f) + + assert config, "Config file is empty or invalid" + return config + + def get_git_commit_sha(self): + """Get the current git commit SHA.""" + result = subprocess.run( + ["git", "rev-parse", "HEAD"], capture_output=True, text=True, check=True + ) + return result.stdout.strip() + + def read_docker_image_name(self, script_dir): + """Load Docker image name from image-name file.""" + image_name_file = script_dir.parent / "image-name" + assert ( + image_name_file.exists() + ), f"Could not find image-name file in {script_dir.parent}" + + with open(image_name_file, "r") as f: + for line in f: + line = line.strip() + if line.startswith("DOCKER_IMAGE="): + image_name = line.split("=", 1)[1].strip("\"'") + return image_name + + raise ValueError(f"Could not find DOCKER_IMAGE= in {image_name_file}") + + def backup_conseq_logs(self, state_path, log_destination): + """Copy all logs to specified directory.""" + state_dir = Path(state_path) + if not state_dir.exists(): + return + + with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: + temp_file = f.name + + assert temp_file, "Temporary file name cannot be empty" + assert os.path.exists(temp_file), f"Temporary file was not created: {temp_file}" + + find_commands = [ + ["find", ".", "-name", "std*.txt"], + ["find", ".", "-name", "*.sh"], + ["find", ".", "-name", "*.log"], + ] + + with open(temp_file, "w") as f: + for cmd in find_commands: + assert cmd, "Find command cannot be empty" + result = subprocess.run( + cmd, cwd=state_dir, capture_output=True, text=True, check=True + ) + f.write(result.stdout) + + subprocess.run( + ["rsync", "-a", state_path, log_destination, f"--files-from={temp_file}",], + check=True, + ) + + os.unlink(temp_file) + + def check_credentials(self, creds_dir): + """Check that required credential files exist.""" + required_files = self.config_data["credentials"]["required_files"] + + for filename in required_files: + filepath = Path(creds_dir) / filename + if not filepath.exists(): + raise FileNotFoundError(f"Could not find required file: {filepath}") + + def pull_docker_image(self, docker_image): + """Pull Docker image if it has a registry path.""" + if docker_image and "/" in docker_image: + print("Pulling Docker image...") + env_vars = { + **os.environ, + "GOOGLE_APPLICATION_CREDENTIALS": "/etc/google/auth/application_default_credentials.json", + } + subprocess.run(["docker", "pull", docker_image], check=True, env=env_vars) + + def log_dataset_usage(self, dataset_taiga_id): + """Print dataset usage information.""" + final_log = { + "pipeline_run_id": self.pipeline_run_id, + "dataset_taiga_id": dataset_taiga_id, + "pipeline": self.pipeline_name, + "timestamp": datetime.now().astimezone().isoformat(), + } + print("=" * 50) + print(json.dumps(final_log, indent=2)) + print("=" * 50) + + def track_dataset_usage_from_conseq(self, pipeline_dir): + """Track dataset usage from DO-NOT-EDIT-ME files and log to usage tracker.""" + import re + + pipeline_path = Path(pipeline_dir) + version_files = list(pipeline_path.glob("*-DO-NOT-EDIT-ME")) + + if not version_files: + raise ValueError(f"No *-DO-NOT-EDIT-ME files found in {pipeline_dir}") + + for version_file in version_files: + assert version_file.exists(), f"Version file does not exist: {version_file}" + + with 
open(version_file, "r") as f: + content = f.read() + assert content, f"Version file is empty: {version_file}" + + release_pattern = ( + r'"type":\s*"release_taiga_id"[^}]*"dataset_id":\s*"([^"]+)"' + ) + match = re.search(release_pattern, content, re.DOTALL) + + if match: + release_taiga_id = match.group(1) + assert release_taiga_id, "Release taiga ID is empty" + self.log_dataset_usage(release_taiga_id) + return + + raise ValueError( + f"Release taiga ID not found in any *-DO-NOT-EDIT-ME files in {pipeline_dir}. " + "Please check the files and try again." + ) + + def add_common_arguments(self, parser): + """Add common CLI arguments that all pipelines share.""" + defaults = self.config_data["defaults"] + + parser.add_argument("env_name", help="Name of environment") + parser.add_argument("job_name", help="Name to use for job") + parser.add_argument( + "--taiga-dir", default=defaults["taiga_dir"], help="Taiga directory path" + ) + parser.add_argument( + "--creds-dir", + default=defaults["creds_dir"], + help="Pipeline runner credentials directory", + ) + parser.add_argument( + "--image", help="If set, use this docker image when running the pipeline" + ) + + def build_common_config(self, args, pipeline_name): + """Build common configuration dictionary that all pipelines share.""" + pipeline_cfg = self.config_data["pipelines"][pipeline_name] + + config = { + "env_name": args.env_name, + "job_name": args.job_name, + "taiga_dir": args.taiga_dir, + "creds_dir": args.creds_dir, + "image": args.image, + "state_path": pipeline_cfg["state_path"], + "log_destination": pipeline_cfg["log_destination"], + "working_dir": pipeline_cfg["working_dir"], + } + + self.check_credentials(config["creds_dir"]) + return config + + def run_via_container(self, command, config): + """Run command inside Docker container with pipeline-specific configuration.""" + cwd = os.getcwd() + docker_cfg = self.config_data["docker"] + volumes = docker_cfg["volumes"] + env_vars = docker_cfg["env_vars"] + cred_files = self.config_data["credentials"]["required_files"] + + # Start building docker command + docker_cmd = ["docker", "run"] + + # Add pipeline-specific options (e.g., security settings) + pipeline_options = docker_cfg["options"].get(self.pipeline_name, {}) + if "security_opt" in pipeline_options: + docker_cmd.extend(["--security-opt", pipeline_options["security_opt"]]) + + # Add common options + docker_cmd.extend( + [ + "--rm", + "-v", + f"{cwd}:{volumes['work_dir']}", + "-w", + config["working_dir"], + "-v", + f"{config['creds_dir']}/{cred_files[0]}:{volumes['aws_keys']}", + "-v", + f"{config['creds_dir']}/{cred_files[1]}:{volumes['sparkles_cache']}", + "-v", + f"{config['creds_dir']}/{cred_files[2]}:{volumes['google_creds']}", + "-v", + f"{config['taiga_dir']}:{volumes['taiga']}", + "-e", + f"GOOGLE_APPLICATION_CREDENTIALS={env_vars['GOOGLE_APPLICATION_CREDENTIALS']}", + "--name", + config["job_name"], + config["docker_image"], + "bash", + "-c", + f"source {volumes['aws_keys']} && {command}", + ] + ) + + print("=" * 50) + print(f"{self.pipeline_name} Pipeline Runner command:") + print(f" {command}") + print("=" * 50) + + return subprocess.run(docker_cmd) + + @abstractmethod + def create_argument_parser(self): + """Create and return the argument parser for this pipeline.""" + pass + + @abstractmethod + def get_pipeline_config(self, args): + """Return pipeline-specific configuration.""" + pass + + @abstractmethod + def get_conseq_file(self, config): + """Get the conseq file to use for this pipeline.""" + pass + + 
@abstractmethod + def handle_special_features(self, config): + """Handle pipeline-specific features like START_WITH, override files, etc.""" + pass + + def run(self, script_file_path): + """Main entry point for running the pipeline.""" + self.script_path = Path(script_file_path) + self.pipeline_name = self.script_path.parent.name + + print(f"Pipeline run ID: {self.pipeline_run_id}") + + parser = self.create_argument_parser() + args = parser.parse_args() + + config = self.get_pipeline_config(args) + + docker_image = config.get("image") or self.read_docker_image_name( + self.script_path.parent + ) + config["docker_image"] = docker_image + config["commit_sha"] = self.get_git_commit_sha() + + self.pull_docker_image(docker_image) + + self.backup_conseq_logs(config["state_path"], config["log_destination"]) + self.handle_special_features(config) + + config["conseq_file"] = self.get_conseq_file(config) + + if config.get("manually_run_conseq"): + conseq_args = config.get("conseq_args", []) + print(f"executing: conseq {' '.join(conseq_args)}") + result = self.run_via_container( + f"conseq -D is_dev=False {' '.join(conseq_args)}", config + ) + run_exit_status = result.returncode + else: + # Clean up unused directories from past runs + result = self.run_via_container("conseq gc", config) + assert result.returncode == 0, "Conseq gc failed" + + # Build and run main conseq command + conseq_run_cmd = self.build_conseq_run_command(config) + result = self.run_via_container(conseq_run_cmd, config) + run_exit_status = result.returncode + + # Handle post-run tasks (export, reports, etc.) + self.handle_post_run_tasks(config) + + # Copy the latest logs + self.backup_conseq_logs(config["state_path"], config["log_destination"]) + + print("Pipeline run complete") + subprocess.run(["sudo", "chown", "-R", "ubuntu", "."], check=True) + sys.exit(run_exit_status) + + def build_conseq_run_command(self, config): + """Build the main conseq run command.""" + conseq_cfg = self.config_data["conseq"] + common_args = " ".join(conseq_cfg["common_args"]) + + cmd_parts = [ + f"conseq run --addlabel commitsha={config['commit_sha']}", + f"{common_args} --maxfail {conseq_cfg['max_fail']}", + f"-D sparkles_path={conseq_cfg['sparkles_path']}", + "-D is_dev=False", + ] + + # Add pipeline-specific options + if config.get("s3_staging_url"): + cmd_parts.append(f"-D S3_STAGING_URL={config['s3_staging_url']}") + if config.get("publish_dest"): + cmd_parts.append(f"-D publish_dest={config['publish_dest']}") + + conseq_args = config.get("conseq_args", []) + cmd_parts.extend([config["conseq_file"], " ".join(conseq_args)]) + return " ".join(cmd_parts) + + def handle_post_run_tasks(self, config): + """Handle post-run tasks like export and report generation.""" + # Default implementation - can be overridden by specific pipelines + pass diff --git a/pipeline/celligner/dstat_wrapper.py b/pipeline/celligner/dstat_wrapper.py deleted file mode 100644 index d1ab64c45..000000000 --- a/pipeline/celligner/dstat_wrapper.py +++ /dev/null @@ -1,44 +0,0 @@ -import subprocess -import sys -import json -import re - -# this script will run the command (passed as the command args) and then parse the json that it reads from -# stdout and emit a single line with the status which starts with either "completed" or "in-progress". 
This is all to make it easier for conseq to determine -# whether a job is running or not - -# based on https://cloud.google.com/batch/docs/reference/rest/v1alpha/projects.locations.jobs#State -terminal_states = ["SUCCEEDED", "FAILED", "CANCELLED"] -in_progress_state = [ - "QUEUED", - "SCHEDULED", - "RUNNING", - "DELETION_IN_PROGRESS", - "CANCELLATION_IN_PROGRESS", -] - -command = sys.argv[1:] -stdout = subprocess.check_output(command) -try: - status = json.loads(stdout) - assert len(status) == 1 - status_message = status[0]["status-message"] - if status_message is None: # seems to happen right after job submission - prefix = "IN_PROGRESS" - else: - m = re.match( - "Job state is set from [A-Z]+ to ([A-Z]+) for job.*", status_message - ) - assert m is not None - state = m.group(1) - if state in terminal_states: - prefix = "COMPLETED" - else: - assert state in in_progress_state - prefix = "IN_PROGRESS" -except Exception as ex: - sys.stderr.write(f"got exception parsing output from command {command}: {stdout}") - raise ex -with open("last_check_status.log", "wt") as fd: - fd.write(f"command: {command}\n{stdout}") -print(f"{prefix}: {status_message}") diff --git a/pipeline/cn_gene/README.md b/pipeline/cn_gene/README.md deleted file mode 100644 index aa09155f5..000000000 --- a/pipeline/cn_gene/README.md +++ /dev/null @@ -1,17 +0,0 @@ -Run the `transform_cngene_to_log2.py` script to generate a log transformed version of the OmicsCNGene file and upload it to taiga named as `PortalOmicsCNGeneLog2`. - -To run the script: - -1. First activate the poetry environment in portal-backend. `cd portal-backend && poetry shell` -2. Then go back to the pipeline/cn_gene directory. `cd ../pipeline/cn_gene` -3. Now run: - `python transform_cngene_to_log2.py ` - -The `release_cn_gene_taiga_id` parameter is the taiga ID of OmicsCNGene data. - -For example, to transform the 23q4 internal cn gene data to log2 format, run: -`python transform_cngene_to_log2.py internal-23q4-ac2b.16/OmicsCNGene` - -Once run successfully, the output should print a statement with the taiga ID of the dataset that was updated and the new version number. - -E.g. Updated dataset: internal-23q4-ac2b to version number: 73 diff --git a/pipeline/cn_gene/transform_cngene_to_log2.py b/pipeline/cn_gene/transform_cngene_to_log2.py deleted file mode 100644 index 6c1f328f0..000000000 --- a/pipeline/cn_gene/transform_cngene_to_log2.py +++ /dev/null @@ -1,56 +0,0 @@ -import argparse -import numpy as np -from taigapy import create_taiga_client_v3 -from taigapy.client_v3 import UploadedFile, LocalFormat -import tempfile - - -def transform_cngene_to_log2_and_upload_to_taiga(cngene_dataset_id): - """Transform CN gene expression data to log2 scale and upload to Taiga - - Args: - cngene_dataset_id (pd.DataFrame): The dataset id of the CN gene expression data - output_filename (str): The filename to save the transformed data to locally - """ - taiga_client = create_taiga_client_v3() - # Get the CN gene expression data - print("Getting CN gene expression data...") - cngene_expression_data = taiga_client.get(cngene_dataset_id) - # Transform the CN gene expression data to log2 scale - print("Transforming CN gene expression data to log2 scale...") - log2_transformed_data = np.log2(cngene_expression_data + 1) - - # Create a temporary file - with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file: - # Save the transformed data to the temporary file - log2_transformed_data.to_csv(temp_file.name) - temp_filename = temp_file.name - - # Get the taiga permaname (e.g. 
internal-23q4-ac2b) from the dataset id - taiga_permaname = cngene_dataset_id.split(".")[0] - print(f"Taiga permaname: {taiga_permaname}") - - # Update the dataset with the transformed data - version = taiga_client.update_dataset( - taiga_permaname, - "Transformed CN gene expression data to log2 scale", - additions=[ - UploadedFile( - "PortalOmicsCNGeneLog2", - local_path=temp_filename, - format=LocalFormat.CSV_MATRIX, - ) - ], - ) - print( - f"Updated dataset: {version.permaname} to version number: {version.version_number}" - ) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Transform CN gene expression data to log2 scale." - ) - parser.add_argument("cngene_dataset_id") - args = parser.parse_args() - transform_cngene_to_log2_and_upload_to_taiga(args.cngene_dataset_id) diff --git a/data-prep-pipeline/README.md b/pipeline/data-prep-pipeline/README.md similarity index 53% rename from data-prep-pipeline/README.md rename to pipeline/data-prep-pipeline/README.md index a0482ea6e..4bc906adc 100644 --- a/data-prep-pipeline/README.md +++ b/pipeline/data-prep-pipeline/README.md @@ -1,6 +1,6 @@ # Data Prep Pipeline -The data prep pipeline gets data from **Taiga** and prepares the data to make them ready for the analysis pipeline. At this point, there are two such preparations happen: +The data prep pipeline gets data from **Taiga** and prepares the data to make them ready for the analysis pipeline and preprocessing pipeline. At this point, there are two such preparations happen: 1. **Transformed Data**: These datasets are generated from current release or a combination of current release and other relevant data such as hgnc gene table, onocokb annotated data, etc. 2. **Legacy Data**: These datasets are not part of the currrent release anymore. However, they are still used for some analysis. E.g. RNAi data. @@ -8,19 +8,11 @@ The data prep pipeline gets data from **Taiga** and prepares the data to make th ## How to run the Data Prep Pipeline locally First, make sure you have conseq installed from here: https://github.com/broadinstitute/conseq and conseq is executable. +Second, check out the `depmap-deploy` repo if you have not already and put that in the same directory where `depmap-portal` repo is located. +Then, assuming you are in `depmap-portal/pipeline/data-prep-pipeline` where this readme is located: -Then, assuming you are in `depmap-portal/data-prep-pipeline` where this readme is located, install and activate a poetry environment. Then: - -1. Run `poetry shell`. -2. Once inside the poetry environment, run `data_prep_pipeline/common.conseq` which will run each rule mentioned there and produce the relevant output. - -Note that there are two primary configuration files, `release_inputs.conseq` which contains all the taiga ids of the initial inputs for different rules and `common.conseq` which contains all the available rules. There's a `data_prep_pipeline/publish.conseq` file where each upload to taiga is configured and executed. If you would like to modify or skip the upload for a particular rule, then do so in that file. - -## Run the Data Prep Pipeline in Jenkins - -Go to the Data Prep Pipeline 1.0 jenkins job here: https://datascidev.broadinstitute.org/job/Data%20Prep%20Pipeline%201.0/ - -Then click on build. Optionally choose one of the parameters if you would like a clean start or start with a specific export or want to automatically rebuild the db once done. +1. Run `eval $(poetry env activate)` or `poetry shell` if poetry is <2.0. 
Then install the packages inside the poetry env. +2. Once inside the poetry environment, run `local_run.sh` with either `internal` or `external` as parameters depending on the environment which will run each rule mentioned there and produce the relevant output. ## How to extend the pipeline to add additional files to the release diff --git a/data-prep-pipeline/data_prep_pipeline/.gitignore b/pipeline/data-prep-pipeline/data_prep_pipeline/.gitignore similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/.gitignore rename to pipeline/data-prep-pipeline/data_prep_pipeline/.gitignore diff --git a/pipeline/nonquarterly-processed.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/__init__.py similarity index 100% rename from pipeline/nonquarterly-processed.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/__init__.py diff --git a/data-prep-pipeline/data_prep_pipeline/cngene_log_2_transformation.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/cngene_log_2_transformation.conseq similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/cngene_log_2_transformation.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/cngene_log_2_transformation.conseq diff --git a/data-prep-pipeline/data_prep_pipeline/filter_portal_compounds.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/filter_portal_compounds.conseq similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/filter_portal_compounds.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/filter_portal_compounds.conseq diff --git a/data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data.conseq similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data.conseq diff --git a/data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data_inputs.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data_inputs.conseq similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data_inputs.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/legacy_data/legacy_data_inputs.conseq diff --git a/data-prep-pipeline/data_prep_pipeline/predictability.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/predictability.conseq similarity index 80% rename from data-prep-pipeline/data_prep_pipeline/predictability.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/predictability.conseq index 671f85bf3..bf6799a17 100644 --- a/data-prep-pipeline/data_prep_pipeline/predictability.conseq +++ b/pipeline/data-prep-pipeline/data_prep_pipeline/predictability.conseq @@ -29,25 +29,13 @@ rule transform_fusion: inputs: fusion={"type": "fusion"}, gene={"type": "gene"}, - script=fileref('../scripts/predictability/transform_fusion.py'), + script=fileref('../../preprocessing-pipeline/scripts/make_fusions_matrix.py'), outputs: { "type": "predictability_fusion", "filename": {"$filename": "fusion.csv"}, } - run "python3 {{inputs.script.filename}} {{inputs.fusion.dataset_id}} {{inputs.gene.filename}} fusion.csv" - - -rule transform_lineage: - inputs: - model={"type": "model"}, - script=fileref('../scripts/predictability/transform_lineage.py'), - outputs: - { - "type": "predictability_lineage", - "filename": {"$filename": "lineage.csv"}, - } - run "python3 {{inputs.script.filename}} {{inputs.model.dataset_id}} 
lineage.csv" + run "python3 {{inputs.script.filename}} {{inputs.fusion.dataset_id}} {{inputs.gene.filename}} fusion.csv --rows-per-model" rule transform_crispr_confounders: diff --git a/pipeline/data-prep-pipeline/data_prep_pipeline/publish.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/publish.conseq new file mode 100644 index 000000000..e5065ddc3 --- /dev/null +++ b/pipeline/data-prep-pipeline/data_prep_pipeline/publish.conseq @@ -0,0 +1,88 @@ +rule publish_hgnc_gene_table: + inputs: + hgnc_gene_table={"type": "gene"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "release_hgnc_gene_table_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Updated HGNC gene table for release from data-prep-pipeline' 'Gene' {{inputs.hgnc_gene_table.filename}} 'csv_table'" + + +rule publish_cngene_log2: + inputs: + cngene_log2={"type": "cngene_log2"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "release_cngene_log2_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Updated cngene log2 data for release from data-prep-pipeline' 'PortalOmicsCNGeneLog2' {{inputs.cngene_log2.filename}} 'csv_matrix'" + + +rule publish_filtered_portal_compounds: + inputs: + filtered_portal_compounds={"type": "filtered_portal_compounds"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "release_filtered_portal_compounds_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Filtered portal compounds data for release from data-prep-pipeline' 'PortalCompounds' {{inputs.filtered_portal_compounds.filename}} 'csv_table'" + + +rule publish_subtype_tree: + inputs: + subtype_tree={"type": "subtype_tree"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "release_subtype_tree_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Created SubtypeTree for release from data-prep-pipeline' 'SubtypeTree' {{inputs.subtype_tree.filename}} 'csv_table'" + + +rule publish_context_matrix: + inputs: + context_matrix={"type": "context_matrix"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "release_context_matrix_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Created SubtypeMatrix for release from data-prep-pipeline' 'SubtypeMatrix' {{inputs.context_matrix.filename}} 'csv_matrix'" + + +rule publish_driver_events: + inputs: + driver_events_data={"type": "predictability_driver_events"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "predictability_driver_events_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Generated PredictabilityDriverEventsTransformed for predictability' 'PredictabilityDriverEventsTransformed' {{inputs.driver_events_data.filename}} 'csv_matrix'" + + +rule publish_genetic_derangement: + inputs: + genetic_derangement_data={"type": "predictability_genetic_derangement"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "predictability_genetic_derangement_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Generated PredictabilityGeneticDerangementTransformed for predictability' 'PredictabilityGeneticDerangementTransformed' {{inputs.genetic_derangement_data.filename}} 'csv_matrix'" + + +rule publish_fusion: + inputs: + 
fusion_data={"type": "predictability_fusion"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "predictability_fusion_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Generated PredictabilityFusionTransformed for predictability' 'PredictabilityFusionTransformed' {{inputs.fusion_data.filename}} 'csv_matrix'" + + +# rule publish_lineage: +# inputs: +# lineage_data={"type": "predictability_lineage"}, +# update_taiga_script=fileref('upload_to_taiga.py') +# outputs:{"type": "predictability_lineage_published"} + +# run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Generated lineage data for predictability' 'PredictabilityLineageTransformed' {{inputs.lineage_data.filename}} 'csv_matrix'" + + +rule publish_crispr_confounders: + inputs: + crispr_confounders_data={"type": "predictability_crispr_confounders"}, + update_taiga_script=fileref('upload_to_taiga.py') + outputs:{"type": "predictability_crispr_confounders_published"} + + run "python3 {{inputs.update_taiga_script.filename}} {{config.RELEASE_PERMANAME}} 'Generated PredictabilityCRISPRConfoundersTransformed data for predictability' 'PredictabilityCRISPRConfoundersTransformed' {{inputs.crispr_confounders_data.filename}} 'csv_matrix'" diff --git a/pipeline/data-prep-pipeline/data_prep_pipeline/run_common.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/run_common.conseq new file mode 100644 index 000000000..2db12c491 --- /dev/null +++ b/pipeline/data-prep-pipeline/data_prep_pipeline/run_common.conseq @@ -0,0 +1,9 @@ +include "data_prep_pipeline/update_hgnc_gene_table.conseq" +include "data_prep_pipeline/cngene_log_2_transformation.conseq" +include "data_prep_pipeline/filter_portal_compounds.conseq" +include "data_prep_pipeline/subtype_tree.conseq" +include "data_prep_pipeline/predictability.conseq" + +if "config.get('is_dev', 'True') != 'True'": + include "data_prep_pipeline/publish.conseq" +endif diff --git a/pipeline/data-prep-pipeline/data_prep_pipeline/run_external.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/run_external.conseq new file mode 100644 index 000000000..b1022436d --- /dev/null +++ b/pipeline/data-prep-pipeline/data_prep_pipeline/run_external.conseq @@ -0,0 +1,2 @@ +include "release_inputs_external-DO-NOT-EDIT-ME" +include "data_prep_pipeline/run_common.conseq" diff --git a/pipeline/data-prep-pipeline/data_prep_pipeline/run_internal.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/run_internal.conseq new file mode 100644 index 000000000..b4de6373d --- /dev/null +++ b/pipeline/data-prep-pipeline/data_prep_pipeline/run_internal.conseq @@ -0,0 +1,2 @@ +include "release_inputs_internal-DO-NOT-EDIT-ME" +include "data_prep_pipeline/run_common.conseq" diff --git a/data-prep-pipeline/data_prep_pipeline/subtype_tree.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/subtype_tree.conseq similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/subtype_tree.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/subtype_tree.conseq diff --git a/data-prep-pipeline/data_prep_pipeline/update_hgnc_gene_table.conseq b/pipeline/data-prep-pipeline/data_prep_pipeline/update_hgnc_gene_table.conseq similarity index 100% rename from data-prep-pipeline/data_prep_pipeline/update_hgnc_gene_table.conseq rename to pipeline/data-prep-pipeline/data_prep_pipeline/update_hgnc_gene_table.conseq diff --git a/pipeline/data-prep-pipeline/data_prep_pipeline/upload_to_taiga.py 
b/pipeline/data-prep-pipeline/data_prep_pipeline/upload_to_taiga.py new file mode 100644 index 000000000..6318c51ab --- /dev/null +++ b/pipeline/data-prep-pipeline/data_prep_pipeline/upload_to_taiga.py @@ -0,0 +1,72 @@ +import argparse +from pathlib import Path + +from taigapy.client_v3 import UploadedFile, LocalFormat +from taigapy import create_taiga_client_v3 + + +def update_taiga( + dataset_permaname: str, + description_of_changes: str, + matrix_name_in_taiga: str, + file_local_path: Path, + file_format: str, +) -> None: + """Update a dataset in Taiga with transformed data.""" + assert dataset_permaname, "Dataset permaname cannot be empty" + assert description_of_changes, "Description of changes cannot be empty" + assert matrix_name_in_taiga, "Matrix name in Taiga cannot be empty" + assert file_local_path, "File path cannot be empty" + assert file_format, "File format cannot be empty" + + if file_format == "csv_table": + file_format = LocalFormat.CSV_TABLE + elif file_format == "csv_matrix": + file_format = LocalFormat.CSV_MATRIX + try: + tc = create_taiga_client_v3() + # Update the dataset with the transformed data + version = tc.update_dataset( + dataset_permaname, + description_of_changes, + additions=[ + UploadedFile( + matrix_name_in_taiga, + local_path=file_local_path, + format=file_format, + ) + ], + ) + print( + f"Updated dataset: {version.permaname} to version number: {version.version_number}" + ) + except Exception as e: + print(f"Error updating Taiga: {e}") + raise + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Update Taiga dataset with transformed data." + ) + parser.add_argument("release_permaname", help="Release permaname") + parser.add_argument("description_of_changes", help="Description of the changes") + parser.add_argument("matrix_name_in_taiga", help="Name of the matrix in Taiga") + parser.add_argument( + "file_local_path", help="Path to the file that will be uploaded" + ) + parser.add_argument("file_format", help="Format of the file to upload") + args = parser.parse_args() + + tc = create_taiga_client_v3() + + dataset_permaname = args.release_permaname + print(f"Release dataset permaname is: {dataset_permaname}") + + update_taiga( + dataset_permaname, + args.description_of_changes, + args.matrix_name_in_taiga, + args.file_local_path, + args.file_format, + ) diff --git a/pipeline/data-prep-pipeline/data_prep_pipeline_runner.py b/pipeline/data-prep-pipeline/data_prep_pipeline_runner.py new file mode 100644 index 000000000..aeadc5027 --- /dev/null +++ b/pipeline/data-prep-pipeline/data_prep_pipeline_runner.py @@ -0,0 +1,60 @@ +import argparse +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from base_pipeline_runner import PipelineRunner + + +class DataPrepPipelineRunner(PipelineRunner): + def create_argument_parser(self): + parser = argparse.ArgumentParser(description="Run data prep pipeline") + + # Add common arguments + self.add_common_arguments(parser) + + # Add data-prep-specific arguments + parser.add_argument( + "--external", + action="store_true", + help="Run external pipeline (default is internal)", + ) + return parser + + def get_pipeline_config(self, args): + # Build common config + config = self.build_common_config(args, "data_prep") + # Add data-prep-specific config + config["is_external"] = args.external + + return config + + def get_conseq_file(self, config): + """Get conseq file for data prep pipeline.""" + conseq_files = 
self.config_data["pipelines"]["data_prep"]["conseq_files"] + + if config["is_external"]: + return conseq_files["external"] + else: + return conseq_files["internal"] + + def handle_special_features(self, config): + """Preprocess templates to generate DO-NOT-EDIT-ME files before run.""" + templates = self.config_data["pipelines"]["data_prep"]["templates"] + + template_key = "external" if config["is_external"] else "internal" + template = templates[template_key]["input"] + output = templates[template_key]["output"] + + self.run_via_container( + f"python ../preprocess_taiga_ids.py {template} {output}", config + ) + + def handle_post_run_tasks(self, config): + """After conseq finishes, log dataset usage.""" + self.track_dataset_usage_from_conseq("pipeline/data-prep-pipeline") + + +if __name__ == "__main__": + runner = DataPrepPipelineRunner() + runner.run(Path(__file__)) diff --git a/pipeline/data-prep-pipeline/local_run.sh b/pipeline/data-prep-pipeline/local_run.sh new file mode 100755 index 000000000..a424ff00b --- /dev/null +++ b/pipeline/data-prep-pipeline/local_run.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +ENV_TYPE="$1" + +# Validate input +if [[ -z "$ENV_TYPE" ]]; then + echo "Error: Parameter is required" + echo "Usage: $0 [internal|external]" + exit 1 +fi + +if [[ "$ENV_TYPE" != "internal" && "$ENV_TYPE" != "external" ]]; then + echo "Error: Parameter must be 'internal' or 'external'" + echo "Usage: $0 [internal|external]" + exit 1 +fi + +python ../preprocess_taiga_ids.py ../../../depmap-deploy/non-public-pipeline-files/data-prep-pipeline/release_inputs_${ENV_TYPE}.template release_inputs_${ENV_TYPE}-DO-NOT-EDIT-ME && conseq run data_prep_pipeline/run_${ENV_TYPE}.conseq diff --git a/data-prep-pipeline/poetry.lock b/pipeline/data-prep-pipeline/poetry.lock similarity index 89% rename from data-prep-pipeline/poetry.lock rename to pipeline/data-prep-pipeline/poetry.lock index a938f6811..8bc78e3cc 100644 --- a/data-prep-pipeline/poetry.lock +++ b/pipeline/data-prep-pipeline/poetry.lock @@ -64,10 +64,7 @@ files = [ [package.dependencies] jmespath = ">=0.7.1,<2.0.0" python-dateutil = ">=2.1,<3.0.0" -urllib3 = [ - {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""}, - {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""}, -] +urllib3 = {version = ">=1.25.4,<1.27", markers = "python_version < \"3.10\""} [package.extras] crt = ["awscrt (==0.22.0)"] @@ -177,18 +174,6 @@ files = [ [package.dependencies] pycparser = "*" -[[package]] -name = "cfgv" -version = "3.4.0" -description = "Validate configuration and produce human readable error messages." 
-optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, - {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, -] - [[package]] name = "charset-normalizer" version = "3.4.1" @@ -400,18 +385,6 @@ files = [ {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, ] -[[package]] -name = "distlib" -version = "0.3.9" -description = "Distribution utilities" -optional = false -python-versions = "*" -groups = ["main"] -files = [ - {file = "distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87"}, - {file = "distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403"}, -] - [[package]] name = "exceptiongroup" version = "1.2.2" @@ -419,7 +392,6 @@ description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" groups = ["main"] -markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -443,23 +415,6 @@ files = [ [package.extras] tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] -[[package]] -name = "filelock" -version = "3.16.1" -description = "A platform independent file lock." -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, - {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, -] - -[package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] -typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] - [[package]] name = "google-api-core" version = "2.24.0" @@ -475,10 +430,7 @@ files = [ [package.dependencies] google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" -proto-plus = [ - {version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""}, - {version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""}, -] +proto-plus = ">=1.22.3,<2.0.0dev" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -672,21 +624,6 @@ files = [ [package.dependencies] numpy = ">=1.19.3" -[[package]] -name = "identify" -version = "2.6.5" -description = "File identification library for Python" -optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "identify-2.6.5-py2.py3-none-any.whl", hash = "sha256:14181a47091eb75b337af4c23078c9d09225cd4c48929f521f3bf16b09d02566"}, - {file = "identify-2.6.5.tar.gz", hash = 
"sha256:c10b33f250e5bba374fae86fb57f3adcebf1161bce7cdf92031915fd480c13bc"}, -] - -[package.extras] -license = ["ukkonen"] - [[package]] name = "idna" version = "3.10" @@ -709,7 +646,6 @@ description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.9\"" files = [ {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"}, @@ -903,18 +839,6 @@ files = [ {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] -[[package]] -name = "nodeenv" -version = "1.9.1" -description = "Node.js virtual environment builder" -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -groups = ["main"] -files = [ - {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, - {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, -] - [[package]] name = "numpy" version = "1.26.4" @@ -1011,11 +935,7 @@ files = [ ] [package.dependencies] -numpy = [ - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, - {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version == \"3.10\""}, -] +numpy = {version = ">=1.20.3", markers = "python_version < \"3.10\""} python-dateutil = ">=2.8.1" pytz = ">=2020.1" @@ -1071,25 +991,6 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.11.2)"] -[[package]] -name = "pre-commit" -version = "3.8.0" -description = "A framework for managing and maintaining multi-language pre-commit hooks." 
-optional = false -python-versions = ">=3.9" -groups = ["main"] -files = [ - {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, - {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, -] - -[package.dependencies] -cfgv = ">=2.0.0" -identify = ">=1.0.0" -nodeenv = ">=0.11.1" -pyyaml = ">=5.1" -virtualenv = ">=20.10.0" - [[package]] name = "prompt-toolkit" version = "3.0.48" @@ -1369,69 +1270,6 @@ files = [ {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"}, ] -[[package]] -name = "pyyaml" -version = "6.0.2" -description = "YAML parser and emitter for Python" -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, - {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, - {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, - {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, - {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, - {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, - {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, - {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, - {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, - {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, - {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = 
"sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, - {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, - {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, - {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, - {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, - {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, - {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, - {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, - {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, - {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, - {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, - {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, - {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, - {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, - {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, - {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, - {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, - {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, - {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, - {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, - {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, - {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, -] - [[package]] name = "pyzmq" version = "26.2.0" @@ -1655,14 +1493,14 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "taigapy" -version = "4.0.0" +version = "4.1.0" description = "Client library for fetching data from Taiga" optional = false python-versions = ">=3.9" groups = ["main"] files = [ - {file = "taigapy-4.0.0-py3-none-any.whl", hash = "sha256:da46ae136f47cefc68b6be8e71681f7c0b6359606a61e228e57fe783f8c29505"}, - {file = "taigapy-4.0.0.tar.gz", hash = "sha256:d567aa76f7e2d9b88d2848e529188924f989a303fee6509546fccc43c47ee94b"}, + {file = "taigapy-4.1.0-py3-none-any.whl", hash = "sha256:9dd08703c56f29c8d834688bf4230819ee5b03713a842b56013fadfcfb41be63"}, + {file = "taigapy-4.1.0.tar.gz", hash = "sha256:cd87e22e50ec9aa3653d22ce11e2233dc14cc90cd1a6b2c81f59e93faa6c3e05"}, ] [package.dependencies] @@ -1671,7 +1509,6 @@ colorful = ">=0.5.5,<0.6.0" google-cloud-storage = ">=2.2.0" h5py = ">=3.10.0,<4.0.0" pandas = ">=1.0.0" -pre-commit = ">=3.7.0,<4.0.0" pyarrow = ">3.0.0" requests = ">=2.28.2,<3.0.0" sqlite-shelve = ">=2.0.1,<3.0.0" @@ -1761,7 +1598,6 @@ description = "HTTP library with thread-safe connection pooling, file post, and optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" groups = ["main"] -markers = "python_version == \"3.9\"" files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, @@ -1772,46 +1608,6 @@ brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and p secure = ["certifi", "cryptography (>=1.3.4)", "idna 
(>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] -[[package]] -name = "urllib3" -version = "2.3.0" -description = "HTTP library with thread-safe connection pooling, file post, and more." -optional = false -python-versions = ">=3.9" -groups = ["main"] -markers = "python_version >= \"3.10\"" -files = [ - {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, - {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9) ; platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; platform_python_implementation != \"CPython\""] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "virtualenv" -version = "20.28.1" -description = "Virtual Python Environment builder" -optional = false -python-versions = ">=3.8" -groups = ["main"] -files = [ - {file = "virtualenv-20.28.1-py3-none-any.whl", hash = "sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb"}, - {file = "virtualenv-20.28.1.tar.gz", hash = "sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329"}, -] - -[package.dependencies] -distlib = ">=0.3.7,<1" -filelock = ">=3.12.2,<4" -platformdirs = ">=3.9.1,<5" - -[package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] -test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8) ; platform_python_implementation == \"PyPy\" or platform_python_implementation == \"CPython\" and sys_platform == \"win32\" and python_version >= \"3.13\"", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10) ; platform_python_implementation == \"CPython\""] - [[package]] name = "wcwidth" version = "0.2.13" @@ -1831,7 +1627,6 @@ description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.9" groups = ["main"] -markers = "python_version == \"3.9\"" files = [ {file = "zipp-3.21.0-py3-none-any.whl", hash = "sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, @@ -1847,5 +1642,5 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" -python-versions = "^3.9" -content-hash = "e696df3cfdbb9a20491529989064e9f98dc13508c82e2c50d7afa4809f568b78" +python-versions = ">=3.9, <3.10" +content-hash = "82a16aa588e96120474e5d652b2a7678bd5bc033b9c210d5d4e12136d224de60" diff --git a/data-prep-pipeline/pyproject.toml b/pipeline/data-prep-pipeline/pyproject.toml similarity index 92% rename from data-prep-pipeline/pyproject.toml rename to pipeline/data-prep-pipeline/pyproject.toml index 03fae2290..77365b528 100644 --- a/data-prep-pipeline/pyproject.toml +++ b/pipeline/data-prep-pipeline/pyproject.toml @@ -8,11 +8,11 @@ package-mode = false [tool.poetry.dependencies] -python = "^3.9" +python = ">=3.9, <3.10" pandas = "1.5.3" click = "^8.1.7" numpy = "^1.25.1" -taigapy = {version = "4.0.0", source = "gcp-artifact-registry"} +taigapy = {version = 
"4.1.0", source = "gcp-artifact-registry"} ipykernel = "^6.29.5" diff --git a/data-prep-pipeline/scripts/cn_gene/transform_cngene_to_log2.py b/pipeline/data-prep-pipeline/scripts/cn_gene/transform_cngene_to_log2.py similarity index 77% rename from data-prep-pipeline/scripts/cn_gene/transform_cngene_to_log2.py rename to pipeline/data-prep-pipeline/scripts/cn_gene/transform_cngene_to_log2.py index 2ef59c91e..0b4c84593 100644 --- a/data-prep-pipeline/scripts/cn_gene/transform_cngene_to_log2.py +++ b/pipeline/data-prep-pipeline/scripts/cn_gene/transform_cngene_to_log2.py @@ -12,10 +12,14 @@ def transform_cngene_to_log2(cngene_dataset_id: str) -> pd.DataFrame: cngene_expression_data = tc.get(cngene_dataset_id) print("Filtering to default entries per model...") - filtered_cngene_expression_data = cngene_expression_data[cngene_expression_data["IsDefaultEntryForModel"] == "Yes"].copy() + filtered_cngene_expression_data = cngene_expression_data[ + cngene_expression_data["IsDefaultEntryForModel"] == "Yes" + ].copy() + + assert ( + not filtered_cngene_expression_data["ModelID"].duplicated().any() + ), "Duplicate ModelID after filtering" - assert not filtered_cngene_expression_data["ModelID"].duplicated().any(), "Duplicate ModelID after filtering" - print("Dropping some metadata columns...") cols_to_drop = [ "SequencingID", @@ -23,23 +27,31 @@ def transform_cngene_to_log2(cngene_dataset_id: str) -> pd.DataFrame: "IsDefaultEntryForModel", "IsDefaultEntryForMC", ] - existing_cols_to_drop = [c for c in cols_to_drop if c in filtered_cngene_expression_data.columns] - filtered_cngene_expression_data = filtered_cngene_expression_data.drop(columns=existing_cols_to_drop) + existing_cols_to_drop = [ + c for c in cols_to_drop if c in filtered_cngene_expression_data.columns + ] + filtered_cngene_expression_data = filtered_cngene_expression_data.drop( + columns=existing_cols_to_drop + ) print("Setting ModelID as index...") - filtered_cngene_expression_data = filtered_cngene_expression_data.set_index("ModelID") + filtered_cngene_expression_data = filtered_cngene_expression_data.set_index( + "ModelID" + ) filtered_cngene_expression_data.index.name = None # Check for columns with ALL NaN values count_all_na_columns = filtered_cngene_expression_data.isna().all().sum() print(f"Number of columns with ALL NA values: {count_all_na_columns}") - + if count_all_na_columns > 0: print(f"Data shape before dropping: {filtered_cngene_expression_data.shape}") - print("Dropping columns with all NaN values...") - filtered_cngene_expression_data = filtered_cngene_expression_data.dropna(axis=1, how="all") + print("Dropping columns with all NaN values...") + filtered_cngene_expression_data = filtered_cngene_expression_data.dropna( + axis=1, how="all" + ) print(f"Data shape after dropping: {filtered_cngene_expression_data.shape}") - + print("Transforming CN gene expression data to log2 scale...") log2_transformed_data = np.log2(filtered_cngene_expression_data + 1) print("Transformed CN gene expression data to log2 scale") diff --git a/pipeline/data-prep-pipeline/scripts/hgnc_gene_table/add_ce_and_selectivity.py b/pipeline/data-prep-pipeline/scripts/hgnc_gene_table/add_ce_and_selectivity.py new file mode 100644 index 000000000..70f51b387 --- /dev/null +++ b/pipeline/data-prep-pipeline/scripts/hgnc_gene_table/add_ce_and_selectivity.py @@ -0,0 +1,223 @@ +import argparse +import pandas as pd +from typing import Tuple + +from taigapy import create_taiga_client_v3 + +# Constants for calculating selectivity +DEPENDENCY_THRESHOLD = 0.5 
+SELECTIVITY_THRESHOLD = -0.86 + + +def parse_gene_series(gene_series): + """ + Parse pandas Series with 'GENE_SYMBOL (entrez_id)' format. + """ + pattern = r"^(?P<gene_symbol>[^\(]+?)\s*\(\s*(?P<entrez_id>\d+)\s*\)$" + return gene_series.str.extract(pattern) + + +def parse_and_validate_genes(gene_series, name): + """ + Parse and validate gene series, converting entrez_id to numeric. + """ + parsed = parse_gene_series(gene_series) + parsed["entrez_id"] = pd.to_numeric(parsed["entrez_id"], errors="coerce") + + if not parsed.notnull().all().all(): + raise ValueError(f"Failed to parse {name}") + + return parsed + + +def calculate_gene_selectivity( + crispr_gene_dependency: pd.DataFrame, + crispr_gene_effect: pd.DataFrame, + dependency_threshold: float = DEPENDENCY_THRESHOLD, + selectivity_threshold: float = SELECTIVITY_THRESHOLD, +) -> pd.Series: + """ + Calculate gene selectivity based on gene dependency and effect data. + """ + + # Count dependent lines for each gene + dep_lines = (crispr_gene_dependency > dependency_threshold).sum(axis=0) + + # Calculate statistical moments + skewness = crispr_gene_effect.skew(axis=0) + kurtosis = crispr_gene_effect.kurtosis(axis=0) + + # Calculate is_strongly_selective + is_strongly_selective = (skewness * kurtosis < selectivity_threshold) & ( + dep_lines > 0 + ) + + return is_strongly_selective + + +def load_taiga_data( + tc, hgnc_id: str, ce_id: str, effect_id: str, dependency_id: str +) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: + """ + Load all required datasets from Taiga. + """ + hgnc_gene_table = tc.get(hgnc_id) + crispr_inferred_common_essentials = tc.get(ce_id) + crispr_gene_effect = tc.get(effect_id) + crispr_gene_dependency = tc.get(dependency_id) + + assert ( + "entrez_id" in hgnc_gene_table.columns + ), "HGNC table must have 'entrez_id' column" + assert ( + "Essentials" in crispr_inferred_common_essentials.columns + ), "Common essentials must have 'Essentials' column" + + return ( + hgnc_gene_table, + crispr_inferred_common_essentials, + crispr_gene_effect, + crispr_gene_dependency, + ) + + +def add_essentiality_to_hgnc( + hgnc_gene_table: pd.DataFrame, + common_essentials: pd.DataFrame, + gene_effect: pd.DataFrame, +) -> pd.DataFrame: + """Add essentiality information to HGNC gene table.""" + print( + "Number of rows in crispr_inferred_common_essentials: ", len(common_essentials) + ) + + parsed_ce = parse_and_validate_genes( + common_essentials["Essentials"], "common essential genes" + ) + parsed_gene_effect = parse_and_validate_genes( + pd.Series(gene_effect.columns), "gene effect column names" + ) + + # Check that all common essentials are in the gene effect matrix + missing_essentials = ~parsed_ce["entrez_id"].isin(parsed_gene_effect["entrez_id"]) + if missing_essentials.any(): + raise ValueError( + f"Found {parsed_ce[missing_essentials]['entrez_id'].values} common essential genes that are not in the gene effect matrix" + ) + + # Add a common essential column so that we now have a full gene list with common essentials (the crispr_inferred_common_essentials matrix only contains True values) + full_gene_list_with_common_essentials = parsed_gene_effect.copy() + full_gene_list_with_common_essentials[ + "is_common_essential" + ] = full_gene_list_with_common_essentials["entrez_id"].isin(parsed_ce["entrez_id"]) + + print(f"Total genes in gene_effect: {len(parsed_gene_effect)}") + print( + f"Common essentials found in gene_effect: {full_gene_list_with_common_essentials['is_common_essential'].sum()}" + ) + + # Add essentiality column to HGNC gene
table + hgnc_gene_table = hgnc_gene_table.copy() + hgnc_gene_table["entrez_id"] = pd.to_numeric( + hgnc_gene_table["entrez_id"], errors="coerce" + ) + essentiality_mapping = full_gene_list_with_common_essentials.set_index("entrez_id")[ + "is_common_essential" + ].to_dict() + + hgnc_gene_table["essentiality"] = ( + hgnc_gene_table["entrez_id"] + .map(essentiality_mapping) + .replace({True: "common essential", False: "not common essential"}) + ) + + print( + f"Essentiality column value counts: {hgnc_gene_table['essentiality'].value_counts(dropna=False)}" + ) + + return hgnc_gene_table + + +def add_selectivity_to_hgnc( + hgnc_gene_table: pd.DataFrame, + gene_dependency: pd.DataFrame, + gene_effect: pd.DataFrame, +) -> pd.DataFrame: + """Add selectivity information to HGNC gene table.""" + # Validate that gene dependency and gene effect matrices have the same number of genes + if len(gene_dependency.columns) != len(gene_effect.columns): + raise ValueError( + f"Gene dependency matrix has {len(gene_dependency.columns)} genes " + f"but gene effect matrix has {len(gene_effect.columns)} genes" + ) + + is_strongly_selective = calculate_gene_selectivity(gene_dependency, gene_effect) + + parsed_strongly_selective = parse_and_validate_genes( + pd.Series(is_strongly_selective.index), "selectivity gene names" + ) + parsed_strongly_selective["selectivity"] = is_strongly_selective.values + + selectivity_mapping = parsed_strongly_selective.set_index("entrez_id")[ + "selectivity" + ].to_dict() + + hgnc_gene_table = hgnc_gene_table.copy() + hgnc_gene_table["selectivity"] = ( + hgnc_gene_table["entrez_id"] + .map(selectivity_mapping) + .replace({True: "strongly selective", False: "not strongly selective"}) + ) + + print( + f"Selectivity column value counts: {hgnc_gene_table['selectivity'].value_counts(dropna=False)}" + ) + + return hgnc_gene_table + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Add common essential and selectivity to HGNC gene table." 
+ ) + parser.add_argument("hgnc_gene_table_taiga_id", help="Taiga ID of HGNC gene table") + parser.add_argument( + "crispr_inferred_common_essentials_taiga_id", + help="Taiga ID of CRISPR inferred common essentials", + ) + parser.add_argument( + "crispr_gene_effect_taiga_id", help="Taiga ID of CRISPR gene effect" + ) + parser.add_argument( + "crispr_gene_dependency_taiga_id", help="Taiga ID of CRISPR gene dependency" + ) + parser.add_argument("output", help="Path to write the output") + args = parser.parse_args() + + tc = create_taiga_client_v3() + + # Load all data + ( + hgnc_gene_table, + crispr_inferred_common_essentials, + crispr_gene_effect, + crispr_gene_dependency, + ) = load_taiga_data( + tc, + args.hgnc_gene_table_taiga_id, + args.crispr_inferred_common_essentials_taiga_id, + args.crispr_gene_effect_taiga_id, + args.crispr_gene_dependency_taiga_id, + ) + + # Add essentiality + hgnc_gene_table = add_essentiality_to_hgnc( + hgnc_gene_table, crispr_inferred_common_essentials, crispr_gene_effect + ) + + # Add selectivity + hgnc_gene_table = add_selectivity_to_hgnc( + hgnc_gene_table, crispr_gene_dependency, crispr_gene_effect + ) + + hgnc_gene_table.to_csv(args.output, index=False) diff --git a/data-prep-pipeline/scripts/portal_compounds/filter_portal_compounds.py b/pipeline/data-prep-pipeline/scripts/portal_compounds/filter_portal_compounds.py similarity index 100% rename from data-prep-pipeline/scripts/portal_compounds/filter_portal_compounds.py rename to pipeline/data-prep-pipeline/scripts/portal_compounds/filter_portal_compounds.py diff --git a/data-prep-pipeline/scripts/predictability/transform_crispr_confounders.py b/pipeline/data-prep-pipeline/scripts/predictability/transform_crispr_confounders.py similarity index 100% rename from data-prep-pipeline/scripts/predictability/transform_crispr_confounders.py rename to pipeline/data-prep-pipeline/scripts/predictability/transform_crispr_confounders.py diff --git a/data-prep-pipeline/scripts/predictability/transform_driver_events.py b/pipeline/data-prep-pipeline/scripts/predictability/transform_driver_events.py similarity index 92% rename from data-prep-pipeline/scripts/predictability/transform_driver_events.py rename to pipeline/data-prep-pipeline/scripts/predictability/transform_driver_events.py index cfc2bb351..fea6dd2e5 100644 --- a/data-prep-pipeline/scripts/predictability/transform_driver_events.py +++ b/pipeline/data-prep-pipeline/scripts/predictability/transform_driver_events.py @@ -34,7 +34,7 @@ def process_and_generate_driver_events( oncokb_annotated = tc.get(oncokb_annotated_taiga_id) print("Transforming driver events data...") - mutations["EntrezGeneID"] = mutations["EntrezGeneID"].apply(reformat_entrez_id) + # mutations["EntrezGeneID"] = mutations["EntrezGeneID"].apply(reformat_entrez_id) # Don't need this in 25q3 since the EntrezGeneID is already a string oncokb_annotated["ProteinChange"] = oncokb_annotated["ProteinChange"].map( "p.{}".format ) @@ -80,6 +80,8 @@ def process_and_generate_driver_events( driver_events_matrix.set_index("ModelID", inplace=True) driver_events_matrix.index.name = None + all_models = mutations["ModelID"].unique() + driver_events_matrix = driver_events_matrix.reindex(all_models, fill_value=False) driver_events_matrix = driver_events_matrix.replace({True: 1.0, False: 0.0}) print("Transformed driver events data") diff --git a/data-prep-pipeline/scripts/predictability/transform_genetic_derangement.py b/pipeline/data-prep-pipeline/scripts/predictability/transform_genetic_derangement.py similarity 
index 100% rename from data-prep-pipeline/scripts/predictability/transform_genetic_derangement.py rename to pipeline/data-prep-pipeline/scripts/predictability/transform_genetic_derangement.py diff --git a/data-prep-pipeline/scripts/predictability/transform_lineage.py b/pipeline/data-prep-pipeline/scripts/predictability/transform_lineage.py similarity index 100% rename from data-prep-pipeline/scripts/predictability/transform_lineage.py rename to pipeline/data-prep-pipeline/scripts/predictability/transform_lineage.py diff --git a/data-prep-pipeline/scripts/subtype_tree/create_context_matrix.py b/pipeline/data-prep-pipeline/scripts/subtype_tree/create_context_matrix.py similarity index 94% rename from data-prep-pipeline/scripts/subtype_tree/create_context_matrix.py rename to pipeline/data-prep-pipeline/scripts/subtype_tree/create_context_matrix.py index 1f96aa25e..8b07393f4 100644 --- a/data-prep-pipeline/scripts/subtype_tree/create_context_matrix.py +++ b/pipeline/data-prep-pipeline/scripts/subtype_tree/create_context_matrix.py @@ -11,16 +11,9 @@ def load_data(model_taiga_id, molecular_subtypes_taiga_id, subtype_tree_path): tc = create_taiga_client_v3() ## Load the models table - models = ( - tc.get(model_taiga_id).loc[ - :, - [ - "ModelID", - "OncotreeCode", - "DepmapModelType", - ], - ] - ) + models = tc.get(model_taiga_id).loc[ + :, ["ModelID", "OncotreeCode", "DepmapModelType",], + ] ## Load the subtype tree # the subtype tree is created using a different script within the data prep pipeline @@ -34,16 +27,19 @@ def load_data(model_taiga_id, molecular_subtypes_taiga_id, subtype_tree_path): ## Outer join so it includes all models (even those with an annotated type ## that is not part of the tree) ## But then drop rows where ModelID is NA - which are subtypes with no models - model_tree = models.loc[:, ["ModelID", "OncotreeCode", "DepmapModelType"]].merge( - subtype_tree, how='outer' - ).dropna(subset=['ModelID']) + model_tree = ( + models.loc[:, ["ModelID", "OncotreeCode", "DepmapModelType"]] + .merge(subtype_tree, how="outer") + .dropna(subset=["ModelID"]) + ) - #make sure that model_tree contains all model ID's in the genetic subtypes matrix - #this has caused indexing errors in the past when this is not true + # make sure that model_tree contains all model ID's in the genetic subtypes matrix + # this has caused indexing errors in the past when this is not true assert set(genetic_subtypes.index).issubset(model_tree.ModelID) return model_tree, subtype_tree, genetic_subtypes + def get_context_models(subtype_node, subtype_tree, genetic_subtypes, model_tree): """ A function to find the set of models that belong to any particular context diff --git a/data-prep-pipeline/scripts/subtype_tree/create_subtype_tree.py b/pipeline/data-prep-pipeline/scripts/subtype_tree/create_subtype_tree.py similarity index 100% rename from data-prep-pipeline/scripts/subtype_tree/create_subtype_tree.py rename to pipeline/data-prep-pipeline/scripts/subtype_tree/create_subtype_tree.py diff --git a/pipeline/image-name b/pipeline/image-name new file mode 120000 index 000000000..6250f67ad --- /dev/null +++ b/pipeline/image-name @@ -0,0 +1 @@ +build-pipeline-docker-images/pipeline-run-docker/image-name \ No newline at end of file diff --git a/pipeline/jenkins-run-nonquarterly.sh b/pipeline/jenkins-run-nonquarterly.sh deleted file mode 100755 index 830a4eec8..000000000 --- a/pipeline/jenkins-run-nonquarterly.sh +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/bash - -if [ "$1" == "" ]; then - echo "needs name of environment" - exit 
1 -fi - -ENV_NAME="$1" -CONSEQ_FILE="nonquarterly_$ENV_NAME.conseq" -DOCKER_IMAGE=us.gcr.io/broad-achilles/depmap-pipeline-run:v14 -COMMIT_SHA=`git rev-parse HEAD` -if [ "${COMMIT_SHA}" == "" ]; then - COMMIT_SHA="unknown" -fi - -set -ex -GOOGLE_APPLICATION_CREDENTIALS=/etc/google/auth/application_default_credentials.json docker pull ${DOCKER_IMAGE} - -PUBLISH_ROOT=$(dirname $(grep publish_dest "pipeline/$CONSEQ_FILE" | sed 's/.*"\(.*\)".*/\1/' )) -EXPORT_PATH="$PUBLISH_ROOT/export" - -# Copy all logs. I'm copying this to a new directory because each time we run we gc the state directory and that -# causes old logs to be deleted which makes it harder to investigate what happened. -function backup_conseq_logs { - file_list=`mktemp` - if [ -e pipeline/state ] ; then - ( cd pipeline/state && \ - find . -name "std*.txt" > ${file_list} && \ - find . -name "*.sh" >> ${file_list} && \ - find . -name "*.log" >> ${file_list} ) - rsync -a pipeline/state preprocess-logs --files-from=${file_list} - rm ${file_list} - fi -} - -function run_via_container { - COMMAND="$1" - - docker run \ - --rm \ - -v "$PWD":/work \ - -w /work/pipeline \ - -v "/etc/depmap-pipeline-runner-creds/broad-paquitas:/aws-keys/broad-paquitas" \ - -v "/etc/depmap-pipeline-runner-creds/sparkles:/root/.sparkles-cache" \ - -v "/etc/depmap-pipeline-runner-creds/depmap-pipeline-runner.json":/etc/google_default_creds.json \ - -v "/data2/depmap-pipeline-taiga:/root/.taiga" \ - -e GOOGLE_APPLICATION_CREDENTIALS=/etc/google_default_creds.json \ - -w /work/pipeline \ - --name "demap-pipeline-run-$ENV_NAME" \ - ${DOCKER_IMAGE} \ - bash -c "source /aws-keys/broad-paquitas && source /install/depmap-py/bin/activate && $COMMAND" -} - -# use /data2/depmap-pipeline-taiga as the taiga dir because -# different versions of taigapy seem to conflict in pickle format - -# backup logs before running GC -backup_conseq_logs - -if [ "$MANUALLY_RUN_CONSEQ" = "true" ]; then - echo "executing: conseq $CONSEQ_ARGS" - run_via_container "conseq $CONSEQ_ARGS" -else - # Clean up unused directories from past runs - run_via_container "conseq gc" - - # Kick off new run - run_via_container "conseq run --addlabel commitsha=${COMMIT_SHA} --no-reattach --maxfail 5 --remove-unknown-artifacts -D sparkles_path=/install/sparkles/bin/sparkles $CONSEQ_FILE $CONSEQ_ARGS" - - # Generate export - run_via_container "conseq export $CONSEQ_FILE $EXPORT_PATH" - - # copy the latest logs - backup_conseq_logs -fi - -# docker container is writing files as root. Fix up permissions after job completes -sudo chown -R ubuntu . 
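The deleted jenkins-run-nonquarterly.sh hard-coded its Docker image, credential mounts, and conseq flags directly in shell. The pipeline_config.yaml added below moves those settings into data that the Python pipeline runners read at startup. As a rough sketch of that pattern (the repo's actual base_pipeline_runner.py is not shown in this diff, so the function name and the PyYAML dependency here are assumptions), a runner could assemble the equivalent `docker run` call from the config like this:

```python
# Minimal sketch only: read pipeline_config.yaml and rebuild the "docker run"
# invocation that the deleted shell script used to hard-code. The function name
# and PyYAML usage are illustrative assumptions, not the repo's implementation.
import os
import yaml  # assumes PyYAML is available


def build_docker_command(config_path, pipeline, env_name, docker_image, command):
    with open(config_path) as f:
        cfg = yaml.safe_load(f)

    defaults = cfg["defaults"]
    volumes = cfg["docker"]["volumes"]
    pipeline_cfg = cfg["pipelines"][pipeline]

    cmd = [
        "docker", "run", "--rm",
        "-v", f"{os.getcwd()}:{volumes['work_dir']}",
        "-v", f"{defaults['creds_dir']}/broad-paquitas:{volumes['aws_keys']}",
        "-v", f"{defaults['creds_dir']}/sparkles:{volumes['sparkles_cache']}",
        "-v", f"{defaults['creds_dir']}/depmap-pipeline-runner.json:{volumes['google_creds']}",
        "-v", f"{defaults['taiga_dir']}:{volumes['taiga']}",
        "-w", pipeline_cfg["working_dir"],
        "--name", f"depmap-pipeline-run-{env_name}",
    ]
    # Environment variables defined under docker.env_vars in the YAML
    for key, value in cfg["docker"]["env_vars"].items():
        cmd += ["-e", f"{key}={value}"]
    cmd += [docker_image, "bash", "-c", command]
    return cmd


# Hypothetical usage, reusing the image tag from the deleted script:
# build_docker_command("pipeline/pipeline_config.yaml", "preprocessing", "internal",
#                      "us.gcr.io/broad-achilles/depmap-pipeline-run:v14", "conseq gc")
```

Keeping the mounts, credentials, and environment variables in one YAML file gives the preprocessing and data-prep runners a single source of truth instead of duplicating shell boilerplate per environment.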
diff --git a/pipeline/pipeline_config.yaml b/pipeline/pipeline_config.yaml new file mode 100644 index 000000000..f60f72a99 --- /dev/null +++ b/pipeline/pipeline_config.yaml @@ -0,0 +1,74 @@ +# Default paths +defaults: + taiga_dir: /data2/depmap-pipeline-taiga + creds_dir: /etc/depmap-pipeline-runner-creds + +# Required credential files (relative to creds_dir) +credentials: + required_files: + - broad-paquitas + - sparkles + - depmap-pipeline-runner.json + +# Docker configuration +docker: + # Volume mount paths inside container + volumes: + work_dir: /work + aws_keys: /aws-keys/broad-paquitas + sparkles_cache: /root/.sparkles-cache + google_creds: /etc/google_default_creds.json + taiga: /root/.taiga + + # Docker run options (keyed by directory name) + options: + preprocessing-pipeline: + security_opt: seccomp=unconfined + data-prep-pipeline: {} # No special options + + # Environment variables to set in container + env_vars: + GOOGLE_APPLICATION_CREDENTIALS: /etc/google_default_creds.json + +# Conseq configuration +conseq: + sparkles_path: /install/sparkles/bin/sparkles + max_fail: 20 + common_args: + - --no-reattach + - --remove-unknown-artifacts + gc_enabled: true + +# Pipeline-specific configuration +pipelines: + preprocessing: + state_path: pipeline/preprocessing-pipeline/state + log_destination: preprocess-logs + working_dir: /work/pipeline/preprocessing-pipeline + + # Environment name mapping + env_mapping: + qa: iqa + external: external + dqa: dqa + internal: internal + test-prefix: iqa # Any env starting with "test-" maps to this + + data_prep: + state_path: pipeline/data-prep-pipeline/state + log_destination: data-prep-logs + working_dir: /work/pipeline/data-prep-pipeline + + # Template files for external/internal runs + templates: + external: + input: release_inputs_external.template + output: release_inputs_external-DO-NOT-EDIT-ME + internal: + input: release_inputs_internal.template + output: release_inputs_internal-DO-NOT-EDIT-ME + + # Conseq files + conseq_files: + external: data_prep_pipeline/run_external.conseq + internal: data_prep_pipeline/run_internal.conseq diff --git a/pipeline/preprocess_taiga_ids.py b/pipeline/preprocess_taiga_ids.py index 1882e631b..185816d11 100644 --- a/pipeline/preprocess_taiga_ids.py +++ b/pipeline/preprocess_taiga_ids.py @@ -1,7 +1,7 @@ import sys import re from taigapy import create_taiga_client_v3 -import requests +import json import os # this script exists to rewrite any Taiga IDs into their canonical form. (This allows conseq to recognize when data files are the same by just comparing taiga IDs) @@ -12,6 +12,12 @@ tc = create_taiga_client_v3() +def _resolve_versioned_dataset_id(taiga_permaname): + if "." 
in taiga_permaname: + return taiga_permaname + return tc.get_latest_version_id(taiga_permaname) + + def _rewrite_stream(vars, in_name, in_lines, out_fd): fd = out_fd for line in in_lines: @@ -29,16 +35,31 @@ def _rewrite_stream(vars, in_name, in_lines, out_fd): variable_name = m.group(1) value = m.group(2) vars[variable_name] = value + # Export a variable for downstream consumers as RELEASE_PERMANAME when the variable is the release permaname + if variable_name == "virtual_permaname": + vars["RELEASE_PERMANAME"] = value + fd.write(f'let RELEASE_PERMANAME="{value}"\n') + + m = re.match("(.*)PREPROCESS_FORMAT_STR\\(([^ ,]+)\\)(.*)", line, re.DOTALL) + if m is not None: + line_prefix = m.group(1) + template = m.group(2) + line_suffix = m.group(3) + line = line_prefix + repr(json.loads(template.format(**vars))) + line_suffix m = re.match("(.*)PREPROCESS_TAIGA_ID\\(([^ ,]+)\\)(.*)", line, re.DOTALL) if m is not None: line_prefix = m.group(1) orig_taiga_dataset_var_name = m.group(2) line_suffix = m.group(3) + taiga_permaname = vars[orig_taiga_dataset_var_name] + taiga_dataset_id_with_latest_version = _resolve_versioned_dataset_id( + taiga_permaname + ) line = ( line_prefix + '"' - + vars[orig_taiga_dataset_var_name] + + taiga_dataset_id_with_latest_version + '"' + line_suffix ) @@ -48,17 +69,20 @@ def _rewrite_stream(vars, in_name, in_lines, out_fd): ) if m is not None: orig_taiga_dataset_var_name = m.group(2) - taiga_filename = m.group(3) line_prefix = m.group(1) line_suffix = m.group(4) - taiga_id = vars[orig_taiga_dataset_var_name] + "/" + taiga_filename + + taiga_filename = m.group(3) + taiga_permaname = vars[orig_taiga_dataset_var_name] + taiga_dataset_id_with_latest_version = _resolve_versioned_dataset_id( + taiga_permaname + ) + taiga_id = taiga_dataset_id_with_latest_version + "/" + taiga_filename try: - tc.get_canonical_id(taiga_id) + canonical = tc.get_canonical_id(taiga_id) except: print(f"failed to get data from canonical taiga id for {taiga_id}") - raise - line = line_prefix + '"' + tc.get_canonical_id(taiga_id) + '"' + line_suffix - + line = line_prefix + '"' + canonical + '"' + line_suffix fd.write(line) diff --git a/pipeline/_run_common.conseq b/pipeline/preprocessing-pipeline/_run_common.conseq similarity index 100% rename from pipeline/_run_common.conseq rename to pipeline/preprocessing-pipeline/_run_common.conseq diff --git a/pipeline/_run_dmc.conseq b/pipeline/preprocessing-pipeline/_run_dmc.conseq similarity index 100% rename from pipeline/_run_dmc.conseq rename to pipeline/preprocessing-pipeline/_run_dmc.conseq diff --git a/pipeline/_run_external.conseq b/pipeline/preprocessing-pipeline/_run_external.conseq similarity index 100% rename from pipeline/_run_external.conseq rename to pipeline/preprocessing-pipeline/_run_external.conseq diff --git a/pipeline/cell_lines.conseq b/pipeline/preprocessing-pipeline/cell_lines.conseq similarity index 100% rename from pipeline/cell_lines.conseq rename to pipeline/preprocessing-pipeline/cell_lines.conseq diff --git a/pipeline/celligner/celligner.conseq b/pipeline/preprocessing-pipeline/celligner/celligner.conseq similarity index 95% rename from pipeline/celligner/celligner.conseq rename to pipeline/preprocessing-pipeline/celligner/celligner.conseq index 7e895b4f5..f9ab4e709 100644 --- a/pipeline/celligner/celligner.conseq +++ b/pipeline/preprocessing-pipeline/celligner/celligner.conseq @@ -56,7 +56,7 @@ add-if-missing { rule process_celligner_inputs: executor: dsub { - "docker_image": 
"us.gcr.io/broad-achilles/celligner@sha256:890ca0afafea6fd7e40c79b7aef773b81dcf5919a5dee2d7eb63905990c55cbd", + "docker_image": "us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/celligner@sha256:890ca0afafea6fd7e40c79b7aef773b81dcf5919a5dee2d7eb63905990c55cbd", "min_ram": "100", "boot_disk_size": "70", "helper_path": "/opt/conseq/bin/conseq-helper" } diff --git a/pipeline/celligner/scripts/run_celligner.py b/pipeline/preprocessing-pipeline/celligner/scripts/run_celligner.py similarity index 98% rename from pipeline/celligner/scripts/run_celligner.py rename to pipeline/preprocessing-pipeline/celligner/scripts/run_celligner.py index 164715cfe..bf255c3a2 100644 --- a/pipeline/celligner/scripts/run_celligner.py +++ b/pipeline/preprocessing-pipeline/celligner/scripts/run_celligner.py @@ -548,20 +548,20 @@ def process_data(inputs, extra=True): print(depmap_data) # starting in 25Q2, some additional columns got added which will need to be dropped before proceeding. # the following should reformat the matrix to be the format we used to get from taiga prior to 25Q2 - - depmap_data = depmap_data[depmap_data.IsDefaultEntryForMC == 'Yes'] + + depmap_data = depmap_data[depmap_data.IsDefaultEntryForMC == "Yes"] depmap_data.index = depmap_data["ModelConditionID"] - depmap_data.drop(columns=["SequencingID", "IsDefaultEntryForModel", "ModelID"], inplace=True) + depmap_data.drop( + columns=["SequencingID", "IsDefaultEntryForModel", "ModelID"], inplace=True + ) warnings.warn("loading anns") depmap_ann = tc.get(inputs["depmap_ann"]["source_dataset_id"]) warnings.warn("loading model conds") depmap_model_cond = tc.get(inputs["depmap_model_cond"]["dataset_id"]) - depmap_out = process_depmap_ipts( - depmap_data, depmap_ann, depmap_model_cond - ) + depmap_out = process_depmap_ipts(depmap_data, depmap_ann, depmap_model_cond) # process tcga data into single input for celligner warnings.warn("loading tcga") @@ -627,4 +627,4 @@ def process_data(inputs, extra=True): pcs.to_csv("celligner_pcs.csv") corrected_expression.reset_index() print(corrected_expression.index) - corrected_expression.to_csv("corrected_expression.csv") \ No newline at end of file + corrected_expression.to_csv("corrected_expression.csv") diff --git a/pipeline/celligner/test-artifacts.conseq b/pipeline/preprocessing-pipeline/celligner/test-artifacts.conseq similarity index 100% rename from pipeline/celligner/test-artifacts.conseq rename to pipeline/preprocessing-pipeline/celligner/test-artifacts.conseq diff --git a/pipeline/celligner/test.conseq b/pipeline/preprocessing-pipeline/celligner/test.conseq similarity index 100% rename from pipeline/celligner/test.conseq rename to pipeline/preprocessing-pipeline/celligner/test.conseq diff --git a/pipeline/context_explorer/get_context_analysis.conseq b/pipeline/preprocessing-pipeline/context_explorer/get_context_analysis.conseq similarity index 100% rename from pipeline/context_explorer/get_context_analysis.conseq rename to pipeline/preprocessing-pipeline/context_explorer/get_context_analysis.conseq diff --git a/pipeline/context_explorer/get_context_analysis.py b/pipeline/preprocessing-pipeline/context_explorer/get_context_analysis.py similarity index 100% rename from pipeline/context_explorer/get_context_analysis.py rename to pipeline/preprocessing-pipeline/context_explorer/get_context_analysis.py diff --git a/pipeline/context_explorer/get_data_availability.conseq b/pipeline/preprocessing-pipeline/context_explorer/get_data_availability.conseq similarity index 100% rename from 
pipeline/context_explorer/get_data_availability.conseq rename to pipeline/preprocessing-pipeline/context_explorer/get_data_availability.conseq diff --git a/pipeline/context_explorer/get_data_availability.py b/pipeline/preprocessing-pipeline/context_explorer/get_data_availability.py similarity index 100% rename from pipeline/context_explorer/get_data_availability.py rename to pipeline/preprocessing-pipeline/context_explorer/get_data_availability.py diff --git a/pipeline/context_explorer/get_subtype_context_matrix.conseq b/pipeline/preprocessing-pipeline/context_explorer/get_subtype_context_matrix.conseq similarity index 100% rename from pipeline/context_explorer/get_subtype_context_matrix.conseq rename to pipeline/preprocessing-pipeline/context_explorer/get_subtype_context_matrix.conseq diff --git a/pipeline/context_explorer/get_subtype_tree.conseq b/pipeline/preprocessing-pipeline/context_explorer/get_subtype_tree.conseq similarity index 100% rename from pipeline/context_explorer/get_subtype_tree.conseq rename to pipeline/preprocessing-pipeline/context_explorer/get_subtype_tree.conseq diff --git a/pipeline/scripts/compounds/repurposing/filter_repurposing_data.py b/pipeline/preprocessing-pipeline/context_explorer/parallelized_get_context_analysis.json similarity index 100% rename from pipeline/scripts/compounds/repurposing/filter_repurposing_data.py rename to pipeline/preprocessing-pipeline/context_explorer/parallelized_get_context_analysis.json diff --git a/pipeline/context_explorer/test.conseq b/pipeline/preprocessing-pipeline/context_explorer/test.conseq similarity index 87% rename from pipeline/context_explorer/test.conseq rename to pipeline/preprocessing-pipeline/context_explorer/test.conseq index aca4510ef..cb98fa2c0 100644 --- a/pipeline/context_explorer/test.conseq +++ b/pipeline/preprocessing-pipeline/context_explorer/test.conseq @@ -1,5 +1,5 @@ add-if-missing { - "type": "depmap_data_taiga_id", + "type": "release_taiga_id", "dataset_id": "public-23q2-19de.101" } @@ -18,4 +18,4 @@ add-if-missing { } include "get_data_availability.conseq" -include "get_context_analysis.conseq" \ No newline at end of file +include "get_context_analysis.conseq" diff --git a/pipeline/cor_analysis/cor_analysis.conseq b/pipeline/preprocessing-pipeline/cor_analysis/cor_analysis.conseq similarity index 100% rename from pipeline/cor_analysis/cor_analysis.conseq rename to pipeline/preprocessing-pipeline/cor_analysis/cor_analysis.conseq diff --git a/pipeline/cor_analysis/correlation_with_qvalue.py b/pipeline/preprocessing-pipeline/cor_analysis/correlation_with_qvalue.py similarity index 100% rename from pipeline/cor_analysis/correlation_with_qvalue.py rename to pipeline/preprocessing-pipeline/cor_analysis/correlation_with_qvalue.py diff --git a/pipeline/cor_analysis/create_cor_analysis_pairs.py b/pipeline/preprocessing-pipeline/cor_analysis/create_cor_analysis_pairs.py similarity index 100% rename from pipeline/cor_analysis/create_cor_analysis_pairs.py rename to pipeline/preprocessing-pipeline/cor_analysis/create_cor_analysis_pairs.py diff --git a/pipeline/cor_analysis/dump_cor.py b/pipeline/preprocessing-pipeline/cor_analysis/dump_cor.py similarity index 100% rename from pipeline/cor_analysis/dump_cor.py rename to pipeline/preprocessing-pipeline/cor_analysis/dump_cor.py diff --git a/pipeline/preprocessing-pipeline/cor_analysis/test.conseq b/pipeline/preprocessing-pipeline/cor_analysis/test.conseq new file mode 100644 index 000000000..56287f24b --- /dev/null +++ 
b/pipeline/preprocessing-pipeline/cor_analysis/test.conseq @@ -0,0 +1,62 @@ +let DEFAULT_DOCKER_IMAGE = "us.gcr.io/broad-achilles/depmap-pipeline-run:ga2-build-8" + +add-if-missing { + "type": "config-file", + "name": "taiga-token", + "filename": {"$filename": "{{config.ENV['HOME']}}/.taiga/token"} +} + +# add-if-missing { +# 'type': 'cor-analysis-a', +# 'given_id': 'oncref-viability', +# 'taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceLog2ViabilityCollapsedMatrix', +# 'feature_id_format': 'compound+dose', +# 'features_taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceLog2ViabilityCollapsedConditions', +# 'compounds_taiga_id': 'internal-24q4-8c04.117/PortalCompounds' +# } + +add-if-missing { + 'type': 'cor-analysis-a', + 'given_id': 'oncref-auc', + 'taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceAUCMatrix', + 'feature_id_format': 'compound', + 'compounds_taiga_id': 'internal-24q4-8c04.117/PortalCompounds' + } + +#add-if-missing { +# 'type': 'cor-analysis-a', +# 'given_id': 'oncref-ic50', +# 'taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceLog2IC50Matrix', +# 'feature_id_format': 'compound', +# 'compounds_taiga_id': 'internal-24q4-8c04.117/PortalCompounds' +# } + +add-if-missing { +'type': 'cor-analysis-b', +'given_id': 'crispr', +'taiga_id': 'internal-24q4-8c04.117/CRISPRGeneEffect', +'feature_id_format': 'gene' +} + +# add-if-missing { +# 'type': 'cor-analysis-b', +# 'given_id': 'cn', +# 'taiga_id': 'internal-24q4-8c04.117/OmicsCNGene', +# 'feature_id_format': 'gene' +# } + +# add-if-missing { +# 'type': 'cor-analysis-b', +# 'given_id': 'expression', +# 'taiga_id': 'internal-24q4-8c04.117/OmicsExpressionProteinCodingGenesTPMLogp1', +# 'feature_id_format': 'gene' +# } + +include "cor_analysis.conseq" + + +#let publish_dest = "gs://preprocessing-pipeline-outputs/depmap-pipeline/pgm-test" +#let S3_STAGING_URL = "gs://preprocessing-pipeline-outputs/depmap-pipeline/pgm-test/staging" +#rule publish_cor_tables: +# inputs: cor_tables=all {"type": "cor_table"} +# publish: "{{config.publish_dest}}/cor_tables.json" diff --git a/pipeline/preprocessing-pipeline/cor_analysis/test2.conseq b/pipeline/preprocessing-pipeline/cor_analysis/test2.conseq new file mode 100644 index 000000000..3f5415360 --- /dev/null +++ b/pipeline/preprocessing-pipeline/cor_analysis/test2.conseq @@ -0,0 +1,41 @@ + +add-if-missing { + 'type': 'cor-analysis-a', + 'given_id': 'oncref-viability', + 'taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceLog2ViabilityCollapsedMatrix', + 'feature_id_format': 'compound+dose', + 'features_taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceLog2ViabilityCollapsedConditions', + 'compounds_taiga_id': 'internal-24q4-8c04.117/PortalCompounds' + } + +add-if-missing { + 'type': 'cor-analysis-a', + 'given_id': 'oncref-auc', + 'taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceAUCMatrix', + 'feature_id_format': 'compound', + 'compounds_taiga_id': 'internal-24q4-8c04.117/PortalCompounds' + } + +add-if-missing { + 'type': 'cor-analysis-a', + 'given_id': 'oncref-ic50', + 'taiga_id': 'internal-24q4-8c04.117/PRISMOncologyReferenceLog2IC50Matrix', + 'feature_id_format': 'compound', + 'compounds_taiga_id': 'internal-24q4-8c04.117/PortalCompounds' + } + +add-if-missing { +'type': 'cor-analysis-a', +'given_id': 'crispr', +'taiga_id': 'internal-24q4-8c04.117/CRISPRGeneEffect', +'feature_id_format': 'gene' +} + +add-if-missing { +'type': 'cor-analysis-b', +'given_id': 'RPPA500', +'taiga_id': 'tcpa-rppa-mdanderson-c699.2/TCPA_CCLE_RPPA500_with_ModelID', 
+'feature_id_format': 'rppa' +} + +include "cor_analysis.conseq" diff --git a/pipeline/create-biomarker-matrix-contexts.conseq b/pipeline/preprocessing-pipeline/create-biomarker-matrix-contexts.conseq similarity index 100% rename from pipeline/create-biomarker-matrix-contexts.conseq rename to pipeline/preprocessing-pipeline/create-biomarker-matrix-contexts.conseq diff --git a/pipeline/data_page/get_all_data_availability.conseq b/pipeline/preprocessing-pipeline/data_page/get_all_data_availability.conseq similarity index 86% rename from pipeline/data_page/get_all_data_availability.conseq rename to pipeline/preprocessing-pipeline/data_page/get_all_data_availability.conseq index b812f6c9f..2bc400102 100644 --- a/pipeline/data_page/get_all_data_availability.conseq +++ b/pipeline/preprocessing-pipeline/data_page/get_all_data_availability.conseq @@ -4,7 +4,7 @@ # won't manifest. Then, use the type field on each artifact to map it to the name the script is expecting rule get_all_data_availability: inputs: - artifacts=all {"type" ~ "depmap_data_taiga_id|depmap_oncref_taiga_id|rna_merged_version_taiga_id|rnai_drive_taiga_id|repurposing_matrix_taiga_id|ctd2-drug-taiga-id|gdsc_drug_taiga_id|raw-rppa-matrix|proteomics-raw|sanger_methylation_taiga_id|biomarker-correctly-transposed|ccle_mirna_taiga_id|ataq_seq_taiga_id|olink_taiga_id|sanger-proteomics|depmap_paralogs_taiga_id|depmap_long_reads_dataset"}, + artifacts=all {"type" ~ "release_taiga_id|depmap_oncref_taiga_id|rna_merged_version_taiga_id|rnai_drive_taiga_id|repurposing_matrix_taiga_id|ctd2-drug-taiga-id|gdsc_drug_taiga_id|raw-rppa-matrix|proteomics-raw|sanger_methylation_taiga_id|biomarker-correctly-transposed|ccle_mirna_taiga_id|ataq_seq_taiga_id|olink_taiga_id|sanger-proteomics|depmap_paralogs_taiga_id|depmap_long_reads_dataset"}, rnai_broad_only={"type": "raw-dep-matrix", "label": "RNAi_Ach"}, crispr_screen_sequence_map={"type": "crispr-screen-sequence-map"}, script=fileref("get_all_data_availability.py"), @@ -32,7 +32,7 @@ rule get_all_data_availability: # now unpack those inputs into ids the script was using for dest_name, type_name in [ - ('depmap_data_taiga_id','depmap_data_taiga_id'), + ('release_taiga_id','release_taiga_id'), ('oncref_taiga_id', 'depmap_oncref_taiga_id'), ('rnai_merged_version_taiga_id', 'rna_merged_version_taiga_id'), ('rnai_drive_taiga_id', 'rnai_drive_taiga_id'), diff --git a/pipeline/data_page/get_all_data_availability.py b/pipeline/preprocessing-pipeline/data_page/get_all_data_availability.py similarity index 98% rename from pipeline/data_page/get_all_data_availability.py rename to pipeline/preprocessing-pipeline/data_page/get_all_data_availability.py index b90bac770..3c76f3b2c 100644 --- a/pipeline/data_page/get_all_data_availability.py +++ b/pipeline/preprocessing-pipeline/data_page/get_all_data_availability.py @@ -2,7 +2,6 @@ import pandas as pd import argparse import json -from google.cloud import storage from taigapy import create_taiga_client_v3 @@ -475,7 +474,7 @@ def main( taiga_ids = json.load(input_json) # taiga ids - depmap_data_taiga_id = get_taiga_id(taiga_ids["depmap_data_taiga_id"]) + release_taiga_id = get_taiga_id(taiga_ids["release_taiga_id"]) depmap_oncref_taiga_id = get_taiga_id(taiga_ids["oncref_taiga_id"]) rnai_drive_taiga_id = get_taiga_id(taiga_ids["rnai_drive_taiga_id"]) repurposing_matrix_taiga_id = get_taiga_id(taiga_ids["repurposing_matrix_taiga_id"]) @@ -499,7 +498,7 @@ def main( ) tc = create_taiga_client_v3() - Model = tc.get(f"{depmap_data_taiga_id[0]}/Model") + Model = 
tc.get(f"{release_taiga_id[0]}/Model") assert Model is not None #################### @@ -592,7 +591,7 @@ def main( # WES (Broad), WES (Sanger), WGS (Broad), RNA (Broad) omics_summary = get_omics_summary( - tc=tc, omics_taiga_id=f"{depmap_data_taiga_id[0]}/OmicsProfiles" + tc=tc, omics_taiga_id=f"{release_taiga_id[0]}/OmicsProfiles" ) assert omics_summary.index.is_unique diff --git a/pipeline/data_page/test.conseq b/pipeline/preprocessing-pipeline/data_page/test.conseq similarity index 98% rename from pipeline/data_page/test.conseq rename to pipeline/preprocessing-pipeline/data_page/test.conseq index 63ca89a6b..e871ab7b5 100644 --- a/pipeline/data_page/test.conseq +++ b/pipeline/preprocessing-pipeline/data_page/test.conseq @@ -1,5 +1,5 @@ add-if-missing { - "type": "depmap_data_taiga_id", + "type": "release_taiga_id", "dataset_id": "internal-23q4-ac2b.67" } diff --git a/pipeline/dstat_wrapper.py b/pipeline/preprocessing-pipeline/dstat_wrapper.py similarity index 86% rename from pipeline/dstat_wrapper.py rename to pipeline/preprocessing-pipeline/dstat_wrapper.py index d1ab64c45..11048f372 100644 --- a/pipeline/dstat_wrapper.py +++ b/pipeline/preprocessing-pipeline/dstat_wrapper.py @@ -8,7 +8,13 @@ # whether a job is running or not # based on https://cloud.google.com/batch/docs/reference/rest/v1alpha/projects.locations.jobs#State -terminal_states = ["SUCCEEDED", "FAILED", "CANCELLED"] +terminal_states = [ + "SUCCEEDED", + "FAILED", + "CANCELLED", + "SCHEDULED_PENDING_FAILED", + "RUNNING_PENDING_FAILED", +] in_progress_state = [ "QUEUED", "SCHEDULED", @@ -27,7 +33,7 @@ prefix = "IN_PROGRESS" else: m = re.match( - "Job state is set from [A-Z]+ to ([A-Z]+) for job.*", status_message + "Job state is set from [A-Z_]+ to ([A-Z_]+) for job.*", status_message ) assert m is not None state = m.group(1) diff --git a/pipeline/examples/dsub-exec-profile/README.md b/pipeline/preprocessing-pipeline/examples/dsub-exec-profile/README.md similarity index 100% rename from pipeline/examples/dsub-exec-profile/README.md rename to pipeline/preprocessing-pipeline/examples/dsub-exec-profile/README.md diff --git a/pipeline/examples/dsub-exec-profile/docker/Dockerfile b/pipeline/preprocessing-pipeline/examples/dsub-exec-profile/docker/Dockerfile similarity index 100% rename from pipeline/examples/dsub-exec-profile/docker/Dockerfile rename to pipeline/preprocessing-pipeline/examples/dsub-exec-profile/docker/Dockerfile diff --git a/pipeline/examples/dsub-exec-profile/docker/build.sh b/pipeline/preprocessing-pipeline/examples/dsub-exec-profile/docker/build.sh similarity index 100% rename from pipeline/examples/dsub-exec-profile/docker/build.sh rename to pipeline/preprocessing-pipeline/examples/dsub-exec-profile/docker/build.sh diff --git a/pipeline/examples/dsub-exec-profile/sample.conseq b/pipeline/preprocessing-pipeline/examples/dsub-exec-profile/sample.conseq similarity index 100% rename from pipeline/examples/dsub-exec-profile/sample.conseq rename to pipeline/preprocessing-pipeline/examples/dsub-exec-profile/sample.conseq diff --git a/pipeline/exec.conseq b/pipeline/preprocessing-pipeline/exec.conseq similarity index 91% rename from pipeline/exec.conseq rename to pipeline/preprocessing-pipeline/exec.conseq index 8ae07d40a..6d3f70f5f 100644 --- a/pipeline/exec.conseq +++ b/pipeline/preprocessing-pipeline/exec.conseq @@ -1,6 +1,6 @@ let STAGING_URL = "gs://preprocessing-pipeline-outputs/conseq/depmap" -let DEFAULT_DOCKER_IMAGE = "us.gcr.io/broad-achilles/depmap-pipeline-run:ga2-build-13" -let DEFAULT_GCP_PROJECT = 
"broad-achilles" +let DEFAULT_DOCKER_IMAGE = "us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/depmap-pipeline-run:ga2-build-23" +let DEFAULT_GCP_PROJECT = "depmap-portal-pipeline" let DEFAULT_GCP_ZONE = "us-central*" let DSUB_EXE_PATH="dsub" let DSTAT_EXE_PATH="dstat" diff --git a/pipeline/jenkins-run-pipeline.sh b/pipeline/preprocessing-pipeline/jenkins-run-pipeline.sh similarity index 98% rename from pipeline/jenkins-run-pipeline.sh rename to pipeline/preprocessing-pipeline/jenkins-run-pipeline.sh index 2ddbb02f5..f03041a01 100755 --- a/pipeline/jenkins-run-pipeline.sh +++ b/pipeline/preprocessing-pipeline/jenkins-run-pipeline.sh @@ -30,7 +30,7 @@ fi # set DOCKER_IMAGE from pipeline-run-docker/image-name SCRIPT_PATH=`dirname $0` -source "$SCRIPT_PATH/build-pipeline-docker-images/pipeline-run-docker/image-name" +source "$SCRIPT_PATH/image-name" COMMIT_SHA=`git rev-parse HEAD` if [ "${COMMIT_SHA}" == "" ]; then diff --git a/pipeline/make_compound_summary_table.conseq b/pipeline/preprocessing-pipeline/make_compound_summary_table.conseq similarity index 100% rename from pipeline/make_compound_summary_table.conseq rename to pipeline/preprocessing-pipeline/make_compound_summary_table.conseq diff --git a/pipeline/oncokb_import.conseq b/pipeline/preprocessing-pipeline/oncokb_import.conseq similarity index 100% rename from pipeline/oncokb_import.conseq rename to pipeline/preprocessing-pipeline/oncokb_import.conseq diff --git a/pipeline/poetry.lock b/pipeline/preprocessing-pipeline/poetry.lock similarity index 100% rename from pipeline/poetry.lock rename to pipeline/preprocessing-pipeline/poetry.lock diff --git a/pipeline/predictability/README.md b/pipeline/preprocessing-pipeline/predictability/README.md similarity index 100% rename from pipeline/predictability/README.md rename to pipeline/preprocessing-pipeline/predictability/README.md diff --git a/pipeline/predictability/model-config.yaml b/pipeline/preprocessing-pipeline/predictability/model-config.yaml similarity index 100% rename from pipeline/predictability/model-config.yaml rename to pipeline/preprocessing-pipeline/predictability/model-config.yaml diff --git a/pipeline/predictability/predictability.conseq b/pipeline/preprocessing-pipeline/predictability/predictability.conseq similarity index 97% rename from pipeline/predictability/predictability.conseq rename to pipeline/preprocessing-pipeline/predictability/predictability.conseq index 7753e89b6..7473a1938 100644 --- a/pipeline/predictability/predictability.conseq +++ b/pipeline/preprocessing-pipeline/predictability/predictability.conseq @@ -11,11 +11,6 @@ # rules take in pred-biomarker-matrix-csv and pred-dep-matrix as inputs let testpred="" -if "(config.get('publish_dest', 'INVALID')) != 'INVALID' and ('test-perf' not in config.get('publish_dest', 'INVALID'))": -# run in testing mode if we're on the test-perf branch because we don't really need predictability results to test most of the -# functionality -let testpred="true" -endif # Create different model configs for each target matrix (artifacts of type dep-matrix-ftr-unfiltered) rule process_model_config: @@ -132,7 +127,7 @@ rule download_dep_matrix: # Create the "y" matrix containing the genes to predict. 
rule filter_dep_ftr: - executor: dsub {"min_ram":"10", "docker_image": "us.gcr.io/broad-achilles/depmap-pipeline-tda:v10"} + executor: dsub {"min_ram":"10", "docker_image": "us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/depmap-pipeline-tda:v10"} inputs: dep={"type": "dep-matrix-ftr-unfiltered"} outputs: @@ -377,7 +372,7 @@ rule prep_rppa_pred_biomarker_matrix: # Construct the feature matrix for each target matrix (and its corresponding model config) rule assemble_feature_matrix: - executor: dsub {"min_ram":"15", "docker_image": "us.gcr.io/broad-achilles/depmap-pipeline-tda:v10"} + executor: dsub {"min_ram":"15", "docker_image": "us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/depmap-pipeline-tda:v10"} inputs: expression={'type': 'pred-biomarker-matrix-csv', 'category': 'expression'}, # ssgsea={'type': 'pred-biomarker-matrix-csv', 'category': 'ssgsea'}, @@ -562,7 +557,7 @@ rule fit_predictive_model: {{ config.sparkles_path }} \ --config {{ inputs.sparkles_config.filename }} \ sub \ - -i us.gcr.io/broad-achilles/depmap-pipeline-tda:v6 \ + -i us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/depmap-pipeline-tda:v6 \ -u {{ inputs.dep.filename }}:target.ftr \ -u {{ inputs.model_config.filename }}:model-config.yaml \ -u {{ inputs.ensemble_feature_set.filename }}:X.ftr \ diff --git a/pipeline/predictability/scripts/confounders.py b/pipeline/preprocessing-pipeline/predictability/scripts/confounders.py similarity index 100% rename from pipeline/predictability/scripts/confounders.py rename to pipeline/preprocessing-pipeline/predictability/scripts/confounders.py diff --git a/pipeline/predictability/scripts/gather_ensemble_tasks.py b/pipeline/preprocessing-pipeline/predictability/scripts/gather_ensemble_tasks.py similarity index 100% rename from pipeline/predictability/scripts/gather_ensemble_tasks.py rename to pipeline/preprocessing-pipeline/predictability/scripts/gather_ensemble_tasks.py diff --git a/pipeline/predictability/scripts/partition_ensemble_inputs.py b/pipeline/preprocessing-pipeline/predictability/scripts/partition_ensemble_inputs.py similarity index 100% rename from pipeline/predictability/scripts/partition_ensemble_inputs.py rename to pipeline/preprocessing-pipeline/predictability/scripts/partition_ensemble_inputs.py diff --git a/pipeline/predictability/scripts/validate_jobs_complete.py b/pipeline/preprocessing-pipeline/predictability/scripts/validate_jobs_complete.py similarity index 100% rename from pipeline/predictability/scripts/validate_jobs_complete.py rename to pipeline/preprocessing-pipeline/predictability/scripts/validate_jobs_complete.py diff --git a/pipeline/predictability/test-artifacts.conseq b/pipeline/preprocessing-pipeline/predictability/test-artifacts.conseq similarity index 100% rename from pipeline/predictability/test-artifacts.conseq rename to pipeline/preprocessing-pipeline/predictability/test-artifacts.conseq diff --git a/pipeline/predictability/test.conseq b/pipeline/preprocessing-pipeline/predictability/test.conseq similarity index 100% rename from pipeline/predictability/test.conseq rename to pipeline/preprocessing-pipeline/predictability/test.conseq diff --git a/pipeline/pref_essential_genes.conseq b/pipeline/preprocessing-pipeline/pref_essential_genes.conseq similarity index 100% rename from pipeline/pref_essential_genes.conseq rename to pipeline/preprocessing-pipeline/pref_essential_genes.conseq diff --git a/pipeline/preprocess_raw_biom_matrix.conseq 
b/pipeline/preprocessing-pipeline/preprocess_raw_biom_matrix.conseq similarity index 100% rename from pipeline/preprocess_raw_biom_matrix.conseq rename to pipeline/preprocessing-pipeline/preprocess_raw_biom_matrix.conseq diff --git a/pipeline/preprocessing-pipeline/preprocessing_pipeline_runner.py b/pipeline/preprocessing-pipeline/preprocessing_pipeline_runner.py new file mode 100644 index 000000000..a73a34313 --- /dev/null +++ b/pipeline/preprocessing-pipeline/preprocessing_pipeline_runner.py @@ -0,0 +1,165 @@ +import argparse +import os +import subprocess +import sys +import tempfile +from pathlib import Path + +# Add parent directory to path to import base class +sys.path.insert(0, str(Path(__file__).parent.parent)) +from base_pipeline_runner import PipelineRunner + + +class PreprocessingPipelineRunner(PipelineRunner): + """Pipeline runner for preprocessing pipeline.""" + + def map_environment_name(self, env_name): + """Map environment names to actual conseq file names.""" + env_mapping = self.config_data["pipelines"]["preprocessing"]["env_mapping"] + + # Handle test- prefix + if env_name.startswith("test-"): + return env_mapping["test-prefix"] + + mapped_name = env_mapping.get(env_name, env_name) + assert mapped_name, "Mapped environment name cannot be empty" + return mapped_name + + def create_argument_parser(self): + """Create argument parser for preprocessing pipeline.""" + parser = argparse.ArgumentParser( + description="Run preprocessing pipeline (Jenkins style)" + ) + + # Add common arguments + self.add_common_arguments(parser) + + # Add preprocessing-specific arguments + parser.add_argument( + "--publish-dest", help="S3/GCS path override for publishing" + ) + parser.add_argument("--export-path", help="Export path for conseq export") + parser.add_argument( + "--manually-run-conseq", + action="store_true", + help="If set args will be passed directly to conseq", + ) + parser.add_argument( + "--start-with", help="Start with existing export from GCS path" + ) + parser.add_argument( + "conseq_args", nargs="*", help="parameters to pass to conseq" + ) + return parser + + def get_pipeline_config(self, args): + """Get configuration for preprocessing pipeline.""" + # Build common config + config = self.build_common_config(args, "preprocessing") + + # Add preprocessing-specific config + config.update( + { + "conseq_args": args.conseq_args, + "manually_run_conseq": args.manually_run_conseq, + "start_with": args.start_with, + "publish_dest": args.publish_dest, + "export_path": args.export_path, + } + ) + + return config + + def create_override_conseq_file(self, env_name, publish_dest): + """Create an overridden conseq file with custom publish_dest.""" + mapped_env = self.map_environment_name(env_name) + original_conseq = f"run_{mapped_env}.conseq" + override_conseq = f"overriden-{original_conseq}" + + # Write new publish_dest line + with open(f"pipeline/preprocessing-pipeline/{override_conseq}", "w") as f: + f.write(f'let publish_dest = "{publish_dest}"\n') + + # Append original file content except for publish_dest lines + with open( + f"pipeline/preprocessing-pipeline/{original_conseq}", "r" + ) as original: + with open( + f"pipeline/preprocessing-pipeline/{override_conseq}", "a" + ) as override: + for line in original: + if not line.strip().startswith("let publish_dest"): + override.write(line) + + return override_conseq + + def get_conseq_file(self, config): + if config["publish_dest"]: + conseq_file = self.create_override_conseq_file( + config["env_name"], config["publish_dest"] + ) + 
print(f"Created override conseq file: {conseq_file}") + return conseq_file + else: + mapped_env = self.map_environment_name(config["env_name"]) + print("No S3 path override specified") + return f"run_{mapped_env}.conseq" + + def handle_special_features(self, config): + """Handle START_WITH functionality for preprocessing pipeline.""" + if config["start_with"]: + print(f"Starting with existing export: {config['start_with']}") + subprocess.run( + ["sudo", "chown", "-R", "ubuntu", "pipeline/preprocessing-pipeline"], + check=True, + ) + subprocess.run( + ["rm", "-rf", "pipeline/preprocessing-pipeline/state"], check=True + ) + + # Use gcloud storage cp with temporary service account activation + with tempfile.TemporaryDirectory() as temp_home: + env_with_temp_home = {**os.environ, "HOME": temp_home} + + # Activate service account + subprocess.run( + [ + "gcloud", + "auth", + "activate-service-account", + "--key-file", + f"{config['creds_dir']}/depmap-pipeline-runner.json", + ], + check=True, + env=env_with_temp_home, + ) + + # Download the export + subprocess.run( + [ + "gcloud", + "storage", + "cp", + config["start_with"], + "pipeline/preprocessing-pipeline/downloaded-export.conseq", + ], + check=True, + env=env_with_temp_home, + ) + + self.run_via_container("conseq run downloaded-export.conseq", config) + self.run_via_container("conseq forget --regex 'publish.*'", config) + + def handle_post_run_tasks(self, config): + """Handle export and report generation for preprocessing pipeline.""" + if config["export_path"]: + self.run_via_container( + f"conseq export {config['conseq_file']} {config['export_path']}", config + ) + self.run_via_container("conseq report html", config) + self.track_dataset_usage_from_conseq("pipeline/preprocessing-pipeline") + + +if __name__ == "__main__": + runner = PreprocessingPipelineRunner() + runner.run(Path(__file__)) diff --git a/pipeline/publish.conseq b/pipeline/preprocessing-pipeline/publish.conseq similarity index 97% rename from pipeline/publish.conseq rename to pipeline/preprocessing-pipeline/publish.conseq index 591b254e0..ce86d4702 100644 --- a/pipeline/publish.conseq +++ b/pipeline/preprocessing-pipeline/publish.conseq @@ -1,3 +1,6 @@ +rule publish_depmap_release2: + inputs: release={"type": "release_taiga_id"} + publish: "{{config.publish_dest}}/depmap-release.json" rule publish_dep_mats: inputs: dep=all {"type": "dep-matrix"} diff --git a/pipeline/pyproject.toml b/pipeline/preprocessing-pipeline/pyproject.toml similarity index 100% rename from pipeline/pyproject.toml rename to pipeline/preprocessing-pipeline/pyproject.toml diff --git a/pipeline/readme.md b/pipeline/preprocessing-pipeline/readme.md similarity index 97% rename from pipeline/readme.md rename to pipeline/preprocessing-pipeline/readme.md index 5eda5fa7c..2b00884c7 100644 --- a/pipeline/readme.md +++ b/pipeline/preprocessing-pipeline/readme.md @@ -24,7 +24,7 @@ Taiga IDs used by all environments should go into `xrefs_common.conseq`. Those d If you have a task which requires a large amount of memory or CPU, it's best to push it to the cloud. If it's an array job (ie: you want hundreds of jobs to run in parallel) you should have your rule run sparkles to submit the job. Always submit the job with a name that contains a hash of the inputs so that we can gracefully continue if the process is interrupted. 
(See the predictive pipeline for examples) -If you have individual tasks which should run in the cloud, you can mark then as using the `dsub` executor and specify the memory required and the image to use. For example: +If you have individual tasks which should run in the cloud, you can mark then as using the `dsub` executor and specify the memory required and the image to use. For example: ``` rule process_celligner_inputs: @@ -38,5 +38,3 @@ rule process_celligner_inputs: ``` Also, note always specify the image SHA so that we can track which version of the image was used. - - diff --git a/pipeline/reformat_deps.conseq b/pipeline/preprocessing-pipeline/reformat_deps.conseq similarity index 100% rename from pipeline/reformat_deps.conseq rename to pipeline/preprocessing-pipeline/reformat_deps.conseq diff --git a/pipeline/reformat_repurposing_data.conseq b/pipeline/preprocessing-pipeline/reformat_repurposing_data.conseq similarity index 99% rename from pipeline/reformat_repurposing_data.conseq rename to pipeline/preprocessing-pipeline/reformat_repurposing_data.conseq index d9c93a737..04e4791c7 100644 --- a/pipeline/reformat_repurposing_data.conseq +++ b/pipeline/preprocessing-pipeline/reformat_repurposing_data.conseq @@ -1,6 +1,3 @@ - - - rule reformat_repurposing_single_pt_data: inputs: script=fileref("scripts/compounds/repurposing/reformat_repurposing_single_pt.py"), data={'type': 'repallsinglept-taiga-id'}, @@ -13,4 +10,3 @@ rule reformat_repurposing_single_pt_data: "orig_dataset_id": "{{inputs.data.dataset_id}}" } run "python3 {{ inputs.script.filename }} {{ inputs.data.dataset_id }} {{inputs.data.label}}.hdf5" - diff --git a/pipeline/rules-to-skip b/pipeline/preprocessing-pipeline/rules-to-skip similarity index 100% rename from pipeline/rules-to-skip rename to pipeline/preprocessing-pipeline/rules-to-skip diff --git a/pipeline/run_dev.conseq b/pipeline/preprocessing-pipeline/run_dev.conseq similarity index 100% rename from pipeline/run_dev.conseq rename to pipeline/preprocessing-pipeline/run_dev.conseq diff --git a/pipeline/run_dqa.conseq b/pipeline/preprocessing-pipeline/run_dqa.conseq similarity index 100% rename from pipeline/run_dqa.conseq rename to pipeline/preprocessing-pipeline/run_dqa.conseq diff --git a/pipeline/run_external.conseq b/pipeline/preprocessing-pipeline/run_external.conseq similarity index 100% rename from pipeline/run_external.conseq rename to pipeline/preprocessing-pipeline/run_external.conseq diff --git a/pipeline/run_iqa.conseq b/pipeline/preprocessing-pipeline/run_iqa.conseq similarity index 92% rename from pipeline/run_iqa.conseq rename to pipeline/preprocessing-pipeline/run_iqa.conseq index 21f0d448a..06e820295 100644 --- a/pipeline/run_iqa.conseq +++ b/pipeline/preprocessing-pipeline/run_iqa.conseq @@ -3,4 +3,4 @@ include "_run_internal.conseq" # the only edits/additions that should take place here are changing the publish destination # please don't add anything else here. 
any added rules should go into _run_internal -# this exists just so that the iqa environment can have a separate publish destination \ No newline at end of file +# this exists just so that the iqa environment can have a separate publish destination diff --git a/pipeline/run_test.conseq b/pipeline/preprocessing-pipeline/run_test.conseq similarity index 100% rename from pipeline/run_test.conseq rename to pipeline/preprocessing-pipeline/run_test.conseq diff --git a/pipeline/run_xqa.conseq b/pipeline/preprocessing-pipeline/run_xqa.conseq similarity index 100% rename from pipeline/run_xqa.conseq rename to pipeline/preprocessing-pipeline/run_xqa.conseq diff --git a/pipeline/scripts/achilles_lfc_per_cell_line.py b/pipeline/preprocessing-pipeline/scripts/achilles_lfc_per_cell_line.py similarity index 100% rename from pipeline/scripts/achilles_lfc_per_cell_line.py rename to pipeline/preprocessing-pipeline/scripts/achilles_lfc_per_cell_line.py diff --git a/pipeline/scripts/add_arxspan_to_col.py b/pipeline/preprocessing-pipeline/scripts/add_arxspan_to_col.py similarity index 100% rename from pipeline/scripts/add_arxspan_to_col.py rename to pipeline/preprocessing-pipeline/scripts/add_arxspan_to_col.py diff --git a/pipeline/scripts/cleanup_dataframe.py b/pipeline/preprocessing-pipeline/scripts/cleanup_dataframe.py similarity index 100% rename from pipeline/scripts/cleanup_dataframe.py rename to pipeline/preprocessing-pipeline/scripts/cleanup_dataframe.py diff --git a/pipeline/scripts/cleanup_hdf5.py b/pipeline/preprocessing-pipeline/scripts/cleanup_hdf5.py similarity index 100% rename from pipeline/scripts/cleanup_hdf5.py rename to pipeline/preprocessing-pipeline/scripts/cleanup_hdf5.py diff --git a/pipeline/scripts/compound_summary_merge.py b/pipeline/preprocessing-pipeline/scripts/compound_summary_merge.py similarity index 100% rename from pipeline/scripts/compound_summary_merge.py rename to pipeline/preprocessing-pipeline/scripts/compound_summary_merge.py diff --git a/pipeline/scripts/compounds/repurposing/reformat_dose_level_repurposing_secondary.py b/pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_dose_level_repurposing_secondary.py similarity index 100% rename from pipeline/scripts/compounds/repurposing/reformat_dose_level_repurposing_secondary.py rename to pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_dose_level_repurposing_secondary.py diff --git a/pipeline/scripts/compounds/repurposing/reformat_dose_replicate_level_repurposing_secondary.py b/pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_dose_replicate_level_repurposing_secondary.py similarity index 100% rename from pipeline/scripts/compounds/repurposing/reformat_dose_replicate_level_repurposing_secondary.py rename to pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_dose_replicate_level_repurposing_secondary.py diff --git a/pipeline/scripts/compounds/repurposing/reformat_rep_all_single_pt.py b/pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_rep_all_single_pt.py similarity index 100% rename from pipeline/scripts/compounds/repurposing/reformat_rep_all_single_pt.py rename to pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_rep_all_single_pt.py diff --git a/pipeline/scripts/compounds/repurposing/reformat_repurposing_single_pt.py b/pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_repurposing_single_pt.py similarity index 100% rename from 
pipeline/scripts/compounds/repurposing/reformat_repurposing_single_pt.py rename to pipeline/preprocessing-pipeline/scripts/compounds/repurposing/reformat_repurposing_single_pt.py diff --git a/pipeline/scripts/compute_enrichment.py b/pipeline/preprocessing-pipeline/scripts/compute_enrichment.py similarity index 100% rename from pipeline/scripts/compute_enrichment.py rename to pipeline/preprocessing-pipeline/scripts/compute_enrichment.py diff --git a/pipeline/scripts/compute_hash.py b/pipeline/preprocessing-pipeline/scripts/compute_hash.py similarity index 100% rename from pipeline/scripts/compute_hash.py rename to pipeline/preprocessing-pipeline/scripts/compute_hash.py diff --git a/pipeline/scripts/correlation.py b/pipeline/preprocessing-pipeline/scripts/correlation.py similarity index 100% rename from pipeline/scripts/correlation.py rename to pipeline/preprocessing-pipeline/scripts/correlation.py diff --git a/pipeline/scripts/count_dep_lines.py b/pipeline/preprocessing-pipeline/scripts/count_dep_lines.py similarity index 100% rename from pipeline/scripts/count_dep_lines.py rename to pipeline/preprocessing-pipeline/scripts/count_dep_lines.py diff --git a/pipeline/scripts/create-dose-replicate-level-hdf5.py b/pipeline/preprocessing-pipeline/scripts/create-dose-replicate-level-hdf5.py similarity index 100% rename from pipeline/scripts/create-dose-replicate-level-hdf5.py rename to pipeline/preprocessing-pipeline/scripts/create-dose-replicate-level-hdf5.py diff --git a/pipeline/scripts/create_cor_analysis_pairs.py b/pipeline/preprocessing-pipeline/scripts/create_cor_analysis_pairs.py similarity index 100% rename from pipeline/scripts/create_cor_analysis_pairs.py rename to pipeline/preprocessing-pipeline/scripts/create_cor_analysis_pairs.py diff --git a/pipeline/scripts/download_mutation_boolean_matrix.py b/pipeline/preprocessing-pipeline/scripts/download_mutation_boolean_matrix.py similarity index 100% rename from pipeline/scripts/download_mutation_boolean_matrix.py rename to pipeline/preprocessing-pipeline/scripts/download_mutation_boolean_matrix.py diff --git a/pipeline/scripts/expand_drug_metadata.py b/pipeline/preprocessing-pipeline/scripts/expand_drug_metadata.py similarity index 100% rename from pipeline/scripts/expand_drug_metadata.py rename to pipeline/preprocessing-pipeline/scripts/expand_drug_metadata.py diff --git a/pipeline/scripts/format_cell_lines.py b/pipeline/preprocessing-pipeline/scripts/format_cell_lines.py similarity index 100% rename from pipeline/scripts/format_cell_lines.py rename to pipeline/preprocessing-pipeline/scripts/format_cell_lines.py diff --git a/pipeline/scripts/format_cell_lines_v2.py b/pipeline/preprocessing-pipeline/scripts/format_cell_lines_v2.py similarity index 100% rename from pipeline/scripts/format_cell_lines_v2.py rename to pipeline/preprocessing-pipeline/scripts/format_cell_lines_v2.py diff --git a/pipeline/scripts/format_models.py b/pipeline/preprocessing-pipeline/scripts/format_models.py similarity index 100% rename from pipeline/scripts/format_models.py rename to pipeline/preprocessing-pipeline/scripts/format_models.py diff --git a/pipeline/scripts/gene_score_confidence.py b/pipeline/preprocessing-pipeline/scripts/gene_score_confidence.py similarity index 100% rename from pipeline/scripts/gene_score_confidence.py rename to pipeline/preprocessing-pipeline/scripts/gene_score_confidence.py diff --git a/pipeline/scripts/hdf5_utils.py b/pipeline/preprocessing-pipeline/scripts/hdf5_utils.py similarity index 100% rename from 
pipeline/scripts/hdf5_utils.py rename to pipeline/preprocessing-pipeline/scripts/hdf5_utils.py diff --git a/pipeline/scripts/make_biomarker_matrix_contexts.py b/pipeline/preprocessing-pipeline/scripts/make_biomarker_matrix_contexts.py similarity index 100% rename from pipeline/scripts/make_biomarker_matrix_contexts.py rename to pipeline/preprocessing-pipeline/scripts/make_biomarker_matrix_contexts.py diff --git a/pipeline/scripts/make_fusions_matrix.py b/pipeline/preprocessing-pipeline/scripts/make_fusions_matrix.py similarity index 85% rename from pipeline/scripts/make_fusions_matrix.py rename to pipeline/preprocessing-pipeline/scripts/make_fusions_matrix.py index 7aa84c9b7..57199dbda 100644 --- a/pipeline/scripts/make_fusions_matrix.py +++ b/pipeline/preprocessing-pipeline/scripts/make_fusions_matrix.py @@ -6,6 +6,7 @@ from taigapy import create_taiga_client_v3 from omics_preprocessing_utils import preprocess_omics_dataframe + def extract_id(x): m = re.match(r"\S+ \(([^.]+)\.\d+\)", x) if m is None: @@ -21,6 +22,9 @@ def main(): parser.add_argument("fusions_dataset_id") parser.add_argument("hgnc_dataset_id") parser.add_argument("out_csv") + + # if --rows-per-model flag is provided, then transpose the matrix + parser.add_argument("--rows-per-model", action="store_true", default=False) args = parser.parse_args() tc = create_taiga_client_v3() @@ -40,6 +44,9 @@ def main(): fill_value=0, ) + if args.rows_per_model: + one_hot = one_hot.T + one_hot.to_csv(args.out_csv) diff --git a/pipeline/scripts/omics_preprocessing_utils.py b/pipeline/preprocessing-pipeline/scripts/omics_preprocessing_utils.py similarity index 81% rename from pipeline/scripts/omics_preprocessing_utils.py rename to pipeline/preprocessing-pipeline/scripts/omics_preprocessing_utils.py index 97ba2e9dd..4acbdcbce 100644 --- a/pipeline/scripts/omics_preprocessing_utils.py +++ b/pipeline/preprocessing-pipeline/scripts/omics_preprocessing_utils.py @@ -7,29 +7,37 @@ def preprocess_omics_dataframe(df, dataset_id): 4. Set ModelID as index 5. 
Drop columns with all NaN values """ - + # Check if this dataframe needs preprocessing (has the required columns) if "IsDefaultEntryForModel" not in df.columns: - print(f"No IsDefaultEntryForModel column found in {dataset_id}, skipping preprocessing") + print( + f"No IsDefaultEntryForModel column found in {dataset_id}, skipping preprocessing" + ) return df - + print(f"Preprocessing {dataset_id}...") print("Filtering to default entries per model...") filtered_df = df[df["IsDefaultEntryForModel"] == "Yes"].copy() dataset_name = dataset_id.split("/")[-1] - if dataset_name in ["OmicsFusionFiltered", "OmicsProfiles", "OmicsSomaticMutations"]: + if dataset_name in [ + "OmicsFusionFiltered", + "OmicsProfiles", + "OmicsSomaticMutations", + ]: print(f"Warning: {dataset_id} has multiple entries per ModelID") else: - assert not filtered_df["ModelID"].duplicated().any(), f"Duplicate ModelID after filtering in {dataset_id}" + assert ( + not filtered_df["ModelID"].duplicated().any() + ), f"Duplicate ModelID after filtering in {dataset_id}" print("Setting ModelID as index...") filtered_df = filtered_df.set_index("ModelID") filtered_df.index.name = None - + print("Dropping some metadata columns...") cols_to_drop = [ "SequencingID", - "ModelConditionID", + "ModelConditionID", "IsDefaultEntryForModel", "IsDefaultEntryForMC", ] @@ -39,12 +47,12 @@ def preprocess_omics_dataframe(df, dataset_id): count_all_na_columns = filtered_df.isna().all().sum() print(f"Number of columns with ALL NA values: {count_all_na_columns}") - + if count_all_na_columns > 0: print(f"Data shape before dropping: {filtered_df.shape}") print("Dropping columns with all NaN values...") filtered_df = filtered_df.dropna(axis=1, how="all") print(f"Data shape after dropping: {filtered_df.shape}") - + print(f"Finished preprocessing {dataset_id}") return filtered_df diff --git a/pipeline/scripts/oncokb_maf_annotator.py b/pipeline/preprocessing-pipeline/scripts/oncokb_maf_annotator.py similarity index 100% rename from pipeline/scripts/oncokb_maf_annotator.py rename to pipeline/preprocessing-pipeline/scripts/oncokb_maf_annotator.py diff --git a/pipeline/scripts/oncref_cpd_doses_from_treatment_metadata.py b/pipeline/preprocessing-pipeline/scripts/oncref_cpd_doses_from_treatment_metadata.py similarity index 100% rename from pipeline/scripts/oncref_cpd_doses_from_treatment_metadata.py rename to pipeline/preprocessing-pipeline/scripts/oncref_cpd_doses_from_treatment_metadata.py diff --git a/pipeline/scripts/pref_essential_genes_to_csv.py b/pipeline/preprocessing-pipeline/scripts/pref_essential_genes_to_csv.py similarity index 100% rename from pipeline/scripts/pref_essential_genes_to_csv.py rename to pipeline/preprocessing-pipeline/scripts/pref_essential_genes_to_csv.py diff --git a/pipeline/scripts/process_drug_screen_auc_matrix.py b/pipeline/preprocessing-pipeline/scripts/process_drug_screen_auc_matrix.py similarity index 100% rename from pipeline/scripts/process_drug_screen_auc_matrix.py rename to pipeline/preprocessing-pipeline/scripts/process_drug_screen_auc_matrix.py diff --git a/pipeline/scripts/process_drug_screen_drc.py b/pipeline/preprocessing-pipeline/scripts/process_drug_screen_drc.py similarity index 100% rename from pipeline/scripts/process_drug_screen_drc.py rename to pipeline/preprocessing-pipeline/scripts/process_drug_screen_drc.py diff --git a/pipeline/scripts/process_expr_matrix.py b/pipeline/preprocessing-pipeline/scripts/process_expr_matrix.py similarity index 99% rename from pipeline/scripts/process_expr_matrix.py rename to 
pipeline/preprocessing-pipeline/scripts/process_expr_matrix.py index b3870ee7b..a16d6bbb7 100644 --- a/pipeline/scripts/process_expr_matrix.py +++ b/pipeline/preprocessing-pipeline/scripts/process_expr_matrix.py @@ -14,7 +14,7 @@ tc = create_taiga_client_v3() mat = tc.get(dataset_id) mat = preprocess_omics_dataframe(mat, dataset_id) - + maxval = np.nanmax(mat.values) # Find the maximum value in the matrix, ignoring NaNs print(f"Maxval: {maxval}") diff --git a/pipeline/scripts/process_rppa_matrix.py b/pipeline/preprocessing-pipeline/scripts/process_rppa_matrix.py similarity index 100% rename from pipeline/scripts/process_rppa_matrix.py rename to pipeline/preprocessing-pipeline/scripts/process_rppa_matrix.py diff --git a/pipeline/scripts/process_viability_dataset_from_taiga.py b/pipeline/preprocessing-pipeline/scripts/process_viability_dataset_from_taiga.py similarity index 100% rename from pipeline/scripts/process_viability_dataset_from_taiga.py rename to pipeline/preprocessing-pipeline/scripts/process_viability_dataset_from_taiga.py diff --git a/pipeline/scripts/proteomics.py b/pipeline/preprocessing-pipeline/scripts/proteomics.py similarity index 100% rename from pipeline/scripts/proteomics.py rename to pipeline/preprocessing-pipeline/scripts/proteomics.py diff --git a/pipeline/scripts/rep_all_single_pt_cpd_doses_from_treatment_metadata.py b/pipeline/preprocessing-pipeline/scripts/rep_all_single_pt_cpd_doses_from_treatment_metadata.py similarity index 100% rename from pipeline/scripts/rep_all_single_pt_cpd_doses_from_treatment_metadata.py rename to pipeline/preprocessing-pipeline/scripts/rep_all_single_pt_cpd_doses_from_treatment_metadata.py diff --git a/pipeline/scripts/sanger_proteomics.py b/pipeline/preprocessing-pipeline/scripts/sanger_proteomics.py similarity index 100% rename from pipeline/scripts/sanger_proteomics.py rename to pipeline/preprocessing-pipeline/scripts/sanger_proteomics.py diff --git a/pipeline/scripts/summarize_gene_deps.py b/pipeline/preprocessing-pipeline/scripts/summarize_gene_deps.py similarity index 100% rename from pipeline/scripts/summarize_gene_deps.py rename to pipeline/preprocessing-pipeline/scripts/summarize_gene_deps.py diff --git a/pipeline/scripts/tda/CE_percentile_rank_analysis.py b/pipeline/preprocessing-pipeline/scripts/tda/CE_percentile_rank_analysis.py similarity index 100% rename from pipeline/scripts/tda/CE_percentile_rank_analysis.py rename to pipeline/preprocessing-pipeline/scripts/tda/CE_percentile_rank_analysis.py diff --git a/pipeline/scripts/tda/ensemble/download_dep_prob_matrix.py b/pipeline/preprocessing-pipeline/scripts/tda/ensemble/download_dep_prob_matrix.py similarity index 100% rename from pipeline/scripts/tda/ensemble/download_dep_prob_matrix.py rename to pipeline/preprocessing-pipeline/scripts/tda/ensemble/download_dep_prob_matrix.py diff --git a/pipeline/scripts/tda/ensemble/interpretable_models.py b/pipeline/preprocessing-pipeline/scripts/tda/ensemble/interpretable_models.py similarity index 100% rename from pipeline/scripts/tda/ensemble/interpretable_models.py rename to pipeline/preprocessing-pipeline/scripts/tda/ensemble/interpretable_models.py diff --git a/pipeline/scripts/tda/lrt/LRT.R b/pipeline/preprocessing-pipeline/scripts/tda/lrt/LRT.R similarity index 100% rename from pipeline/scripts/tda/lrt/LRT.R rename to pipeline/preprocessing-pipeline/scripts/tda/lrt/LRT.R diff --git a/pipeline/scripts/tda/lrt/compile_LRT_tasks.py b/pipeline/preprocessing-pipeline/scripts/tda/lrt/compile_LRT_tasks.py similarity index 100% rename 
from pipeline/scripts/tda/lrt/compile_LRT_tasks.py rename to pipeline/preprocessing-pipeline/scripts/tda/lrt/compile_LRT_tasks.py diff --git a/pipeline/scripts/tda/lrt/lrt.sh b/pipeline/preprocessing-pipeline/scripts/tda/lrt/lrt.sh similarity index 97% rename from pipeline/scripts/tda/lrt/lrt.sh rename to pipeline/preprocessing-pipeline/scripts/tda/lrt/lrt.sh index 15176d8f9..d1ddf904d 100755 --- a/pipeline/scripts/tda/lrt/lrt.sh +++ b/pipeline/preprocessing-pipeline/scripts/tda/lrt/lrt.sh @@ -55,7 +55,7 @@ job_name=lrt-`cat job-hash.txt` # away from it. Given that, I don't think it's worth any additional investigation and I'm electing to just # continue using the old docker image until the day we can abandon LRT. #LRT_DOCKER_IMAGE="us.gcr.io/broad-achilles/depmap-pipeline-tda-lrt:v3" -LRT_DOCKER_IMAGE=us.gcr.io/broad-achilles/tda-pipeline:v2 +LRT_DOCKER_IMAGE=us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/tda-pipeline:v2 #Submit job eval "$sparkles_path" \ diff --git a/pipeline/scripts/tda/lrt/partition-lrt-input.py b/pipeline/preprocessing-pipeline/scripts/tda/lrt/partition-lrt-input.py similarity index 100% rename from pipeline/scripts/tda/lrt/partition-lrt-input.py rename to pipeline/preprocessing-pipeline/scripts/tda/lrt/partition-lrt-input.py diff --git a/pipeline/scripts/tda/moments.py b/pipeline/preprocessing-pipeline/scripts/tda/moments.py similarity index 100% rename from pipeline/scripts/tda/moments.py rename to pipeline/preprocessing-pipeline/scripts/tda/moments.py diff --git a/pipeline/scripts/test_merge_cpd_data.py b/pipeline/preprocessing-pipeline/scripts/test_merge_cpd_data.py similarity index 100% rename from pipeline/scripts/test_merge_cpd_data.py rename to pipeline/preprocessing-pipeline/scripts/test_merge_cpd_data.py diff --git a/pipeline/sparkles-config b/pipeline/preprocessing-pipeline/sparkles-config similarity index 61% rename from pipeline/sparkles-config rename to pipeline/preprocessing-pipeline/sparkles-config index 4d95c3c9f..4fafeb311 100644 --- a/pipeline/sparkles-config +++ b/pipeline/preprocessing-pipeline/sparkles-config @@ -1,7 +1,7 @@ [config] -default_url_prefix=gs://broad-achilles-kubeque/depmap-pipeline -project=broad-achilles -default_image=us.gcr.io/broad-achilles/tda-pipeline:v2 +default_url_prefix=gs://dpp-sparkles/depmap-pipeline +project=depmap-portal-pipeline +default_image=us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/tda-pipeline:v2 region=us-central1 machine_type=n2-highmem-2 account=856306709302-compute@developer.gserviceaccount.com diff --git a/pipeline/preprocessing-pipeline/sparkles-config-n1-highmem-4 b/pipeline/preprocessing-pipeline/sparkles-config-n1-highmem-4 new file mode 100644 index 000000000..5900e22b7 --- /dev/null +++ b/pipeline/preprocessing-pipeline/sparkles-config-n1-highmem-4 @@ -0,0 +1,12 @@ +[config] +cas_url_prefix=gs://dpp-sparkles/test/cas +default_url_prefix=gs://dpp-sparkles/depmap-pipeline +project=depmap-portal-pipeline +default_image=us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/tda-pipeline:v2 +region=us-central1 +machine_type=n2-highmem-4 +mount_1_type=pd-standard +mount_1_size_in_gb=50 +boot_volume_in_gb=40 +sparklesworker_image=us-central1-docker.pkg.dev/cds-docker-containers/docker/sparklesworker:5.0.0-alpha3 +account=pmontgom@broadinstitute.org diff --git a/pipeline/tda.conseq b/pipeline/preprocessing-pipeline/tda.conseq similarity index 95% rename from pipeline/tda.conseq rename to pipeline/preprocessing-pipeline/tda.conseq index 
c4cd15b64..c5c017a09 100644 --- a/pipeline/tda.conseq +++ b/pipeline/preprocessing-pipeline/tda.conseq @@ -33,7 +33,7 @@ rule get_crispr_inferred_common_essentials: rule common_essentials: - executor: dsub {"docker_image": "us.gcr.io/broad-achilles/depmap-pipeline-tda:v10", "min_ram": "10"} + executor: dsub {"docker_image": "us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/depmap-pipeline-tda:v10", "min_ram": "10"} inputs: data={'type': 'dep-matrix-csv'}, crispr_inferred_common_essentials={'type': 'crispr-inferred-common-essentials-file'}, @@ -44,7 +44,7 @@ rule common_essentials: rule dep_moments: - executor: dsub {"docker_image": "us.gcr.io/broad-achilles/depmap-pipeline-tda:v10", "min_ram": "10"} + executor: dsub {"docker_image": "us-central1-docker.pkg.dev/depmap-consortium/depmap-docker-images/depmap-pipeline-tda:v10", "min_ram": "10"} inputs: data={'type': 'dep-matrix-csv'}, script=fileref('scripts/tda/moments.py') diff --git a/pipeline/tda_table_generator.conseq b/pipeline/preprocessing-pipeline/tda_table_generator.conseq similarity index 100% rename from pipeline/tda_table_generator.conseq rename to pipeline/preprocessing-pipeline/tda_table_generator.conseq diff --git a/pipeline/validation.conseq b/pipeline/preprocessing-pipeline/validation.conseq similarity index 100% rename from pipeline/validation.conseq rename to pipeline/preprocessing-pipeline/validation.conseq diff --git a/pipeline/xrefs-common.conseq b/pipeline/preprocessing-pipeline/xrefs-common.conseq similarity index 100% rename from pipeline/xrefs-common.conseq rename to pipeline/preprocessing-pipeline/xrefs-common.conseq diff --git a/pipeline/xrefs-dmc.template b/pipeline/preprocessing-pipeline/xrefs-dmc.template similarity index 100% rename from pipeline/xrefs-dmc.template rename to pipeline/preprocessing-pipeline/xrefs-dmc.template diff --git a/pipeline/preprocessing-pipeline/xrefs-external.template b/pipeline/preprocessing-pipeline/xrefs-external.template new file mode 100644 index 000000000..2dd26009d --- /dev/null +++ b/pipeline/preprocessing-pipeline/xrefs-external.template @@ -0,0 +1,3 @@ +include "xrefs-common.conseq" +# TAIGA_PREPROCESSOR_INCLUDE "../external.template" +# TAIGA_PREPROCESSOR_INCLUDE "xrefs-public.template" diff --git a/pipeline/preprocessing-pipeline/xrefs-public.template b/pipeline/preprocessing-pipeline/xrefs-public.template new file mode 100644 index 000000000..e4ccbe4a1 --- /dev/null +++ b/pipeline/preprocessing-pipeline/xrefs-public.template @@ -0,0 +1,216 @@ +# SET_TAIGA_PREPROCESSOR repurposing_taiga_id "repurposing-public-24q2-875f.4" + +add-if-missing { + "type": "release_taiga_id", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname) +} + +# The following 3 artifacts are necessary for the Context Explorer preprocessing +# scripts to run successfully +add-if-missing { + "type": "subtype_tree", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "SubtypeTree"), +} + +add-if-missing { + "type": "omics_profiles", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsProfiles") +} + +add-if-missing { + "type": "subtype_context_matrix", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "SubtypeMatrix"), +} + + +# Sample info file +add-if-missing { + "type": "sample_info_dataset_id", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "Model"), +} + +add-if-missing { + "type": "crispr-confounder-parameters", + "achilles_qc_report_taiga_id": PREPROCESS_TAIGA_ID(virtual_permaname, "AchillesScreenQCReport"), + "crispr_screen_map_taiga_id": 
PREPROCESS_TAIGA_ID(virtual_permaname, "CRISPRScreenMap") +} + +# Chronos Combined +add-if-missing { + "type": "raw-dep-prob-matrix", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "CRISPRGeneDependency"), + "label": "Chronos_Combined", + "rows": "cell-lines" +} + +add-if-missing { + "type": "crispr-screen-sequence-map", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "ScreenSequenceMap") +} + + +add-if-missing { + "type": "raw-dep-matrix", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "CRISPRGeneEffect"), + "label": "Chronos_Combined", + "rows": "cell-lines", + "confounders_label": "crispr-confounders", +} + +# Chronos Achilles +add-if-missing { + "type": "confounders-matrix-essential-genes", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "AchillesCommonEssentialControls") +} + +add-if-missing { + "type": "confounders-matrix-nonessential-genes", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "AchillesNonessentialControls") +} + +# Mutation table +add-if-missing { + "type": "mutation-maf", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsSomaticMutations") +} + +add-if-missing { + "type": "other-taiga-dataset", + "category": "fusions", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsFusionFiltered") +} + +# CRISPRInferredCommonEssentials +add-if-missing { + "type": "crispr-inferred-common-essentials", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "CRISPRInferredCommonEssentials") +} + +# biomarker-matrix +add-if-missing { + 'type': 'raw-expr-matrix', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsExpressionTPMLogp1HumanProteinCodingGenes"), + 'category' : 'expression', +} + +# biomarker-matrix +add-if-missing { + 'type': 'raw-expr-matrix-profile', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsExpressionTPMLogp1HumanAllGenes"), + 'category' : 'expression', +} + + +# profile-map +add-if-missing { + 'type': 'profile-map', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsProfiles"), + 'category' : 'mapping', +} + +# model-condition +add-if-missing { + 'type': 'model-condition', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "ModelCondition"), + 'category' : 'mapping', +} + +# Log2-transformed CNGene data +add-if-missing { + 'type': 'biomarker-needing-transpose', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "PortalOmicsCNGeneLog2"), + 'category': 'copy-number-relative' +} + + +add-if-missing { + "type": "raw-mutations-bool-matrix", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsSomaticMutationsMatrixDamaging"), + "category": "damaging" +} + +add-if-missing { + "type": "raw-mutations-bool-matrix", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsSomaticMutationsMatrixHotspot"), + "category": "hotspot" +} + + +# These below are used to label datasets. 
It's a little odd that the display label is stored +# separately from the artifact with the data (and so we'll have to update both when versions change) +# but doing it this way saves us from having to re-run rules when the display name changes + +add-if-missing { + 'type': 'dataset-display-name', + 'display_name': 'Copy Number WGS {{ config.RELEASE_LABEL }} (Log2 transformed)', + 'label': 'copy_number_relative', + 'dataset_id': PREPROCESS_TAIGA_ID(virtual_permaname, "PortalOmicsCNGeneLog2") +} + +add-if-missing { + 'type': 'dataset-display-name', + 'display_name': 'Expression {{ config.RELEASE_LABEL }}', + 'label': 'expression', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsExpressionTPMLogp1HumanProteinCodingGenes"), +} + +add-if-missing { + 'type': 'dataset-display-name', + 'display_name': 'Fusions {{ config.RELEASE_LABEL }}', + 'label': 'fusions', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsFusionFiltered"), +} + +add-if-missing { + 'type': 'dataset-display-name', + 'display_name': 'Mutation {{ config.RELEASE_LABEL }}', + 'label': 'mutation_pearson', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "OmicsSomaticMutations"), +} + +add-if-missing { + 'type': 'dataset-display-name', + 'display_name': 'CRISPR (DepMap {{ config.RELEASE_LABEL }}+Score, Chronos)', + 'label': 'Chronos_Combined', + 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_permaname, "CRISPRGeneEffect"), +} + +# These artifacts are used to associate with the dataset name "Rep_all_single_pt". +add-if-missing { + 'type' : 'repallsinglept-taiga-id', + 'label' : 'Rep_all_single_pt', # matches with Dataset enum + 'dataset_id' : PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Extended_Primary_Data_Matrix"), +} + +## These "Rep_all_single_pt" artifacts are used for context analysis + +add-if-missing { + "type": "repurposing_matrix_taiga_id", + "dataset_id": PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Extended_Primary_Data_Matrix") +} + +add-if-missing { + "type": "repurposing_list_taiga_id", + "dataset_id": PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Extended_Primary_Compound_List") +} + +## These "Rep_all_single_pt" artifacts are used for the compound dashboard summary table +add-if-missing { + "type": "needs-compound-dashboard", + "dataset": "Rep_all_single_pt", + "units": "log2 fold change" # Must match shared.py +} + +add-if-missing { + 'type' : 'raw-treatment_metadata', + 'label' : 'Rep_all_single_pt', + 'dataset_id' : PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Treatment_Meta_Data") +} +add-if-missing { + "type": "download_from_taiga", + "target_type": "drug-metadata", + "dataset_id": PREPROCESS_TAIGA_ID(virtual_permaname, "PortalCompounds"), + "name": "merged-drugs", + "label": "compound_metadata", + "format": "csv" +} diff --git a/pipeline/process-drug-screens.conseq b/pipeline/process-drug-screens.conseq deleted file mode 100644 index 10b9b6749..000000000 --- a/pipeline/process-drug-screens.conseq +++ /dev/null @@ -1,67 +0,0 @@ -rule process_drug_screen_auc_matrix: - inputs: - download={"type": "drug_screen_auc_matrix"}, - script=fileref("scripts/process_drug_screen_auc_matrix.py") - outputs: - { - "label": "{{ inputs.download.label }}", - "orig_dataset_id": "{{ inputs.download.dataset_id }}", - "type": "dep-matrix", - "dataset_id": "{{ inputs.download.dataset_id }}", - "filename": {"$filename": "{{ inputs.download.label }}.hdf5"}, - "confounders_label": "{{ inputs.download.confounders_label }}" -
} - run "python {{ inputs.script.filename }} {{ inputs.download.label }} {{ inputs.download.dataset_id }} {{ inputs.download.conditions_dataset_id }} {{ inputs.download.sample_id_prefix }} '{{inputs.download.source_units | default('AUC')}}' {{ inputs.download.label }}.hdf5" - - -rule process_drug_screen_drc: - inputs: - download={"type": "drug_screen_drc"}, - script=fileref("scripts/process_drug_screen_drc.py") - outputs: - { - "type": "dose-response-curve-params", - "dataset_id": "{{ inputs.download.dataset_id }}", - "filename": {"$filename": "{{ inputs.download.label }}.csv"}, - "label": "{{ inputs.download.label }}", - "orig_dataset_id": "{{ inputs.download.dataset_id }}" - } - run "python {{ inputs.script.filename }} {{ inputs.download.label }} {{ inputs.download.dataset_id }} {{ inputs.download.conditions_dataset_id }} {{ inputs.download.sample_id_prefix }} {{ inputs.download.label }}.csv" - -rule process_download_csv_from_taiga: - inputs: - download={"type": "download_csv_from_taiga"}, - run "python" with """ - import shutil - import taigapy - import os - - tc = taigapy.create_version_3_client() - - label = {{ inputs.download.label | quoted }} - - assert label in [ "compound_metadata" ] - cached = tc.download_to_cache("{{ inputs.download.dataset_id }}", LocalFormat.CSV_TABLE) - shutil.copy2(cached, "out.csv") - - assert os.path.exists("out.csv"), "Output file 'out.csv' not generated" - """ - -rule process_viability_dataset_from_taiga: - inputs: - script=fileref("scripts/process_viability_dataset_from_taiga.py"), - download={"type": "viability_dataset"}, - hdf5_utils=fileref("scripts/hdf5_utils.py") - outputs: { - "type": "aggregated-dose-replicate-level", - "label": "{{ inputs.download.label }}", - "dataset_id": "{{ inputs.download.dataset_id }}", - "orig_dataset_id": "{{ inputs.download.dataset_id }}", - "cell_lines_dataset_id": "none", - "perturbations_dataset_id": "{{ inputs.download.perturbations_dataset_id }}", - "hdf5_filename": {"$filename": "out.hdf5"}, - "cell_lines_filename": {"$filename": "cell_lines.csv"}, - "perturbations": {"$filename": "perturbations.csv"} - } - run "python {{ inputs.script.filename }} {{ inputs.download.label }} {{ inputs.download.perturbations_dataset_id }} {{ inputs.download.dataset_id }} {{ inputs.download.units }}" - run "python {{ inputs.hdf5_utils.filename }} to_hdf5 out.csv csv out.hdf5" diff --git a/pipeline/run_pipelines_jenkins.sh b/pipeline/run_pipelines_jenkins.sh new file mode 100755 index 000000000..04a9c6c8a --- /dev/null +++ b/pipeline/run_pipelines_jenkins.sh @@ -0,0 +1,97 @@ +#!/bin/bash + +# Exit immediately if a command exits with a non-zero status +set -ex + +# ============================================== +# CLEANUP PREVIOUS RUNS +# ============================================== +echo "Cleaning up previous Docker containers..." +set +e # Don't exit on error for cleanup +docker kill depmap-data-prep-pipeline-run-test-perf 2>/dev/null || true +docker kill depmap-preprocessing-pipeline-run-test-perf 2>/dev/null || true +set -e # Re-enable exit on error + +# ============================================== +# SETUP DEPLOY REPO +# ============================================== +echo "Setting up depmap-deploy repo..." 
+if [ -d depmap-deploy ] ; then + ssh-agent bash -c 'ssh-add /home/ubuntu/.ssh/depmap-deploy-repo-key; cd depmap-deploy ; git pull' +else + ssh-agent bash -c 'ssh-add /home/ubuntu/.ssh/depmap-deploy-repo-key; git clone git@github.com:broadinstitute/depmap-deploy.git' +fi + +( cd depmap-deploy && git checkout depmap-pipeline-reorg-25q3 ) + +# ============================================== +# DATA PREP PIPELINE +# ============================================== +echo "==================== DATA PREP PIPELINE ====================" + +if [ "$CLEAN_START" = "true" ] && [ -d "pipeline/data-prep-pipeline/state" ]; then + echo "Cleaning data prep pipeline state..." + sudo chown -f -R ubuntu pipeline/data-prep-pipeline/state || true + rm -rf pipeline/data-prep-pipeline/state +fi + +# Copy non-public conseq files to the pipeline directory +echo "Syncing non-public pipeline files..." +rsync -av depmap-deploy/non-public-pipeline-files/ pipeline/ + +# Run the Python data prep pipeline script +echo "Starting data prep pipeline..." +python pipeline/data-prep-pipeline/data_prep_pipeline_runner.py test-perf depmap-data-prep-pipeline-run-test-perf + +# Check if data prep pipeline succeeded +if [ $? -ne 0 ]; then + echo "ERROR: Data prep pipeline failed! Stopping execution." + exit 1 +fi + +echo "Data prep pipeline completed successfully." + +# ============================================== +# PREPROCESSING PIPELINE +# ============================================== +echo "==================== PREPROCESSING PIPELINE ====================" + +export PREPROCESSING_PUBLISH_DEST="gs://preprocessing-pipeline-outputs/depmap-pipeline-25q4/preprocessing-pipeline-test-perf/publish" +export PREPROCESSING_EXPORT_PATH="gs://preprocessing-pipeline-outputs/depmap-pipeline-25q4/preprocessing-pipeline-test-perf/export" + +if [ "$CLEAN_START" = "true" ] && [ -d "pipeline/preprocessing-pipeline/state" ]; then + echo "Cleaning preprocessing pipeline state..." + sudo chown -f -R ubuntu pipeline/preprocessing-pipeline/state || true + rm -rf pipeline/preprocessing-pipeline/state +fi + +echo "Syncing non-public pipeline files..." +rsync -av depmap-deploy/non-public-pipeline-files/ pipeline/ + +# Run the Python preprocessing pipeline script +echo "Starting preprocessing pipeline..." +python pipeline/preprocessing-pipeline/preprocessing_pipeline_runner.py \ + test-perf \ + depmap-preprocessing-pipeline-run-test-perf \ + --publish-dest "$PREPROCESSING_PUBLISH_DEST" \ + --export-path "$PREPROCESSING_EXPORT_PATH" + +# Check if preprocessing pipeline succeeded +if [ $? -ne 0 ]; then + echo "ERROR: Preprocessing pipeline failed! Stopping execution." + exit 1 +fi + +echo "Preprocessing pipeline completed successfully!" + +# ============================================== +# POST-SUCCESS ACTIONS +# ============================================== +echo "==================== ALL PIPELINES COMPLETED ====================" + +# If we reach here, the job was successful so potentially kick off the DB rebuild +if [ "$ON_SUCCESS_REBUILD_DB" = "true" ]; then + echo "Triggering DB rebuild..." + curl 'https://hooks-proxy.broadinstitute.org/generic-webhook/jenkins/datascidev?token=test-perf+build+db+3amvd0923SSz' + echo "DB rebuild triggered!" 
+fi diff --git a/pipeline/scripts/process_taiga_pulled_aggregated_dose_artifact.py b/pipeline/scripts/process_taiga_pulled_aggregated_dose_artifact.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/pipeline/sparkles-config-n1-highmem-4 b/pipeline/sparkles-config-n1-highmem-4 deleted file mode 100644 index b142d3709..000000000 --- a/pipeline/sparkles-config-n1-highmem-4 +++ /dev/null @@ -1,12 +0,0 @@ -[config] -cas_url_prefix=gs://broad-achilles-kubeque/test/cas -default_url_prefix=gs://broad-achilles-kubeque/depmap-pipeline -project=broad-achilles -default_image=us.gcr.io/broad-achilles/tda-pipeline:v2 -region=us-central1 -machine_type=n2-highmem-4 -mount_1_type=pd-standard -mount_1_size_in_gb=50 -boot_volume_in_gb=40 -sparklesworker_image=us-central1-docker.pkg.dev/cds-docker-containers/docker/sparklesworker:5.0.0-alpha3 -account=pmontgom@broadinstitute.org \ No newline at end of file diff --git a/pipeline/summarize_gene_deps.conseq b/pipeline/summarize_gene_deps.conseq deleted file mode 100644 index 8b1378917..000000000 --- a/pipeline/summarize_gene_deps.conseq +++ /dev/null @@ -1 +0,0 @@ - diff --git a/pipeline/xrefs-external.template b/pipeline/xrefs-external.template deleted file mode 100644 index 515da96c3..000000000 --- a/pipeline/xrefs-external.template +++ /dev/null @@ -1,183 +0,0 @@ -include "xrefs-common.conseq" - -# SET_TAIGA_PREPROCESSOR virtual_dataset_id "public-25q2-c5ef.111" - -let RELEASE_LABEL="Public 25Q2" - -add-if-missing { - "type": "depmap_data_taiga_id", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id) -} - - -# The following 3 artifacts are necessary for the Context Explorer preprocessing -# scripts to run successfully -add-if-missing { - "type": "subtype_tree", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "SubtypeTree"), -} - -add-if-missing { - "type": "omics_profiles", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsProfiles") -} - -add-if-missing { - "type": "subtype_context_matrix", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "SubtypeMatrix"), -} - - -# Sample info file -add-if-missing { - "type": "sample_info_dataset_id", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "Model"), -} - -add-if-missing { - "type": "crispr-confounder-parameters", - "achilles_qc_report_taiga_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "AchillesScreenQCReport"), - "crispr_screen_map_taiga_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "CRISPRScreenMap") -} - -# Chronos Combined -add-if-missing { - "type": "raw-dep-prob-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "CRISPRGeneDependency"), - "label": "Chronos_Combined", - "rows": "cell-lines" -} - -add-if-missing { - "type": "crispr-screen-sequence-map", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "ScreenSequenceMap") -} - - -add-if-missing { - "type": "raw-dep-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "CRISPRGeneEffect"), - "label": "Chronos_Combined", - "rows": "cell-lines", - "confounders_label": "crispr-confounders", -} - -# Chronos Achilles -add-if-missing { - "type": "confounders-matrix-essential-genes", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "AchillesCommonEssentialControls") -} - -add-if-missing { - "type": "confounders-matrix-nonessential-genes", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "AchillesNonessentialControls") -} - -# Mutation table -add-if-missing { - "type": "mutation-maf", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsSomaticMutations") -} - 
-add-if-missing { - "type": "other-taiga-dataset", - "category": "fusions", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsFusionFiltered") -} - -# CRISPRInferredCommonEssentials -add-if-missing { - "type": "crispr-inferred-common-essentials", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "CRISPRInferredCommonEssentials") -} - -# biomarker-matrix -add-if-missing { - 'type': 'raw-expr-matrix', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsExpressionTPMLogp1HumanProteinCodingGenes"), - 'category' : 'expression', -} - -# biomarker-matrix -add-if-missing { - 'type': 'raw-expr-matrix-profile', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsExpressionTPMLogp1HumanAllGenes"), - 'category' : 'expression', -} - - -# profile-map -add-if-missing { - 'type': 'profile-map', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsProfiles"), - 'category' : 'mapping', -} - -# model-condition -add-if-missing { - 'type': 'model-condition', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "ModelCondition"), - 'category' : 'mapping', -} - -# Log2-transformed CNGene data -add-if-missing { - 'type': 'biomarker-needing-transpose', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "PortalOmicsCNGeneLog2"), - 'category': 'copy-number-relative' -} - - -add-if-missing { - "type": "raw-mutations-bool-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsSomaticMutationsMatrixDamaging"), - "category": "damaging" -} - -add-if-missing { - "type": "raw-mutations-bool-matrix", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsSomaticMutationsMatrixHotspot"), - "category": "hotspot" -} - - -# These below are used to label datasets. It's a little odd that the display label is stored -# seperately from the artifact with the data (and so we'll have to update both when versions change) -# but doing it this way saves us from having to re-run rules when the display name changes - -add-if-missing { - 'type': 'dataset-display-name', - 'display_name': 'Copy Number WGS {{ config.RELEASE_LABEL }} (Log2 transformed)', - 'label': 'copy_number_relative', - 'dataset_id': PREPROCESS_TAIGA_ID(virtual_dataset_id, "PortalOmicsCNGeneLog2") -} - -add-if-missing { - 'type': 'dataset-display-name', - 'display_name': 'Expression {{ config.RELEASE_LABEL }}', - 'label': 'expression', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsExpressionTPMLogp1HumanProteinCodingGenes"), -} - -add-if-missing { - 'type': 'dataset-display-name', - 'display_name': 'Fusions {{ config.RELEASE_LABEL }}', - 'label': 'fusions', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsFusionFiltered"), -} - -add-if-missing { - 'type': 'dataset-display-name', - 'display_name': 'Mutation {{ config.RELEASE_LABEL }}', - 'label': 'mutation_pearson', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "OmicsSomaticMutations"), -} - -add-if-missing { - 'type': 'dataset-display-name', - 'display_name': 'CRISPR (DepMap {{ config.RELEASE_LABEL }}+Score, Chronos)', - 'label': 'Chronos_Combined', - 'dataset_id' : PREPROCESS_TAIGA_ID(virtual_dataset_id, "CRISPRGeneEffect"), -} - -# TAIGA_PREPROCESSOR_INCLUDE "xrefs-public.template" diff --git a/pipeline/xrefs-nonquarterly-unprocessed.conseq b/pipeline/xrefs-nonquarterly-unprocessed.conseq deleted file mode 100644 index e69de29bb..000000000 diff --git a/pipeline/xrefs-public.template b/pipeline/xrefs-public.template deleted file mode 100644 index ebd5d407e..000000000 --- 
a/pipeline/xrefs-public.template +++ /dev/null @@ -1,41 +0,0 @@ -# SET_TAIGA_PREPROCESSOR repurposing_taiga_id "repurposing-public-24q2-875f.4" - -# These artifacts are used to associate with the dataset name "Rep_all_single_pt". -add-if-missing { - 'type' : 'repallsinglept-taiga-id', - 'label' : 'Rep_all_single_pt', # matches with Dataset enum - 'dataset_id' : PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Extended_Primary_Data_Matrix"), -} - -## These "Rep_all_single_pt" artifacts are used for context analysis - -add-if-missing { - "type": "repurposing_matrix_taiga_id", - "dataset_id": PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Extended_Primary_Data_Matrix") -} - -add-if-missing { - "type": "repurposing_list_taiga_id", - "dataset_id": PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Extended_Primary_Compound_List") -} - -## These "Rep_all_single_pt" artifacts are used for the compound dashboard summary table -add-if-missing { - "type": "needs-compound-dashboard", - "dataset": "Rep_all_single_pt", - "units": "log2 fold change" # Must match shared.py -} - -add-if-missing { - 'type' : 'raw-treatment_metadata', - 'label' : 'Rep_all_single_pt', - 'dataset_id' : PREPROCESS_TAIGA_ID(repurposing_taiga_id, "Repurposing_Public_24Q2_Treatment_Meta_Data") -} -add-if-missing { - "type": "download_from_taiga", - "target_type": "drug-metadata", - "dataset_id": PREPROCESS_TAIGA_ID(virtual_dataset_id, "PortalCompounds"), - "name": "merged-drugs", - "label": "compound_metadata", - "format": "csv" -} diff --git a/portal-backend/sample_data/subset_files/subset_context_explorer.py b/portal-backend/sample_data/subset_files/subset_context_explorer.py index 79ace0913..df3b76c9d 100644 --- a/portal-backend/sample_data/subset_files/subset_context_explorer.py +++ b/portal-backend/sample_data/subset_files/subset_context_explorer.py @@ -28,13 +28,13 @@ def main(): parser = argparse.ArgumentParser() - parser.add_argument("depmap_data_taiga_id") + parser.add_argument("release_taiga_id") parser.add_argument("repurposing_taiga_id") parser.add_argument("rnai_taiga_id") parser.add_argument("dest") parser.add_argument("file_name") args = parser.parse_args() - depmap_data_taiga_id = args.depmap_data_taiga_id + release_taiga_id = args.release_taiga_id repurposing_taiga_id = args.repurposing_taiga_id rnai_taiga_id = args.rnai_taiga_id dest = args.dest @@ -43,7 +43,7 @@ def main(): tc = create_taiga_client_v3() # Data for CRISPR, RNAi, Omics, PRISM - ScreenSequenceMap = tc.get(f"{depmap_data_taiga_id}/ScreenSequenceMap") + ScreenSequenceMap = tc.get(f"{release_taiga_id}/ScreenSequenceMap") ScreenSequenceMap = ScreenSequenceMap[ ScreenSequenceMap["ModelID"].isin(cell_lines_arxspan) ] @@ -52,7 +52,7 @@ def main(): CL_data_comb = tc.get(rnai_taiga_id) assert CL_data_comb is not None - OmicsProfiles = tc.get(f"{depmap_data_taiga_id}/OmicsProfiles") + OmicsProfiles = tc.get(f"{release_taiga_id}/OmicsProfiles") OmicsProfiles = OmicsProfiles[OmicsProfiles["ModelID"].isin(cell_lines_arxspan)] assert OmicsProfiles is not None @@ -62,7 +62,7 @@ def main(): ] assert Repurposing_23Q2_Cell_Line_Meta_Data is not None - Model = tc.get(f"{depmap_data_taiga_id}/Model") + Model = tc.get(f"{release_taiga_id}/Model") Model = Model[Model["ModelID"].isin(cell_lines_arxspan)] assert Model is not None