Skip to content

Commit

Permalink
chore: multiple python version support with latest pyspark and hail (#…
Browse files Browse the repository at this point in the history
…974)

* chore(pyspark): update to 3.5.X

* chore: fix doctest syntax

* chore: bump temurin version to 11

* feat: allow multiple python versions

* feat: python matrix for gha

* chore: pre-commit auto fixes [...]

* chore: typos

* chore: fix python version in setup dev script

* fix: attempt to fix the 3.11 tests

* fix: set the session correctly in variant_index_config

* Revert "chore: fix doctest syntax"

This reverts commit 630c0c9.

* chore: update dependencies

* Revert "Revert "chore: fix doctest syntax""

This reverts commit 18c66b1.

* chore: bump image to 2.2

* chore: update lock files

* build: poetry cleanup

* build: uv checks dropped

* chore: fix dockerfile and install test deps

* build(uv): add all dependencies to run tests

* chore: fix test issue with rounding error

* chore: fix dependency version lower bounds

* chore: add .python-version file to ignored

* build: new setup

* build: new setup

* build: new setup

* build: new setup

* build: new setup

* revert: bring back initialization actions

* chore: align variable name

* chore: update pre-commit python version

* chore: docs update

* feat: more complex uv installation

* feat: notify to source shellrc file when installing uv

* fix: checks

* chore: debug gha

* chore: debug gha

* feat: debug gha

* feat: debug gha

* feat: debug gha

* feat: force user shell

* feat: gha debug

* feat: gha debug

* feat: gha debug

* feat: gha debug

---------

Co-authored-by: project-defiant <[email protected]>
Co-authored-by: Szymon Szyszkowski <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
4 people authored Jan 28, 2025
1 parent aa00959 commit 3d31edd
Show file tree
Hide file tree
Showing 27 changed files with 4,350 additions and 6,673 deletions.
10 changes: 5 additions & 5 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,26 @@ version: 1
labels:
- label: "size-XS"
size:
exclude-files: ["poetry.lock"]
exclude-files: ["uv.lock"]
below: 10
- label: "size-S"
size:
exclude-files: ["poetry.lock"]
exclude-files: ["uv.lock"]
above: 9
below: 100
- label: "size-M"
size:
exclude-files: ["poetry.lock"]
exclude-files: ["uv.lock"]
above: 100
below: 500
- label: "size-L"
size:
exclude-files: ["poetry.lock"]
exclude-files: ["uv.lock"]
above: 499
below: 1000
- label: "size-XL"
size:
exclude-files: ["poetry.lock"]
exclude-files: ["uv.lock"]
above: 999
- label: "airflow"
files:
Expand Down
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ add diagrams or images if necessary. It'll help the reviewer_ -->
- [ ] Did you make sure the branch is up-to-date with the `dev` branch?
- [ ] Did you write any new necessary tests?
- [ ] Did you make sure the changes pass local tests (`make test`)?
- [ ] Did you make sure the changes pass pre-commit rules (e.g `poetry run pre-commit run --all-files`)?
- [ ] Did you make sure the changes pass pre-commit rules (e.g `uv run pre-commit run --all-files`)?
15 changes: 6 additions & 9 deletions .github/workflows/artifact.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: Build and Push to Artifact Registry

"on":
workflow_dispatch:
push:
branches:
- "*"
Expand All @@ -12,7 +13,7 @@ env:
REGION: europe-west1
GAR_LOCATION: europe-west1-docker.pkg.dev/open-targets-genetics-dev
REPOSITORY: gentropy-app
PYTHON_VERSION_DEFAULT: "3.10.8"
PYTHON_VERSION_DEFAULT: "3.11.11"

jobs:
build-push-artifact:
Expand Down Expand Up @@ -54,6 +55,7 @@ jobs:

# skip the `v` at the beginning of the tag for docker image tags
- name: Create a docker tag
if: github.ref == 'refs/heads/dev' || startsWith(github.ref, 'refs/tags/v')
id: docker-tag
shell: bash
env:
Expand Down Expand Up @@ -86,13 +88,8 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION_DEFAULT }}
- name: Install and configure Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Install uv
uses: astral-sh/setup-uv@v5

- name: Build and push spark cluster dependencies
run: |
make build
run: make build
34 changes: 15 additions & 19 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,47 +4,43 @@ name: Checks
pull_request:

env:
PYTHON_VERSION_DEFAULT: "3.10.8"
PYTHON_VERSION_DEFAULT: "3.11.11"

jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12"]
fail-fast: false
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.10.8
python-version: ${{ matrix.python-version }}
- name: Set up Java
uses: actions/setup-java@v4
with:
java-version: "8"
java-version: "11"
distribution: "temurin"
- name: Install and configure Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Install uv
uses: astral-sh/setup-uv@v5
- name: Load cached venv
id: cached-poetry-dependencies
id: cached-uv-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ env.PYTHON_VERSION_DEFAULT }}-${{ hashFiles('**/poetry.lock') }}
- name: Validate project dependencies
run: poetry check
key: venv-${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/uv.lock') }}
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root
- name: Install library
run: poetry install --no-interaction
if: steps.cached-uv-dependencies.outputs.cache-hit != 'true'
run: uv sync --all-groups
- name: Check dependencies
run: poetry run deptry .
run: uv run deptry .
- name: Run tests
run: poetry run pytest
run: uv run pytest
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5
with:
Expand Down
28 changes: 10 additions & 18 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ name: Release

"on":
push:
branches: ["main", "release/**"]
branches: ["main", "release/**", "dev"]

concurrency:
group: deploy
cancel-in-progress: false # prevent hiccups with semantic-release

env:
PYTHON_VERSION_DEFAULT: "3.10.8"
PYTHON_VERSION_DEFAULT: "3.11.11"

jobs:
release:
Expand Down Expand Up @@ -40,9 +40,7 @@ jobs:

- name: Python Semantic Release
id: semrelease
# v9.6.0 is required due to the python v3.12 in the newer version of semantic release action which
# breaks the poetry build command.
uses: python-semantic-release/[email protected]
uses: python-semantic-release/[email protected]
with:
github_token: ${{ steps.trigger-token.outputs.token }}

Expand Down Expand Up @@ -121,25 +119,19 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION_DEFAULT }}
- name: Install and configure Poetry
uses: snok/install-poetry@v1
with:
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Install uv
uses: astral-sh/setup-uv@v5
- name: Load cached venv
id: cached-poetry-dependencies
id: cached-dependencies
uses: actions/cache@v4
with:
path: .venv
key: |
venv-${{ runner.os }}-\
${{ env.PYTHON_VERSION_DEFAULT }}-\
${{ hashFiles('**/poetry.lock') }}
${{ hashFiles('**/uv.lock') }}
- name: Install dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root
- name: Install library
run: poetry install --without tests --no-interaction
if: steps.cached-dependencies.outputs.cache-hit != 'true'
run: uv sync --group docs
- name: Publish docs
run: poetry run mkdocs gh-deploy --force
run: uv run mkdocs gh-deploy --force
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ site/
.coverage*
wandb/
hail*.log
.python-version
.idea
11 changes: 4 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
default_language_version:
python: python3.10
python: python3.11
ci:
autoupdate_commit_msg: "chore: pre-commit autoupdate"
autofix_commit_msg: "chore: pre-commit auto fixes [...]"
skip: [poetry-lock]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.4
Expand Down Expand Up @@ -101,9 +100,7 @@ repos:
rev: 0.5.9
hooks:
- id: pydoclint

- repo: https://github.com/python-poetry/poetry
rev: "2.0.0"
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.5.22
hooks:
- id: poetry-check
- id: poetry-lock
- id: uv-lock
1 change: 0 additions & 1 deletion .python-version

This file was deleted.

25 changes: 9 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,20 @@
FROM python:3.10-bullseye

RUN apt-get update \
&& apt-get clean \
&& apt-get install -y openjdk-11-jdk \
&& rm -rf /var/lib/apt/lists/*
RUN apt-get update && \
apt-get clean && \
apt-get install -y openjdk-11-jdk && \
rm -rf /var/lib/apt/lists/*

ENV POETRY_NO_INTERACTION=1 \
POETRY_VIRTUALENVS_IN_PROJECT=1 \
POETRY_VIRTUALENVS_CREATE=1 \
POETRY_CACHE_DIR=/tmp/poetry_cache \
JAVA_HOME=/usr
ENV JAVA_HOME=/usr

RUN pip install poetry>=2.0.0
RUN pip install uv
WORKDIR /app

COPY pyproject.toml poetry.lock ./
COPY pyproject.toml uv.lock ./
RUN touch README.md

RUN poetry config installer.max-workers 10
RUN poetry install --without dev,docs,tests --no-root --no-interaction --no-ansi -vvv && rm -rf $POETRY_CACHE_DIR
RUN uv sync

COPY src ./src

RUN poetry install --without dev,docs,tests

ENTRYPOINT ["poetry", "run", "gentropy"]
ENTRYPOINT ["uv", "run", "gentropy"]
55 changes: 31 additions & 24 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
SHELL := /bin/bash
PROJECT_ID ?= open-targets-genetics-dev
REGION ?= europe-west1
APP_NAME ?= $$(cat pyproject.toml | grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g')
PACKAGE_VERSION ?= $$(poetry version --short)
PACKAGE_VERSION ?= $(shell grep -m 1 'version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
# NOTE: git rev-parse will always return the HEAD if it sits in the tag,
# this way we can distinguish the tag vs branch name
ifeq ($(shell git rev-parse --abbrev-ref HEAD),HEAD)
Expand All @@ -11,7 +12,7 @@ else
endif

CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]')
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF}
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation

.PHONY: $(shell sed -n -e '/^$$/ { n ; /^[^ .\#][^ ]*:/ { s/:.*$$// ; p ; } ; }' $(MAKEFILE_LIST))

Expand All @@ -23,43 +24,55 @@ help: ## This is help
clean: ## Clean up prior to building
@rm -Rf ./dist

setup-dev: SHELL:=/bin/bash
setup-dev: ## Setup development environment
setup-dev: SHELL := $(shell echo $${SHELL})
setup-dev: ## Setup development environment
@. utils/install_dependencies.sh
@echo "Run . ${HOME}/.$(notdir $(SHELL))rc to finish setup"

check: ## Lint and format code
@echo "Linting API..."
@poetry run ruff check src/gentropy .
@uv run ruff check src/gentropy .
@echo "Linting docstrings..."
@poetry run pydoclint --config=pyproject.toml src
@poetry run pydoclint --config=pyproject.toml --skip-checking-short-docstrings=true tests
@uv run pydoclint --config=pyproject.toml src
@uv run pydoclint --config=pyproject.toml --skip-checking-short-docstrings=true tests

test: ## Run tests
@echo "Running Tests..."
@poetry run pytest
@uv run pytest

build-documentation: ## Create local server with documentation
@echo "Building Documentation..."
@poetry run mkdocs serve
@uv run mkdocs serve

create-dev-cluster: build ## Spin up a simple dataproc cluster with all dependencies for development purposes
sync-cluster-init-script: ## Synchronize the cluster initialisation actions script to google cloud
@echo "Syncing install_dependencies_on_cluster.sh to ${BUCKET_NAME}"
@gcloud storage cp utils/install_dependencies_on_cluster.sh ${BUCKET_NAME}/install_dependencies_on_cluster.sh

sync-gentropy-cli-script: ## Synchronize the gentropy cli script
@echo "Syncing gentropy cli script to ${BUCKET_NAME}"
@gcloud storage cp src/gentropy/cli.py ${BUCKET_NAME}/cli.py

create-dev-cluster: sync-cluster-init-script sync-gentropy-cli-script ## Spin up a simple dataproc cluster with all dependencies for development purposes
@echo "Making sure the branch is in sync with remote, so cluster can install gentropy dev version..."
@./utils/clean_status.sh || (echo "ERROR: Commit and push or stash local changes, to have up to date cluster"; exit 1)
@echo "Creating Dataproc Dev Cluster"
@gcloud config set project ${PROJECT_ID}
@gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
--image-version 2.1 \
gcloud config set project ${PROJECT_ID}
gcloud dataproc clusters create "ot-genetics-dev-${CLEAN_PACKAGE_VERSION}-$(USER)" \
--image-version 2.2 \
--region ${REGION} \
--master-machine-type n1-standard-16 \
--initialization-actions=$(BUCKET_NAME)/install_dependencies_on_cluster.sh \
--metadata="PACKAGE=$(BUCKET_NAME)/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl" \
--master-machine-type n1-standard-2 \
--metadata="GENTROPY_REF=${REF}" \
--initialization-actions=${BUCKET_NAME}/install_dependencies_on_cluster.sh \
--secondary-worker-type spot \
--worker-machine-type n1-standard-4 \
--public-ip-address \
--worker-boot-disk-size 500 \
--autoscaling-policy="projects/${PROJECT_ID}/regions/${REGION}/autoscalingPolicies/otg-etl" \
--optional-components=JUPYTER \
--enable-component-gateway \
--max-idle=60m

make update-dev-cluster: build ## Reinstalls the package on the dev-cluster
update-dev-cluster: build ## Reinstalls the package on the dev-cluster
@echo "Updating Dataproc Dev Cluster"
@gcloud config set project ${PROJECT_ID}
gcloud dataproc jobs submit pig --cluster="ot-genetics-dev-${CLEAN_PACKAGE_VERSION}" \
Expand All @@ -68,10 +81,4 @@ make update-dev-cluster: build ## Reinstalls the package on the dev-cluster
-e='sh chmod 750 $${PWD}/install_dependencies_on_cluster.sh; sh $${PWD}/install_dependencies_on_cluster.sh'

build: clean ## Build Python package with dependencies
@gcloud config set project ${PROJECT_ID}
@echo "Packaging Code and Dependencies for ${APP_NAME}-${PACKAGE_VERSION}"
@poetry build
@echo "Uploading to ${BUCKET_NAME}"
@gsutil cp src/${APP_NAME}/cli.py ${BUCKET_NAME}/
@gsutil cp ./dist/${APP_NAME}-${PACKAGE_VERSION}-py3-none-any.whl ${BUCKET_NAME}/
@gsutil cp ./utils/install_dependencies_on_cluster.sh ${BUCKET_NAME}/
@uv build
Loading

0 comments on commit 3d31edd

Please sign in to comment.