Skip to content

Commit

Permalink
Merge pull request #24 from umccr/feature/databricks-hail-runtime
Browse files Browse the repository at this point in the history
Add Dockerfile and patch for hail / databricks / spark
  • Loading branch information
alexiswl authored Dec 1, 2023
2 parents 1487b18 + d4af441 commit 7e39548
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
From ff5aa8bd5ae37d64ad9e37a45f76f6fcaced01c1 Mon Sep 17 00:00:00 2001
From: Alexis Lucattini <[email protected]>
Date: Fri, 1 Dec 2023 04:48:08 +0000
Subject: [PATCH] spark-to-3.4.0

---
hail/build.gradle | 2 +-
hail/python/requirements.txt | 2 +-
hail/src/main/scala/is/hail/HailContext.scala | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/hail/build.gradle b/hail/build.gradle
index d3111dc7e..0e80e5d7c 100644
--- a/hail/build.gradle
+++ b/hail/build.gradle
@@ -40,7 +40,7 @@ tasks.withType(JavaCompile) {
}

project.ext {
- breezeVersion = "1.1"
+ breezeVersion = "2.1.0"

sparkVersion = System.getProperty("spark.version", "3.3.0")

diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt
index 9f8492a2a..a4c11f8ea 100644
--- a/hail/python/requirements.txt
+++ b/hail/python/requirements.txt
@@ -10,6 +10,6 @@ pandas>=2,<3
parsimonious<1
plotly>=5.5.0,<6
protobuf==3.20.2
-pyspark>=3.3.0,<3.4
+pyspark==3.4.0,<3.5
requests>=2.25.1,<3
scipy>1.2,<1.12
diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala
index 4e4063378..4d2f9056a 100644
--- a/hail/src/main/scala/is/hail/HailContext.scala
+++ b/hail/src/main/scala/is/hail/HailContext.scala
@@ -113,10 +113,10 @@ object HailContext {

{
import breeze.linalg._
- import breeze.linalg.operators.{BinaryRegistry, OpMulMatrix}
+ import breeze.linalg.operators.{BinaryRegistry, HasOps, OpMulMatrix}

implicitly[BinaryRegistry[DenseMatrix[Double], Vector[Double], OpMulMatrix.type, DenseVector[Double]]].register(
- DenseMatrix.implOpMulMatrix_DMD_DVD_eq_DVD)
+ HasOps.impl_OpMulMatrix_DMD_DVD_eq_DVD)
}

theContext = new HailContext(backend, branchingFactor, optimizerIterations)
--
2.34.1

37 changes: 37 additions & 0 deletions repositories/hail/0.2.126--spark-3.4.0-patch/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Builds Hail from source at a pinned commit, with a local patch applied, on
# top of the Databricks 13.3 LTS Python runtime image.
FROM databricksruntime/python:13.3-LTS

# OS packages needed by the steps below, plus ipython.
# - apt-get (not apt) is used because apt's CLI is not meant for scripts.
# - The package lists are deleted in the same layer that downloads them so
#   they never end up in the image.
# - Recommended packages are still installed on purpose: `make` is used later
#   but is not listed explicitly, so switching to --no-install-recommends could
#   break the build (TODO confirm before tightening).
RUN apt-get update && \
    apt-get install -yq \
        g++ \
        git \
        liblapack3 \
        liblz4-dev \
        libopenblas-base \
        openjdk-8-jre-headless \
        python3 \
        python3-pip \
        rsync && \
    rm -rf /var/lib/apt/lists/* && \
    pip3 install --no-cache-dir ipython

# Build parameters. They are declared after the OS package layer so that
# overriding one of them cannot invalidate that layer's cache.
#
# HAIL_COMMIT_ID refers to:
#   https://github.com/hail-is/hail/commit/5b718e1d3e1d72fad15cc57801601a6e35df41dd
# (Dockerfile comments are only recognised at the start of a line, so the URL
# must not be appended to the ARG instruction itself.)
ARG HAIL_COMMIT_ID="5b718e1"
ARG SCALA_VERSION="2.12.0"
ARG SPARK_VERSION="3.4.0"
# BREEZE_VERSION is not referenced by any instruction in this Dockerfile; it is
# kept so that existing `--build-arg BREEZE_VERSION=...` invocations keep working.
ARG BREEZE_VERSION="2.1.0"

COPY 0001-spark-to-3.4.0.patch /root/0001-spark-to-3.4.0.patch

# Clone Hail, check out the pinned commit, apply the patch at the repository
# root, then run `make install` from the nested hail/ directory.
# The `cd` calls live inside subshells so they do not leak into the rest of the
# command, and WORKDIR is deliberately left as the base image defines it.
# The final `rm` removes the patch from the resulting filesystem; note that the
# COPY layer above still contains it.
RUN git clone "https://github.com/hail-is/hail.git" && \
    ( \
        cd hail && \
        git checkout "${HAIL_COMMIT_ID}" && \
        git apply /root/0001-spark-to-3.4.0.patch && \
        ( \
            cd hail/ && \
            make install \
                HAIL_COMPILE_NATIVES=1 \
                SCALA_VERSION="${SCALA_VERSION}" \
                SPARK_VERSION="${SPARK_VERSION}" \
        ) \
    ) && \
    rm /root/0001-spark-to-3.4.0.patch
27 changes: 27 additions & 0 deletions repositories/hail/0.2.126--spark-3.4.0-patch/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Hail 0.2.126 -- Spark 3.4.0 Patch

## Hail Version 0.2.126 running on DataBricks runtime 13.3 LTS

### Versions / Commit IDs

* HAIL_COMMIT_ID: "5b718e1" # https://github.com/hail-is/hail/commit/5b718e1d3e1d72fad15cc57801601a6e35df41dd
* SCALA_VERSION: "2.12.0"
* SPARK_VERSION: "3.4.0"
* BREEZE_VERSION: "2.1.0"

### Platforms
* linux/amd64
* linux/arm64


### Usage

Set up your databricks compute to run `ghcr.io/umccr/hail/0.2.126--spark-3.4.0-patch`.

Make sure your DataBricks runtime is set to standard 13.3 LTS

### Description
A fix for supporting DataBricks runtime 13.3 UC while also using hail.



0 comments on commit 7e39548

Please sign in to comment.