-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #24 from umccr/feature/databricks-hail-runtime
Add Dockerfile and patch for hail / databricks / spark
- Loading branch information
Showing
3 changed files
with
120 additions
and
0 deletions.
There are no files selected for viewing
56 changes: 56 additions & 0 deletions
56
repositories/hail/0.2.126--spark-3.4.0-patch/0001-spark-to-3.4.0.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
From ff5aa8bd5ae37d64ad9e37a45f76f6fcaced01c1 Mon Sep 17 00:00:00 2001 | ||
From: Alexis Lucattini <[email protected]> | ||
Date: Fri, 1 Dec 2023 04:48:08 +0000 | ||
Subject: [PATCH] spark-to-3.4.0 | ||
|
||
--- | ||
hail/build.gradle | 2 +- | ||
hail/python/requirements.txt | 2 +- | ||
hail/src/main/scala/is/hail/HailContext.scala | 4 ++-- | ||
3 files changed, 4 insertions(+), 4 deletions(-) | ||
|
||
diff --git a/hail/build.gradle b/hail/build.gradle | ||
index d3111dc7e..0e80e5d7c 100644 | ||
--- a/hail/build.gradle | ||
+++ b/hail/build.gradle | ||
@@ -40,7 +40,7 @@ tasks.withType(JavaCompile) { | ||
} | ||
|
||
project.ext { | ||
- breezeVersion = "1.1" | ||
+ breezeVersion = "2.1.0" | ||
|
||
sparkVersion = System.getProperty("spark.version", "3.3.0") | ||
|
||
diff --git a/hail/python/requirements.txt b/hail/python/requirements.txt | ||
index 9f8492a2a..a4c11f8ea 100644 | ||
--- a/hail/python/requirements.txt | ||
+++ b/hail/python/requirements.txt | ||
@@ -10,6 +10,6 @@ pandas>=2,<3 | ||
parsimonious<1 | ||
plotly>=5.5.0,<6 | ||
protobuf==3.20.2 | ||
-pyspark>=3.3.0,<3.4 | ||
+pyspark==3.4.0,<3.5 | ||
requests>=2.25.1,<3 | ||
scipy>1.2,<1.12 | ||
diff --git a/hail/src/main/scala/is/hail/HailContext.scala b/hail/src/main/scala/is/hail/HailContext.scala | ||
index 4e4063378..4d2f9056a 100644 | ||
--- a/hail/src/main/scala/is/hail/HailContext.scala | ||
+++ b/hail/src/main/scala/is/hail/HailContext.scala | ||
@@ -113,10 +113,10 @@ object HailContext { | ||
|
||
{ | ||
import breeze.linalg._ | ||
- import breeze.linalg.operators.{BinaryRegistry, OpMulMatrix} | ||
+ import breeze.linalg.operators.{BinaryRegistry, HasOps, OpMulMatrix} | ||
|
||
implicitly[BinaryRegistry[DenseMatrix[Double], Vector[Double], OpMulMatrix.type, DenseVector[Double]]].register( | ||
- DenseMatrix.implOpMulMatrix_DMD_DVD_eq_DVD) | ||
+ HasOps.impl_OpMulMatrix_DMD_DVD_eq_DVD) | ||
} | ||
|
||
theContext = new HailContext(backend, branchingFactor, optimizerIterations) | ||
-- | ||
2.34.1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
FROM databricksruntime/python:13.3-LTS | ||
|
||
# Build arguments pinning the Hail source revision and the toolchain versions. | ||
# Dockerfile comments must start the line: a trailing "# ..." after an | ||
# instruction is parsed as extra arguments, so the commit URL lives here. | ||
# Hail commit: https://github.com/hail-is/hail/commit/5b718e1d3e1d72fad15cc57801601a6e35df41dd | ||
ARG HAIL_COMMIT_ID="5b718e1" | ||
ARG SCALA_VERSION="2.12.0" | ||
ARG SPARK_VERSION="3.4.0" | ||
# NOTE(review): BREEZE_VERSION is not referenced by any later instruction in | ||
# this Dockerfile; the breeze version is hard-coded in 0001-spark-to-3.4.0.patch. | ||
ARG BREEZE_VERSION="2.1.0" | ||
|
||
# Install the system packages required before Hail is cloned and built below. | ||
# apt-get is used for both steps (the apt front-end is not meant for scripts), | ||
# and the package index and pip cache are removed in the same layer so they | ||
# do not end up in the image. | ||
RUN apt-get update -y && \ | ||
apt-get install -yq \ | ||
openjdk-8-jre-headless \ | ||
g++ \ | ||
python3 \ | ||
python3-pip \ | ||
libopenblas-base \ | ||
liblapack3 \ | ||
git \ | ||
rsync \ | ||
liblz4-dev && \ | ||
rm -rf /var/lib/apt/lists/* && \ | ||
pip3 install --no-cache-dir ipython | ||
|
||
# Stage the Spark 3.4.0 patch where the build step below can read it. | ||
COPY 0001-spark-to-3.4.0.patch /root/0001-spark-to-3.4.0.patch | ||
|
||
# Clone Hail, check out the pinned commit, apply the staged patch, run | ||
# `make install` from the nested hail/ directory, then delete the patch file. | ||
# The directory changes only live for this RUN; the final rm uses an absolute path. | ||
RUN git clone "https://github.com/hail-is/hail.git" && \ | ||
cd hail && \ | ||
git checkout "${HAIL_COMMIT_ID}" && \ | ||
git apply /root/0001-spark-to-3.4.0.patch && \ | ||
cd hail/ && \ | ||
make install \ | ||
HAIL_COMPILE_NATIVES=1 \ | ||
SCALA_VERSION="${SCALA_VERSION}" \ | ||
SPARK_VERSION="${SPARK_VERSION}" && \ | ||
rm /root/0001-spark-to-3.4.0.patch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
# Hail 0.2.126 -- Spark 3.4.0 Patch | ||
|
||
## Hail Version 0.2.126 running on DataBricks runtime 13.3 LTS | ||
|
||
### Versions / Commit IDs | ||
|
||
* HAIL_COMMIT_ID: "5b718e1" # https://github.com/hail-is/hail/commit/5b718e1d3e1d72fad15cc57801601a6e35df41dd | ||
* SCALA_VERSION: "2.12.0" | ||
* SPARK_VERSION: "3.4.0" | ||
* BREEZE_VERSION: "2.1.0" | ||
|
||
## Platforms | ||
* linux/amd64 | ||
* linux/arm64 | ||
|
||
|
||
### Usage | ||
|
||
Set up your DataBricks compute to run `ghcr.io/umccr/hail/0.2.126--spark-3.4.0-patch`. | ||
|
||
Make sure your DataBricks runtime is set to standard 13.3 LTS. | ||
|
||
### Description | ||
A fix for supporting DataBricks runtime 13.3 UC while also using hail. | ||
|
||
|
||
|