Skip to content

Commit 01c8099

Browse files
[hailctl] Fix dependency installation for hailctl dataproc clusters (#12510)
* [hailctl] Fix dependency installation for hailctl dataproc clusters * make the pinned requirements files actual targets * strip the spaces from each dependency * fix hailctl dataproc package installation * require the user regenerate pinned-requirements for install-on-cluster if out of date * install pip-tools * pip installing pip-tools puts it in the user local bin which is not on path in dataproc * fix #s * just have the user sudo * add permission again * fix requirements parsing before installation * put lockfile generation into a separate file * fix for dataproc * remove dependency from deploy.yaml * remove targets but ensure compatibility * undo requirements test change * fix requirements * fix' * add file to build.yaml * add more inputs in build.yaml * make singular
1 parent 9366f2b commit 01c8099

9 files changed

+99
-77
lines changed

build.yaml

+8-2
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,8 @@ steps:
826826
to: /io/repo/README.md
827827
- from: /repo/hail
828828
to: /io/repo/hail
829+
- from: /repo/check_pip_requirements.sh
830+
to: /io/repo/check_pip_requirements.sh
829831
dependsOn:
830832
- hail_build_image
831833
- merge_code
@@ -848,6 +850,8 @@ steps:
848850
to: /io/repo/README.md
849851
- from: /repo/hail
850852
to: /io/repo/hail
853+
- from: /repo/check_pip_requirements.sh
854+
to: /io/repo/check_pip_requirements.sh
851855
dependsOn:
852856
- hail_build_image
853857
- merge_code
@@ -871,6 +875,8 @@ steps:
871875
to: /io/repo/README.md
872876
- from: /repo/hail
873877
to: /io/repo/hail
878+
- from: /repo/check_pip_requirements.sh
879+
to: /io/repo/check_pip_requirements.sh
874880
outputs:
875881
- from: /io/repo/hail/build/deploy/dist/wheel-container.tar
876882
to: /wheel-for-azure-container.tar
@@ -2790,9 +2796,9 @@ steps:
27902796
27912797
cat >file-with-args.sh <<EOF
27922798
set -ex
2793-
2799+
27942800
[[ $# -eq 2 ]]
2795-
2801+
27962802
cat foo
27972803
echo "Hello World! $1 $2"
27982804
EOF

check_pip_requirements.sh

+4-1
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@ new_pinned=$(mktemp)
88
pinned_no_comments=$(mktemp)
99
new_pinned_no_comments=$(mktemp)
1010

11-
pip-compile --quiet $reqs $pinned --output-file=$new_pinned
11+
PATH="$PATH:$HOME/.local/bin" pip-compile --quiet $reqs $pinned --output-file=$new_pinned
1212
# Get rid of comments that might differ despite requirements being the same
1313
cat $pinned | sed '/#/d' > $pinned_no_comments
1414
cat $new_pinned | sed '/#/d' > $new_pinned_no_comments
1515
diff $pinned_no_comments $new_pinned_no_comments || {
1616
echo '>>> up-to-date pinned requirements <<<'
1717
cat $new_pinned
18+
echo '--------------------------------------'
19+
echo "$pinned is no longer up to date with $reqs"
20+
echo "Please regenerate the pinned requirements file."
1821
exit 1
1922
}

docker/Makefile

+1-7
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,7 @@ deploy: build
4040

4141
.PHONY: generate-pip-lockfile
4242
generate-pip-lockfile:
43-
# Some service dependencies are linux-specific and the lockfile
44-
# would differ when generated on MacOS, so we generate the lockfile
45-
# on a linux image.
46-
docker run --rm -it \
47-
-v $(HAIL):/hail \
48-
python:3.7-slim \
49-
/bin/bash -c "pip install pip-tools && cd hail/docker && pip-compile --upgrade requirements.txt --output-file=linux-pinned-requirements.txt"
43+
HAIL_HAIL_DIR=$(HAIL) ../generate_pip_lockfile.sh requirements.txt linux-pinned-requirements.txt
5044

5145
.PHONY: clean
5246
clean:

generate_pip_lockfile.sh

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
#
# Regenerate a pinned requirements lockfile with `pip-compile --upgrade`.
#
# Usage:
#   generate_pip_lockfile.sh <requirements-file> <pinned-requirements-file>
#
# Environment:
#   HAIL_HAIL_DIR  Host directory that is bind-mounted at /hail inside the
#                  container. Only consulted on non-Linux hosts, where both
#                  path arguments are resolved relative to /hail.

set -ex

if [[ $# -ne 2 ]]; then
    echo "usage: $0 <requirements-file> <pinned-requirements-file>" >&2
    exit 2
fi

reqs=$1
pinned_reqs=$2

# Some service dependencies are linux-specific and the lockfile
# would differ when generated on MacOS, so we generate the lockfile
# on a linux image.
if [[ "$(uname)" == 'Linux' ]]; then
    # `pip install pip-tools` on dataproc by default installs into the
    # user's local bin which is not on the PATH
    PATH="$PATH:$HOME/.local/bin" pip-compile --upgrade "$reqs" --output-file="$pinned_reqs"
else
    # Fail with a clear message instead of handing docker the invalid
    # mount spec `:/hail` when HAIL_HAIL_DIR is unset or empty.
    hail_dir=${HAIL_HAIL_DIR:?HAIL_HAIL_DIR must point at the directory to mount at /hail}

    # The two paths are passed as positional parameters ($1, $2) to the
    # inner shell rather than spliced into the command string, so paths
    # containing spaces or shell metacharacters survive intact.
    docker run --rm -it \
        -v "${hail_dir}":/hail \
        python:3.7-slim \
        /bin/bash -c 'pip install pip-tools && cd /hail && pip-compile --upgrade "$1" --output-file="$2"' \
        generate_pip_lockfile "$reqs" "$pinned_reqs"
fi

hail/Makefile

+21-17
Original file line numberDiff line numberDiff line change
@@ -268,14 +268,31 @@ resources := $(wildcard python/hailtop/hailctl/dataproc/resources/*)
268268
$(eval $(call ENV_VAR,cloud_base))
269269
$(eval $(call ENV_VAR,wheel_cloud_path))
270270

271+
272+
# Regenerate every pinned-requirements.txt from its sibling requirements.txt.
# The directories are processed in order and the first failure aborts the
# recipe with that command's exit status.
.PHONY: generate-pip-lockfiles
generate-pip-lockfiles:
	for reqs_dir in python/hailtop python python/dev; do \
		HAIL_HAIL_DIR=$(HAIL_HAIL_DIR) \
			../generate_pip_lockfile.sh $$reqs_dir/requirements.txt $$reqs_dir/pinned-requirements.txt \
			|| exit $$?; \
	done
280+
281+
282+
# Install pip-tools, then run ../check_pip_requirements.sh to compare
# python/pinned-requirements.txt against python/requirements.txt.
# Both steps share one shell; the check only runs if the install succeeds.
.PHONY: check-pip-lockfile
check-pip-lockfile:
	$(PIP) install pip-tools && \
		bash ../check_pip_requirements.sh \
			python/requirements.txt \
			python/pinned-requirements.txt
285+
286+
271287
python/hailtop/hailctl/deploy.yaml: env/cloud_base env/wheel_cloud_path
272-
python/hailtop/hailctl/deploy.yaml: $(resources) python/requirements.txt
288+
# python/pinned-requirements.txt is read by the recipe, so it is a real
# prerequisite. check-pip-lockfile is phony; keeping it order-only (after `|`)
# still runs the check but no longer forces deploy.yaml to rebuild every time.
python/hailtop/hailctl/deploy.yaml: $(resources) python/pinned-requirements.txt | check-pip-lockfile
273289
rm -f $@
274290
echo "dataproc:" >> $@
275291
for FILE in $(notdir $(resources)); do \
276292
echo " $$FILE: $(cloud_base)/$$FILE" >> $@ || exit 1; done
277293
echo " wheel: $(wheel_cloud_path)" >> $@
278-
echo " pip_dependencies: $(shell cat python/requirements.txt | sed '/^[[:blank:]]*#/d;s/#.*//' | grep -v pyspark | tr "\n" "|||")" >> $@
294+
printf " pip_dependencies: " >> $@
295+
cat python/pinned-requirements.txt | sed '/^[[:blank:]]*#/d;s/#.*//' | grep -v pyspark | tr "\n" "|||" | tr -d '[:space:]' >> $@
279296

280297
.PHONY: upload-artifacts
281298
upload-artifacts: $(WHEEL)
@@ -324,8 +341,8 @@ install: $(WHEEL)
324341
hailctl config set query/backend spark
325342

326343
.PHONY: install-on-cluster
327-
install-on-cluster: $(WHEEL)
328-
sed '/^pyspark/d' python/requirements.txt | grep -v '^#' | tr '\n' '\0' | xargs -0 $(PIP) install -U
344+
# Depends on check-pip-lockfile (singular), the target this Makefile defines,
# which checks python/pinned-requirements.txt against python/requirements.txt
# before anything is installed.
install-on-cluster: $(WHEEL) check-pip-lockfile
345+
sed '/^pyspark/d' python/pinned-requirements.txt | grep -v -e '^[[:space:]]*#' -e '^$$' | tr '\n' '\0' | xargs -0 $(PIP) install -U
329346
-$(PIP) uninstall -y hail
330347
$(PIP) install $(WHEEL) --no-deps
331348
hailctl config set query/backend spark
@@ -372,19 +389,6 @@ install-dev-deps:
372389
install-deps: install-dev-deps
373390
sed "s/^pyspark.*/pyspark==$(SPARK_VERSION)/" python/requirements.txt | xargs $(PIP) install -U
374391

375-
.PHONY: generate-pip-lockfile
376-
generate-pip-lockfile:
377-
docker run --rm -it \
378-
-v $(HAIL_HAIL_DIR):/hail \
379-
python:3.7-slim \
380-
/bin/bash -c "pip install pip-tools && cd /hail && \
381-
pip-compile --upgrade python/hailtop/requirements.txt \
382-
--output-file=python/hailtop/pinned-requirements.txt && \
383-
pip-compile --upgrade python/requirements.txt \
384-
--output-file=python/pinned-requirements.txt && \
385-
pip-compile --upgrade python/dev/requirements.txt \
386-
--output-file=python/dev/pinned-requirements.txt"
387-
388392
.PHONY: benchmark
389393
benchmark: $(WHEEL)
390394
HAIL_WHEEL=../hail/$(WHEEL) HAIL_PIP_VERSION=$(HAIL_PIP_VERSION) $(MAKE) -C ../benchmark benchmark

hail/python/dev/pinned-requirements.txt

+25-27
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#
2-
# This file is autogenerated by pip-compile with python 3.7
3-
# To update, run:
2+
# This file is autogenerated by pip-compile with Python 3.7
3+
# by the following command:
44
#
55
# pip-compile --output-file=python/dev/pinned-requirements.txt python/dev/requirements.txt
66
#
@@ -50,13 +50,13 @@ backcall==0.2.0
5050
# via ipython
5151
beautifulsoup4==4.11.1
5252
# via nbconvert
53-
black==22.10.0
53+
black==22.12.0
5454
# via -r python/dev/requirements.txt
5555
bleach==5.0.1
5656
# via nbconvert
57-
boto3==1.26.20
57+
boto3==1.26.26
5858
# via -r python/dev/../hailtop/requirements.txt
59-
botocore==1.29.20
59+
botocore==1.29.26
6060
# via
6161
# -r python/dev/../hailtop/requirements.txt
6262
# boto3
@@ -124,7 +124,7 @@ frozenlist==1.3.3
124124
# aiosignal
125125
fswatch==0.1.1
126126
# via -r python/dev/requirements.txt
127-
google-api-core==2.10.2
127+
google-api-core==2.11.0
128128
# via
129129
# google-cloud-core
130130
# google-cloud-storage
@@ -136,7 +136,7 @@ google-auth==2.14.1
136136
# google-cloud-storage
137137
google-cloud-core==2.3.2
138138
# via google-cloud-storage
139-
google-cloud-storage==2.6.0
139+
google-cloud-storage==2.7.0
140140
# via -r python/dev/../hailtop/requirements.txt
141141
google-crc32c==1.5.0
142142
# via google-resumable-media
@@ -163,7 +163,7 @@ importlib-metadata==3.10.1
163163
# nbformat
164164
# pluggy
165165
# pytest
166-
importlib-resources==5.10.0
166+
importlib-resources==5.10.1
167167
# via jsonschema
168168
iniconfig==1.1.1
169169
# via pytest
@@ -185,7 +185,7 @@ ipython-genutils==0.2.0
185185
# nbclassic
186186
# notebook
187187
# qtconsole
188-
ipywidgets==8.0.2
188+
ipywidgets==8.0.3
189189
# via jupyter
190190
isodate==0.6.1
191191
# via msrest
@@ -214,7 +214,7 @@ jsonschema==4.17.3
214214
# via nbformat
215215
jupyter==1.0.0
216216
# via -r python/dev/requirements.txt
217-
jupyter-client==7.4.7
217+
jupyter-client==7.4.8
218218
# via
219219
# ipykernel
220220
# jupyter-console
@@ -241,7 +241,7 @@ jupyter-server==1.23.3
241241
# notebook-shim
242242
jupyterlab-pygments==0.2.2
243243
# via nbconvert
244-
jupyterlab-widgets==3.0.3
244+
jupyterlab-widgets==3.0.4
245245
# via ipywidgets
246246
lazy-object-proxy==1.8.0
247247
# via astroid
@@ -267,7 +267,7 @@ msal-extensions==1.0.0
267267
# via azure-identity
268268
msrest==0.7.1
269269
# via azure-storage-blob
270-
multidict==6.0.2
270+
multidict==6.0.3
271271
# via
272272
# aiohttp
273273
# yarl
@@ -281,7 +281,7 @@ nbclassic==0.4.8
281281
# via notebook
282282
nbclient==0.7.2
283283
# via nbconvert
284-
nbconvert==7.2.5
284+
nbconvert==7.2.6
285285
# via
286286
# jupyter
287287
# jupyter-server
@@ -311,9 +311,9 @@ notebook-shim==0.2.2
311311
# via nbclassic
312312
oauthlib==3.2.2
313313
# via requests-oauthlib
314-
orjson==3.8.2
314+
orjson==3.8.3
315315
# via -r python/dev/../hailtop/requirements.txt
316-
packaging==21.3
316+
packaging==22.0
317317
# via
318318
# ipykernel
319319
# jupyter-server
@@ -337,7 +337,7 @@ pickleshare==0.7.5
337337
# via ipython
338338
pkgutil-resolve-name==1.3.10
339339
# via jsonschema
340-
platformdirs==2.5.4
340+
platformdirs==2.6.0
341341
# via
342342
# black
343343
# pylint
@@ -350,7 +350,7 @@ prometheus-client==0.15.0
350350
# jupyter-server
351351
# nbclassic
352352
# notebook
353-
prompt-toolkit==3.0.33
353+
prompt-toolkit==3.0.36
354354
# via
355355
# ipython
356356
# jupyter-console
@@ -391,10 +391,8 @@ pyjwt[crypto]==2.6.0
391391
# via
392392
# -c python/dev/../requirements.txt
393393
# msal
394-
pylint==2.15.7
394+
pylint==2.15.8
395395
# via -r python/dev/requirements.txt
396-
pyparsing==3.0.9
397-
# via packaging
398396
pyrsistent==0.19.2
399397
# via jsonschema
400398
pytest==7.2.0
@@ -406,7 +404,7 @@ pytest==7.2.0
406404
# pytest-instafail
407405
# pytest-metadata
408406
# pytest-xdist
409-
pytest-asyncio==0.20.2
407+
pytest-asyncio==0.20.3
410408
# via -r python/dev/requirements.txt
411409
pytest-forked==1.4.0
412410
# via pytest-xdist
@@ -504,7 +502,7 @@ sphinxcontrib-serializinghtml==1.1.5
504502
# via sphinx
505503
tabulate==0.9.0
506504
# via -r python/dev/../hailtop/requirements.txt
507-
terminado==0.17.0
505+
terminado==0.17.1
508506
# via
509507
# jupyter-server
510508
# nbclassic
@@ -529,7 +527,7 @@ tornado==6.2
529527
# nbclassic
530528
# notebook
531529
# terminado
532-
traitlets==5.6.0
530+
traitlets==5.7.0
533531
# via
534532
# ipykernel
535533
# ipython
@@ -550,7 +548,7 @@ typed-ast==1.5.4
550548
# astroid
551549
# black
552550
# mypy
553-
types-chardet==5.0.4
551+
types-chardet==5.0.4.1
554552
# via -r python/dev/requirements.txt
555553
types-decorator==5.1.8.1
556554
# via -r python/dev/requirements.txt
@@ -564,7 +562,7 @@ types-pyyaml==6.0.12.2
564562
# via -r python/dev/requirements.txt
565563
types-requests==2.28.11.5
566564
# via -r python/dev/requirements.txt
567-
types-setuptools==65.6.0.1
565+
types-setuptools==65.6.0.2
568566
# via -r python/dev/requirements.txt
569567
types-six==1.16.21.4
570568
# via -r python/dev/requirements.txt
@@ -606,11 +604,11 @@ webencodings==0.5.1
606604
# tinycss2
607605
websocket-client==1.4.2
608606
# via jupyter-server
609-
widgetsnbextension==4.0.3
607+
widgetsnbextension==4.0.4
610608
# via ipywidgets
611609
wrapt==1.14.1
612610
# via astroid
613-
yarl==1.8.1
611+
yarl==1.8.2
614612
# via aiohttp
615613
zipp==3.11.0
616614
# via

hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py

-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ def mkdir_if_not_exists(path):
3838
'setuptools',
3939
'mkl<2020',
4040
'lxml<5',
41-
'google-cloud-storage==1.25.*',
4241
'https://github.com/hail-is/jgscm/archive/v0.1.12+hail.zip',
4342
'ipykernel==4.10.*',
4443
'ipywidgets==7.5.*',

0 commit comments

Comments
 (0)