Skip to content

Commit

Permalink
Merge pull request #7 from ScaDS/hf-ft
Browse files: browse the repository at this point in the history
Fine-tuning text generation models locally using Hugging Face
  • Loading branch information
haesleinhuepf authored Jul 29, 2024
2 parents e775a69 + 5614507 commit b3b7756
Show file tree
Hide file tree
Showing 9 changed files with 1,807 additions and 63 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ docs/29_algorithm_validation/ideas.ipynb
docs/29_algorithm_validation/solution for exercise - metrics to investigate segmentation results.ipynb
docs/22_feature_extraction/blobs_analysis.csv
data/S-BIAD634
docs/71_fine_tuning_hf/haesleinhuepf
3 changes: 3 additions & 0 deletions docs/00_setup/environment-gpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,7 @@ dependencies:
- llama-index
- google-generativeai
- pygithub
- bitsandbytes>=0.43.2
- peft
- trl
prefix: C:\Users\haase\miniconda3\envs\genai-gpu
131 changes: 69 additions & 62 deletions docs/00_setup/environment.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: genai2
name: genai-gpu
channels:
- pytorch
- nvidia
- conda-forge
- defaults
dependencies:
- _libavif_api=1.1.0=h57928b3_0
- accelerate=0.32.1=pyhd8ed1ab_0
- accelerate=0.33.0=pyhd8ed1ab_0
- aiohttp=3.9.5=py311ha68e1ae_0
- aiosignal=1.3.1=pyhd8ed1ab_0
- annotated-types=0.7.0=pyhd8ed1ab_0
Expand Down Expand Up @@ -35,20 +35,19 @@ dependencies:
- aws-sdk-cpp=1.11.329=he0aa860_9
- babel=2.14.0=pyhd8ed1ab_0
- beautifulsoup4=4.12.3=pyha770c72_0
- blas=2.112=mkl
- blas-devel=3.9.0=12_win64_mkl
- blas=1.0=mkl
- bleach=6.1.0=pyhd8ed1ab_0
- blosc=1.21.6=h85f69ea_0
- brotli=1.1.0=hcfcfb64_1
- brotli-bin=1.1.0=hcfcfb64_1
- brotli-python=1.1.0=py311h12c1d0e_1
- bzip2=1.0.8=h2466b09_7
- c-ares=1.32.2=h2466b09_0
- c-ares=1.32.3=h2466b09_0
- c-blosc2=2.15.0=hb461149_1
- ca-certificates=2024.7.4=h56e8100_0
- cached-property=1.5.2=hd8ed1ab_1
- cached_property=1.5.2=pyha770c72_1
- certifi=2024.7.4=py311haa95532_0
- certifi=2024.7.4=pyhd8ed1ab_0
- cffi=1.16.0=py311ha68e1ae_0
- charls=2.4.2=h1537add_0
- charset-normalizer=3.3.2=pyhd8ed1ab_0
Expand Down Expand Up @@ -92,13 +91,13 @@ dependencies:
- hpack=4.0.0=pyh9f0ad1d_0
- httpcore=1.0.5=pyhd8ed1ab_0
- httpx=0.27.0=pyhd8ed1ab_0
- huggingface_hub=0.23.4=pyhd8ed1ab_0
- huggingface_hub=0.24.2=pyhd8ed1ab_0
- hyperframe=6.0.1=pyhd8ed1ab_0
- idna=3.7=pyhd8ed1ab_0
- imagecodecs=2024.6.1=py311hd929db6_2
- imageio=2.34.2=pyh12aca89_0
- importlib-metadata=8.0.0=pyha770c72_0
- importlib_metadata=8.0.0=hd8ed1ab_0
- importlib-metadata=8.2.0=pyha770c72_0
- importlib_metadata=8.2.0=hd8ed1ab_0
- importlib_resources=6.4.0=pyhd8ed1ab_0
- intel-openmp=2024.2.0=h57928b3_980
- ipycanvas=0.13.2=pyhd8ed1ab_0
Expand All @@ -112,7 +111,7 @@ dependencies:
- jedi=0.19.1=pyhd8ed1ab_0
- jinja2=3.1.4=pyhd8ed1ab_0
- jiter=0.5.0=py311h533ab2d_0
- joblib=1.4.2=py311haa95532_0
- joblib=1.4.2=pyhd8ed1ab_0
- json5=0.9.25=pyhd8ed1ab_0
- jsonpatch=1.33=pyhd8ed1ab_0
- jsonpointer=3.0.0=py311h1ea47a8_0
Expand All @@ -125,40 +124,40 @@ dependencies:
- jupyter_events=0.10.0=pyhd8ed1ab_0
- jupyter_server=2.14.2=pyhd8ed1ab_0
- jupyter_server_terminals=0.5.3=pyhd8ed1ab_0
- jupyterlab=4.2.3=pyhd8ed1ab_0
- jupyterlab=4.2.4=pyhd8ed1ab_0
- jupyterlab_pygments=0.3.0=pyhd8ed1ab_1
- jupyterlab_server=2.27.3=pyhd8ed1ab_0
- jupyterlab_widgets=3.0.11=pyhd8ed1ab_0
- jxrlib=1.1=hcfcfb64_3
- kiwisolver=1.4.5=py311h005e61a_1
- krb5=1.21.3=hdf4eb48_0
- langchain=0.2.8=pyhd8ed1ab_0
- langchain-core=0.2.21=pyhd8ed1ab_0
- langchain=0.2.11=pyhd8ed1ab_0
- langchain-core=0.2.24=pyhd8ed1ab_0
- langchain-text-splitters=0.2.2=pyhd8ed1ab_0
- langsmith=0.1.90=pyhd8ed1ab_0
- langsmith=0.1.93=pyhd8ed1ab_0
- lazy_loader=0.4=pyhd8ed1ab_0
- lcms2=2.16=h67d730c_0
- lerc=4.0.0=h63175ca_0
- libabseil=20240116.2=cxx17_he0c23c2_1
- libaec=1.1.3=h63175ca_0
- libarrow=16.1.0=h11e6a32_14_cpu
- libarrow-acero=16.1.0=he0c23c2_14_cpu
- libarrow-dataset=16.1.0=he0c23c2_14_cpu
- libarrow-substrait=16.1.0=h1f0e801_14_cpu
- libarrow=17.0.0=h11e6a32_2_cpu
- libarrow-acero=17.0.0=he0c23c2_2_cpu
- libarrow-dataset=17.0.0=he0c23c2_2_cpu
- libarrow-substrait=17.0.0=h1f0e801_2_cpu
- libavif16=1.1.0=hf4f7b25_0
- libblas=3.9.0=12_win64_mkl
- libblas=3.9.0=1_h8933c1f_netlib
- libbrotlicommon=1.1.0=hcfcfb64_1
- libbrotlidec=1.1.0=hcfcfb64_1
- libbrotlienc=1.1.0=hcfcfb64_1
- libcblas=3.9.0=12_win64_mkl
- libcblas=3.9.0=5_hd5c7e75_netlib
- libcrc32c=1.1.2=h0e60522_0
- libcublas=11.11.3.6=0
- libcublas-dev=11.11.3.6=0
- libcufft=10.9.0.58=0
- libcufft-dev=10.9.0.58=0
- libcurand=10.3.5.147=0
- libcurand-dev=10.3.5.147=0
- libcurl=8.8.0=hd5e4a3a_1
- libcurl=8.9.0=h18fefc2_0
- libcusolver=11.4.1.48=0
- libcusolver-dev=11.4.1.48=0
- libcusparse=11.7.5.86=0
Expand All @@ -173,13 +172,12 @@ dependencies:
- libhwloc=2.11.1=default_h8125262_1000
- libiconv=1.17=hcfcfb64_2
- libjpeg-turbo=3.0.0=hcfcfb64_1
- liblapack=3.9.0=12_win64_mkl
- liblapacke=3.9.0=12_win64_mkl
- liblapack=3.9.0=5_hd5c7e75_netlib
- libnpp=11.8.0.86=0
- libnpp-dev=11.8.0.86=0
- libnvjpeg=11.9.0.86=0
- libnvjpeg-dev=11.9.0.86=0
- libparquet=16.1.0=h178134c_14_cpu
- libparquet=17.0.0=h178134c_2_cpu
- libpng=1.6.43=h19919ed_0
- libprotobuf=4.25.3=h503648d_0
- libre2-11=2023.09.01=hf8d8778_2
Expand All @@ -195,7 +193,7 @@ dependencies:
- libxml2=2.12.7=h0f24e4e_4
- libzlib=1.3.1=h2466b09_1
- libzopfli=1.0.3=h0e60522_0
- lightning-utilities=0.11.5=pyhd8ed1ab_0
- lightning-utilities=0.11.6=pyhd8ed1ab_0
- lz4-c=1.9.4=hcfcfb64_0
- m2w64-gcc-libgfortran=5.3.0=6
- m2w64-gcc-libs=5.3.0=7
Expand All @@ -206,9 +204,7 @@ dependencies:
- matplotlib-base=3.9.1=py311h8f1b1e4_0
- matplotlib-inline=0.1.7=pyhd8ed1ab_0
- mistune=3.0.2=pyhd8ed1ab_0
- mkl=2021.4.0=h0e2418a_729
- mkl-devel=2021.4.0=h57928b3_730
- mkl-include=2021.4.0=h0e2418a_729
- mkl=2023.1.0=h6a75c08_48682
- mpmath=1.3.0=pyhd8ed1ab_0
- msys2-conda-epoch=20160418=1
- multidict=6.0.5=py311ha68e1ae_0
Expand All @@ -223,17 +219,18 @@ dependencies:
- networkx=3.3=pyhd8ed1ab_1
- notebook-shim=0.2.4=pyhd8ed1ab_0
- numpy=1.26.4=py311h0b4df5a_0
- openai=1.35.14=pyhd8ed1ab_0
- openai=1.37.1=pyhd8ed1ab_0
- openjpeg=2.5.2=h3d672ee_0
- openssl=3.3.1=h2466b09_2
- orc=2.0.1=h7e885a9_1
- orjson=3.10.6=py311h633b200_0
- overrides=7.7.0=pyhd8ed1ab_0
- packaging=23.2=pyhd8ed1ab_0
- pandas=2.2.2=py311hcf9f919_1
- pandoc=3.2.1=h57928b3_0
- pandoc=3.3=h57928b3_0
- pandocfilters=1.5.0=pyhd8ed1ab_0
- parso=0.8.4=pyhd8ed1ab_0
- patsy=0.5.6=pyhd8ed1ab_0
- pickleshare=0.7.5=py_1003
- pillow=10.4.0=py311h5592be9_0
- pip=24.0=pyhd8ed1ab_0
Expand All @@ -244,9 +241,9 @@ dependencies:
- psutil=6.0.0=py311he736701_0
- pthread-stubs=0.4=hcd874cb_1001
- pthreads-win32=2.9.1=hfa6e2cd_3
- pure_eval=0.2.2=pyhd8ed1ab_0
- pyarrow=16.1.0=py311h06a5be4_4
- pyarrow-core=16.1.0=py311hf9a78b3_4_cpu
- pure_eval=0.2.3=pyhd8ed1ab_0
- pyarrow=17.0.0=py311h06a5be4_0
- pyarrow-core=17.0.0=py311hf9a78b3_0_cpu
- pyarrow-hotfix=0.6=pyhd8ed1ab_0
- pycparser=2.22=pyhd8ed1ab_0
- pydantic=2.8.2=pyhd8ed1ab_0
Expand All @@ -261,7 +258,7 @@ dependencies:
- python-tzdata=2024.1=pyhd8ed1ab_0
- python-xxhash=3.4.1=py311ha68e1ae_0
- python_abi=3.11=4_cp311
- pytorch=2.3.1=py3.11_cuda11.8_cudnn8_0
- pytorch=2.4.0=py3.11_cuda11.8_cudnn9_0
- pytorch-cuda=11.8=h24eeafa_5
- pytorch-mutex=1.0=cuda
- pytz=2024.1=pyhd8ed1ab_0
Expand All @@ -274,32 +271,34 @@ dependencies:
- rav1e=0.6.6=h975169c_2
- re2=2023.09.01=hd3b24a8_2
- referencing=0.35.1=pyhd8ed1ab_0
- regex=2024.5.15=py311he736701_0
- regex=2024.7.24=py311he736701_0
- requests=2.32.3=pyhd8ed1ab_0
- rfc3339-validator=0.1.4=pyhd8ed1ab_0
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
- rpds-py=0.19.0=py311h533ab2d_0
- rpds-py=0.19.1=py311h533ab2d_0
- safetensors=0.4.3=py311hc37eb10_0
- scikit-image=0.24.0=py311hcf9f919_1
- scikit-learn=1.4.2=py311hf62ec03_1
- scikit-learn=1.5.1=py311hdcb8d17_0
- scipy=1.14.0=py311hd4686c6_1
- seaborn=0.13.2=py311haa95532_0
- seaborn=0.13.2=hd8ed1ab_2
- seaborn-base=0.13.2=pyhd8ed1ab_2
- send2trash=1.8.3=pyh5737063_0
- setuptools=71.0.1=pyhd8ed1ab_0
- setuptools=71.0.4=pyhd8ed1ab_0
- six=1.16.0=pyh6c4a22f_0
- snappy=1.2.1=h23299a8_0
- sniffio=1.3.1=pyhd8ed1ab_0
- soupsieve=2.5=pyhd8ed1ab_1
- sqlalchemy=2.0.31=py311he736701_0
- stack_data=0.6.2=pyhd8ed1ab_0
- stackview=0.8.0=pyhd8ed1ab_0
- statsmodels=0.14.2=py311h0a17f05_0
- svt-av1=2.1.2=he0c23c2_0
- sympy=1.13.0=pyh04b8f61_3
- tbb=2021.12.0=hc790b64_3
- tenacity=8.5.0=pyhd8ed1ab_0
- terminado=0.18.1=pyh5737063_0
- threadpoolctl=3.5.0=py311h746a85d_0
- tifffile=2024.7.2=pyhd8ed1ab_0
- threadpoolctl=3.5.0=pyhc1e730c_0
- tifffile=2024.7.24=pyhd8ed1ab_0
- tinycss2=1.3.0=pyhd8ed1ab_0
- tk=8.6.13=h5226925_1
- tokenizers=0.19.1=py311h91c4a10_0
Expand All @@ -309,7 +308,7 @@ dependencies:
- tornado=6.4.1=py311he736701_0
- tqdm=4.66.4=pyhd8ed1ab_0
- traitlets=5.14.3=pyhd8ed1ab_0
- transformers=4.42.4=pyhd8ed1ab_0
- transformers=4.43.3=pyhd8ed1ab_0
- types-python-dateutil=2.9.0.20240316=pyhd8ed1ab_0
- typing-extensions=4.12.2=hd8ed1ab_0
- typing_extensions=4.12.2=pyha770c72_0
Expand Down Expand Up @@ -344,50 +343,55 @@ dependencies:
- zstandard=0.23.0=py311h53056dc_0
- zstd=1.5.6=h0ea2cb4_0
- pip:
- bia-bob==0.20.0
- bia-bob==0.21.1
- bitsandbytes==0.43.2
- blablado==0.1.3
- cachetools==5.4.0
- click==8.1.7
- cryptography==43.0.0
- dataclasses-json==0.6.7
- deprecated==1.2.14
- dirtyjson==1.0.8
- docstring-parser==0.16
- google-ai-generativelanguage==0.6.6
- google-api-core==2.19.1
- google-api-python-client==2.137.0
- google-api-python-client==2.138.0
- google-auth==2.32.0
- google-auth-httplib2==0.2.0
- google-generativeai==0.7.2
- googleapis-common-protos==1.63.2
- grpcio==1.65.1
- grpcio-status==1.62.2
- gtts==2.5.1
- gtts==2.5.2
- httplib2==0.22.0
- ipynbname==2024.1.0.0
- langchain-openai==0.1.17
- llama-cloud==0.0.9
- llama-index==0.10.55
- llama-index-agent-openai==0.2.8
- llama-index-cli==0.1.12
- llama-index-core==0.10.55
- llama-index-embeddings-openai==0.1.10
- llama-index-indices-managed-llama-cloud==0.2.5
- langchain-openai==0.1.19
- llama-cloud==0.0.11
- llama-index==0.10.58
- llama-index-agent-openai==0.2.9
- llama-index-cli==0.1.13
- llama-index-core==0.10.58
- llama-index-embeddings-openai==0.1.11
- llama-index-indices-managed-llama-cloud==0.2.7
- llama-index-legacy==0.9.48
- llama-index-llms-openai==0.1.25
- llama-index-multi-modal-llms-openai==0.1.7
- llama-index-program-openai==0.1.6
- llama-index-llms-openai==0.1.27
- llama-index-multi-modal-llms-openai==0.1.8
- llama-index-program-openai==0.1.7
- llama-index-question-gen-openai==0.1.3
- llama-index-readers-file==0.1.30
- llama-index-readers-file==0.1.31
- llama-index-readers-llama-parse==0.1.6
- llama-parse==0.4.9
- lxml==5.2.2
- markdown-it-py==3.0.0
- marshmallow==3.21.3
- mdurl==0.1.2
- metakernel==0.30.2
- mypy-extensions==1.0.0
- nltk==3.8.1
- peft==0.12.0
- pexpect==4.9.0
- proto-plus==1.24.0
- protobuf==4.25.3
- protobuf==4.25.4
- ptyprocess==0.7.0
- pyasn1==0.6.0
- pyasn1-modules==0.4.0
Expand All @@ -396,16 +400,19 @@ dependencies:
- pygithub==2.3.0
- pyjwt==2.8.0
- pynacl==1.5.0
- pypdf==4.3.0
- pypdf==4.3.1
- python-pptx==0.6.23
- rsa==4.9
- shtab==1.7.1
- speechrecognition==3.10.4
- striprtf==0.0.26
- tiktoken==0.7.0
- torchaudio==2.3.1
- torchvision==0.18.1
- torchaudio==2.4.0
- torchvision==0.19.0
- trl==0.9.6
- typing-inspect==0.9.0
- tyro==0.8.5
- uritemplate==4.1.1
- wrapt==1.16.0
- xlsxwriter==3.2.0
prefix: C:\Users\haase\miniconda3\envs\genai2
prefix: C:\Users\haase\miniconda3\envs\genai-gpu
Loading

0 comments on commit b3b7756

Please sign in to comment.