From 99c8a6196861ec0dc10944e3c02730f694ae54e9 Mon Sep 17 00:00:00 2001 From: lopezp Date: Mon, 1 Jul 2024 09:41:27 +0200 Subject: [PATCH 1/9] upgrade to tensorflow 2.16; ensure it works with new Keras and updated dependencies --- delft/sequenceLabelling/wrapper.py | 5 +++++ delft/textClassification/wrapper.py | 7 +++++++ delft/utilities/Transformer.py | 2 ++ requirements.txt | 16 ++++++++++------ setup.py | 18 +++++++++++------- 5 files changed, 35 insertions(+), 13 deletions(-) diff --git a/delft/sequenceLabelling/wrapper.py b/delft/sequenceLabelling/wrapper.py index f2b2d80..d1e652a 100644 --- a/delft/sequenceLabelling/wrapper.py +++ b/delft/sequenceLabelling/wrapper.py @@ -2,6 +2,11 @@ from packaging import version +# for using legacy Keras 2, and not Keras 3 installed by default from TensorFlow 2.16 +os.environ["TF_USE_LEGACY_KERAS"] = "1" +os.environ["KERAS_BACKEND"] = "tensorflow" +import tf_keras as keras + # ask tensorflow to be quiet and not print hundred lines of logs from delft.utilities.Transformer import TRANSFORMER_CONFIG_FILE_NAME, DEFAULT_TRANSFORMER_TOKENIZER_DIR from delft.utilities.misc import print_parameters diff --git a/delft/textClassification/wrapper.py b/delft/textClassification/wrapper.py index cc8127e..0c25a69 100644 --- a/delft/textClassification/wrapper.py +++ b/delft/textClassification/wrapper.py @@ -1,5 +1,12 @@ import os +from packaging import version + +# for using legacy Keras 2, and not Keras 3 installed by default from TensorFlow 2.16 +os.environ["TF_USE_LEGACY_KERAS"] = "1" +os.environ["KERAS_BACKEND"] = "tensorflow" +import tf_keras as keras + from delft.sequenceLabelling.trainer import LogLearningRateCallback # ask tensorflow to be quiet and not print hundred lines of logs from delft.utilities.misc import print_parameters diff --git a/delft/utilities/Transformer.py b/delft/utilities/Transformer.py index 24e3a74..fe75195 100644 --- a/delft/utilities/Transformer.py +++ b/delft/utilities/Transformer.py @@ -1,6 +1,8 @@ import os 
from typing import Union, Iterable +os.environ["KERAS_BACKEND"] = "tensorflow" + from transformers import AutoTokenizer, TFAutoModel, AutoConfig, BertTokenizer, TFBertModel TRANSFORMER_CONFIG_FILE_NAME = 'transformer-config.json' diff --git a/requirements.txt b/requirements.txt index c45c129..cd56dd6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,11 @@ -numpy==1.22.3 +numpy==1.23.5 regex==2021.11.10 -scikit-learn==1.0.1 +scikit-learn==1.1 tqdm==4.62.3 -tensorflow==2.9.3 -h5py==3.6.0 +#tensorflow==2.16.1 +tensorflow[and-cuda]==2.16.1 +tf_keras==2.16.0 +h5py==3.10.0 unidecode==1.3.2 pydot==1.4.0 lmdb==1.2.1 @@ -11,8 +13,10 @@ truecase requests>=2.20 pandas==1.3.5 transformers==4.33.2 -torch==1.10.1 +#transformers==4.40.0 +torch==1.11.0 pytest -tensorflow-addons==0.19.0 +#tensorflow-addons==0.19.0 +tfa-nightly blingfire==0.1.8 accelerate>=0.20.3 diff --git a/setup.py b/setup.py index ffdfb58..d46133f 100644 --- a/setup.py +++ b/setup.py @@ -13,22 +13,26 @@ include_package_data=True, python_requires='>=3.7', install_requires=[ - 'numpy==1.22.3', + 'numpy==1.23.5', 'regex==2021.11.10', - 'scikit-learn==1.0.1', + 'scikit-learn==1.1', 'tqdm==4.62.3', - 'tensorflow==2.9.3', - 'h5py==3.6.0', + #'tensorflow==2.16.1', + 'tensorflow[and-cuda]==2.16.1', + 'tf_keras==2.16.0', + 'h5py==3.10.0', 'unidecode==1.3.2', 'pydot==1.4.0', 'lmdb==1.2.1', - 'transformers==4.33.2', - 'torch==1.10.1', + #'transformers==4.33.2', + 'transformers==4.40.0', + 'torch==1.11.0', 'truecase', 'requests>=2.20', 'pandas==1.3.5', 'pytest', - 'tensorflow-addons==0.19.0', + #'tensorflow-addons==0.19.0', + 'tfa-nightly', 'accelerate>=0.20.3' ], classifiers=[ From c66c961cb58990cc3adb2ad85553a08694c195a2 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Wed, 8 Jan 2025 21:48:50 +0100 Subject: [PATCH 2/9] update dependencies, create a requirements for macos --- requirements.macos.txt | 18 ++++++++++++++++++ requirements.txt | 5 +---- 2 files changed, 19 insertions(+), 4 deletions(-) create 
mode 100644 requirements.macos.txt diff --git a/requirements.macos.txt b/requirements.macos.txt new file mode 100644 index 0000000..3b5568f --- /dev/null +++ b/requirements.macos.txt @@ -0,0 +1,18 @@ +numpy==1.23.5 +regex==2021.11.10 +scikit-learn==1.1 +tqdm==4.62.3 +tensorflow==2.17.1 +tf_keras==2.17.0 +h5py==3.10.0 +unidecode==1.3.2 +pydot==1.4.0 +lmdb==1.2.1 +truecase +requests>=2.20 +pandas==1.3.5 +transformers==4.46.1 +pytest +#tensorflow-addons==0.19.0 +tfa-nightly +accelerate>=0.20.3 diff --git a/requirements.txt b/requirements.txt index cd56dd6..f6bb2cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ numpy==1.23.5 regex==2021.11.10 scikit-learn==1.1 -tqdm==4.62.3 -#tensorflow==2.16.1 +tqdm==4.66.3 tensorflow[and-cuda]==2.16.1 tf_keras==2.16.0 h5py==3.10.0 @@ -14,9 +13,7 @@ requests>=2.20 pandas==1.3.5 transformers==4.33.2 #transformers==4.40.0 -torch==1.11.0 pytest #tensorflow-addons==0.19.0 tfa-nightly -blingfire==0.1.8 accelerate>=0.20.3 From 77872d7e6677a2dca524625b033dbb6a9322d81d Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Wed, 8 Jan 2025 21:49:23 +0100 Subject: [PATCH 3/9] fix compatibility with higher version of transformers --- delft/sequenceLabelling/preprocess.py | 10 ++++++++-- delft/sequenceLabelling/trainer.py | 4 ++-- delft/utilities/Transformer.py | 7 +------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/delft/sequenceLabelling/preprocess.py b/delft/sequenceLabelling/preprocess.py index 2f33e52..cd60dd6 100644 --- a/delft/sequenceLabelling/preprocess.py +++ b/delft/sequenceLabelling/preprocess.py @@ -321,8 +321,14 @@ def convert_single_text(self, text_tokens, chars_tokens, features_tokens, label_ chars_tokens.append(self.empty_char_vector) # sub-tokenization - encoded_result = self.tokenizer(text_tokens, add_special_tokens=True, is_split_into_words=True, - max_length=max_seq_length, truncation=True, return_offsets_mapping=True) + encoded_result = self.tokenizer( + text_tokens, + 
add_special_tokens=True, + is_split_into_words=True, + max_length=max_seq_length, + truncation=True, + return_offsets_mapping=True + ) input_ids = encoded_result.input_ids offsets = encoded_result.offset_mapping diff --git a/delft/sequenceLabelling/trainer.py b/delft/sequenceLabelling/trainer.py index 6c44455..08417f8 100644 --- a/delft/sequenceLabelling/trainer.py +++ b/delft/sequenceLabelling/trainer.py @@ -168,8 +168,8 @@ def train_model(self, local_model, x_train, y_train, f_train=None, # multiple workers should work with transformer layers, but not with ELMo due to GPU memory limit (with GTX 1080Ti 11GB) if self.model_config.transformer_name is not None or (self.embeddings and self.embeddings.use_ELMo): - # worker at 0 means the training will be executed in the main thread - nb_workers = 0 + # worker at 1 means the training will be executed in the main thread + nb_workers = 1 multiprocessing = False local_model.fit(training_generator, diff --git a/delft/utilities/Transformer.py b/delft/utilities/Transformer.py index fe75195..4b58f05 100644 --- a/delft/utilities/Transformer.py +++ b/delft/utilities/Transformer.py @@ -128,35 +128,30 @@ def init_preprocessor(self, max_sequence_length: int, do_lower_case = False if do_lower_case is not None: - if self.auth_token != None: + if self.auth_token is not None: self.tokenizer = AutoTokenizer.from_pretrained(self.name, - add_special_tokens=add_special_tokens, max_length=max_sequence_length, add_prefix_space=add_prefix_space, do_lower_case=do_lower_case, use_auth_token=self.auth_token) else: self.tokenizer = AutoTokenizer.from_pretrained(self.name, - add_special_tokens=add_special_tokens, max_length=max_sequence_length, add_prefix_space=add_prefix_space, do_lower_case=do_lower_case) else: if self.auth_token != None: self.tokenizer = AutoTokenizer.from_pretrained(self.name, - add_special_tokens=add_special_tokens, max_length=max_sequence_length, add_prefix_space=add_prefix_space, use_auth_token=self.auth_token) else: 
self.tokenizer = AutoTokenizer.from_pretrained(self.name, - add_special_tokens=add_special_tokens, max_length=max_sequence_length, add_prefix_space=add_prefix_space) elif self.loading_method == LOADING_METHOD_LOCAL_MODEL_DIR: self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir_path, - add_special_tokens=add_special_tokens, max_length=max_sequence_length, add_prefix_space=add_prefix_space) elif self.loading_method == LOADING_METHOD_PLAIN_MODEL: From a8567024ddcbdc8ec4d072b418c77bdbdb003865 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Wed, 8 Jan 2025 22:02:57 +0100 Subject: [PATCH 4/9] support more python versions --- .github/workflows/ci-build-unstable.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml index f9ea863..e9b830e 100644 --- a/.github/workflows/ci-build-unstable.yml +++ b/.github/workflows/ci-build-unstable.yml @@ -4,7 +4,7 @@ on: [ push ] concurrency: group: build -# cancel-in-progress: true + cancel-in-progress: true jobs: @@ -12,13 +12,13 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ 3.8, 3.9 ] + python-version: [ 3.8, 3.9, 3.10, 3.11 ] steps: - name: Cleanup more disk space run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: 'pip' From 9ef0bc39aba4ed4bbcde2326dbf5dd390e9ac79b Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Wed, 8 Jan 2025 22:04:21 +0100 Subject: [PATCH 5/9] fix python versions --- .github/workflows/ci-build-unstable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml index 
e9b830e..032d9de 100644 --- a/.github/workflows/ci-build-unstable.yml +++ b/.github/workflows/ci-build-unstable.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ 3.8, 3.9, 3.10, 3.11 ] + python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] steps: - name: Cleanup more disk space run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" From 185b56781a6f312cd62f59972c5868211e91ed92 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Wed, 8 Jan 2025 22:06:47 +0100 Subject: [PATCH 6/9] remove 3.8 --- .github/workflows/ci-build-unstable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml index 032d9de..2432e73 100644 --- a/.github/workflows/ci-build-unstable.yml +++ b/.github/workflows/ci-build-unstable.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.8', '3.9', '3.10', '3.11', '3.12' ] + python-version: [ '3.9', '3.10', '3.11', '3.12' ] steps: - name: Cleanup more disk space run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" From 6d12f7652b015a76d0b975d6a68e79f00ff44285 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Wed, 8 Jan 2025 22:09:32 +0100 Subject: [PATCH 7/9] remove 3.12 --- .github/workflows/ci-build-unstable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml index 2432e73..b40049f 100644 --- a/.github/workflows/ci-build-unstable.yml +++ b/.github/workflows/ci-build-unstable.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9', '3.10', '3.11', '3.12' ] + python-version: [ '3.9', '3.10', '3.11' ] steps: - name: Cleanup more disk space run: sudo rm -rf /usr/share/dotnet && 
sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" From dc6391fedb26937f68247e8d7eb700ece42c3d7e Mon Sep 17 00:00:00 2001 From: lopezp Date: Fri, 28 Feb 2025 15:36:33 +0100 Subject: [PATCH 8/9] update with recent dependencies and tf --- requirements.txt | 19 ++++++++++--------- setup.py | 23 ----------------------- 2 files changed, 10 insertions(+), 32 deletions(-) diff --git a/requirements.txt b/requirements.txt index cd56dd6..30ad744 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,22 +1,23 @@ -numpy==1.23.5 -regex==2021.11.10 -scikit-learn==1.1 +numpy==1.26.4 +regex +scikit-learn==1.6.1 tqdm==4.62.3 #tensorflow==2.16.1 -tensorflow[and-cuda]==2.16.1 -tf_keras==2.16.0 -h5py==3.10.0 +tensorflow[and-cuda]==2.17.1 +tf_keras==2.17.0 +h5py==3.11.0 unidecode==1.3.2 pydot==1.4.0 -lmdb==1.2.1 +lmdb truecase requests>=2.20 pandas==1.3.5 transformers==4.33.2 #transformers==4.40.0 -torch==1.11.0 +torch pytest #tensorflow-addons==0.19.0 -tfa-nightly +tfa-nightly==0.23.0.dev20240415222534 blingfire==0.1.8 accelerate>=0.20.3 +Pillow diff --git a/setup.py b/setup.py index d46133f..7cfc0b4 100644 --- a/setup.py +++ b/setup.py @@ -12,29 +12,6 @@ packages=find_packages(exclude=['test', '*.test', '*.test.*']), include_package_data=True, python_requires='>=3.7', - install_requires=[ - 'numpy==1.23.5', - 'regex==2021.11.10', - 'scikit-learn==1.1', - 'tqdm==4.62.3', - #'tensorflow==2.16.1', - 'tensorflow[and-cuda]==2.16.1', - 'tf_keras==2.16.0', - 'h5py==3.10.0', - 'unidecode==1.3.2', - 'pydot==1.4.0', - 'lmdb==1.2.1', - #'transformers==4.33.2', - 'transformers==4.40.0', - 'torch==1.11.0', - 'truecase', - 'requests>=2.20', - 'pandas==1.3.5', - 'pytest', - #'tensorflow-addons==0.19.0', - 'tfa-nightly', - 'accelerate>=0.20.3' - ], classifiers=[ "Programming Language :: Python :: 3.8", "License :: OSI Approved :: Apache Software License", From 32367a150cec535b65f5c7c4eebcd3d822c8284b Mon Sep 17 00:00:00 2001 From: lopezp Date: 
Fri, 28 Feb 2025 15:48:52 +0100 Subject: [PATCH 9/9] start a new branch for new tf version --- .github/workflows/ci-build-unstable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-build-unstable.yml b/.github/workflows/ci-build-unstable.yml index b40049f..6fe8bf2 100644 --- a/.github/workflows/ci-build-unstable.yml +++ b/.github/workflows/ci-build-unstable.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.9', '3.10', '3.11' ] + python-version: [ '3.10', '3.11' ] steps: - name: Cleanup more disk space run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"