Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to tensorflow 2.16 and Keras 3.0 #170

Closed
wants to merge 10 commits into from
Closed
8 changes: 4 additions & 4 deletions .github/workflows/ci-build-unstable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@ on: [ push ]

concurrency:
group: build
# cancel-in-progress: true
cancel-in-progress: true


jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ 3.8, 3.9 ]
python-version: [ '3.9', '3.10', '3.11' ]
steps:
- name: Cleanup more disk space
run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
Expand Down
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@

END OF TERMS AND CONDITIONS

Copyright [2018-2023] the DeLFT contributors
Copyright [2018-2025] the DeLFT contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
4 changes: 2 additions & 2 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ cd delft
It is advised to first set up a virtual environment to avoid falling into one of these gloomy Python dependency marshlands:

```sh
virtualenv --system-site-packages -p python3.8 env
virtualenv --system-site-packages -p python3.10 env
source env/bin/activate
```

Expand Down Expand Up @@ -99,7 +99,7 @@ If you want to cite this work, please refer to the present GitHub project, together w
title = {DeLFT},
howpublished = {\url{https://github.com/kermitt2/delft}},
publisher = {GitHub},
year = {2018--2024},
year = {2018--2025},
archivePrefix = {swh},
eprint = {1:dir:54eb292e1c0af764e27dd179596f64679e44d06e}
}
Expand Down
2 changes: 1 addition & 1 deletion classifiers.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.10
License :: OSI Approved :: Apache Software License
Operating System :: OS Independent
10 changes: 8 additions & 2 deletions delft/sequenceLabelling/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,14 @@ def convert_single_text(self, text_tokens, chars_tokens, features_tokens, label_
chars_tokens.append(self.empty_char_vector)

# sub-tokenization
encoded_result = self.tokenizer(text_tokens, add_special_tokens=True, is_split_into_words=True,
max_length=max_seq_length, truncation=True, return_offsets_mapping=True)
encoded_result = self.tokenizer(
text_tokens,
add_special_tokens=True,
is_split_into_words=True,
max_length=max_seq_length,
truncation=True,
return_offsets_mapping=True
)

input_ids = encoded_result.input_ids
offsets = encoded_result.offset_mapping
Expand Down
4 changes: 2 additions & 2 deletions delft/sequenceLabelling/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ def train_model(self, local_model, x_train, y_train, f_train=None,

# multiple workers should work with transformer layers, but not with ELMo due to GPU memory limit (with GTX 1080Ti 11GB)
if self.model_config.transformer_name is not None or (self.embeddings and self.embeddings.use_ELMo):
# worker at 0 means the training will be executed in the main thread
nb_workers = 0
# worker at 1 means a single worker is used (NOTE: in Keras, only a value of 0 executes in the main thread - confirm this is the intent)
nb_workers = 1
multiprocessing = False

local_model.fit(training_generator,
Expand Down
5 changes: 5 additions & 0 deletions delft/sequenceLabelling/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

from packaging import version

# for using legacy Keras 2, and not Keras 3 installed by default from TensorFlow 2.16
os.environ["TF_USE_LEGACY_KERAS"] = "1"
os.environ["KERAS_BACKEND"] = "tensorflow"
import tf_keras as keras

# ask tensorflow to be quiet and not print hundred lines of logs
from delft.utilities.Transformer import TRANSFORMER_CONFIG_FILE_NAME, DEFAULT_TRANSFORMER_TOKENIZER_DIR
from delft.utilities.misc import print_parameters
Expand Down
7 changes: 7 additions & 0 deletions delft/textClassification/wrapper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
import os

from packaging import version

# for using legacy Keras 2, and not Keras 3 installed by default from TensorFlow 2.16
os.environ["TF_USE_LEGACY_KERAS"] = "1"
os.environ["KERAS_BACKEND"] = "tensorflow"
import tf_keras as keras

from delft.sequenceLabelling.trainer import LogLearningRateCallback
# ask tensorflow to be quiet and not print hundred lines of logs
from delft.utilities.misc import print_parameters
Expand Down
9 changes: 3 additions & 6 deletions delft/utilities/Transformer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
from typing import Union, Iterable

os.environ["KERAS_BACKEND"] = "tensorflow"

from transformers import AutoTokenizer, TFAutoModel, AutoConfig, BertTokenizer, TFBertModel

TRANSFORMER_CONFIG_FILE_NAME = 'transformer-config.json'
Expand Down Expand Up @@ -126,35 +128,30 @@ def init_preprocessor(self, max_sequence_length: int,
do_lower_case = False

if do_lower_case is not None:
if self.auth_token != None:
if self.auth_token is not None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case)
else:
if self.auth_token != None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)

elif self.loading_method == LOADING_METHOD_LOCAL_MODEL_DIR:
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir_path,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)
elif self.loading_method == LOADING_METHOD_PLAIN_MODEL:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ maintainers = [
{ name = "Patrice Lopez", email = "[email protected]" },
{ name = "Luca Foppiano", email = "[email protected]" }
]
requires-python = ">=3.7"
requires-python = ">=3.10"

dynamic = ['version', "dependencies"]

Expand Down
18 changes: 18 additions & 0 deletions requirements.macos.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
numpy==1.23.5
regex==2021.11.10
scikit-learn==1.1
tqdm==4.62.3
tensorflow==2.17.1
tf_keras==2.17.0
h5py==3.10.0
unidecode==1.3.2
pydot==1.4.0
lmdb==1.2.1
truecase
requests>=2.20
pandas==1.3.5
transformers==4.46.1
pytest
#tensorflow-addons==0.19.0
tfa-nightly
accelerate>=0.20.3
21 changes: 13 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
numpy==1.22.3
regex==2021.11.10
scikit-learn==1.0.1
numpy==1.26.4
regex
scikit-learn==1.6.1
tqdm==4.62.3
tensorflow==2.9.3
h5py==3.6.0
#tensorflow==2.16.1
tensorflow[and-cuda]==2.17.1
tf_keras==2.17.0
h5py==3.11.0
unidecode==1.3.2
pydot==1.4.0
lmdb==1.2.1
lmdb
truecase
requests>=2.20
pandas==1.3.5
transformers==4.33.2
torch==1.10.1
#transformers==4.40.0
torch
pytest
tensorflow-addons==0.19.0
#tensorflow-addons==0.19.0
tfa-nightly==0.23.0.dev20240415222534
blingfire==0.1.8
accelerate>=0.20.3
Pillow
20 changes: 20 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Setuptools packaging script for the delft distribution."""
from pathlib import Path

from setuptools import setup, find_packages

# Resolve the readme relative to this script rather than the current working
# directory, so the build works no matter where it is invoked from.
README_PATH = Path(__file__).resolve().parent / "Readme.md"

# Path.read_text opens and closes the file itself, so no file handle is left
# open (the previous bare open(...).read() never closed it explicitly).
LONG_DESCRIPTION = README_PATH.read_text(encoding="utf-8")

setup(
    name="delft",
    version="0.3.4",
    author="Patrice Lopez",
    author_email="[email protected]",
    description="a Deep Learning Framework for Text",
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    url="https://github.com/kermitt2/delft",
    # test packages are excluded from the distribution
    packages=find_packages(exclude=['test', '*.test', '*.test.*']),
    include_package_data=True,
    python_requires='>=3.10',
    classifiers=[
        "Programming Language :: Python :: 3.10",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
)
Loading