Skip to content

Commit

Permalink
fix conflicts, add some setup consistency
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Feb 28, 2025
2 parents dc6391f + 6d12f76 commit 01c9a4f
Show file tree
Hide file tree
Showing 13 changed files with 105 additions and 193,074 deletions.
51 changes: 28 additions & 23 deletions .github/workflows/ci-build-unstable.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,38 @@
name: Build

on: [push]
on: [ push ]

concurrency:
concurrency:
group: build
# cancel-in-progress: true
cancel-in-progress: true


jobs:
build:
runs-on: ubuntu-latest

strategy:
matrix:
python-version: [ '3.9', '3.10', '3.11' ]
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: "3.8"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
- name: Cleanup more disk space
run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
2 changes: 1 addition & 1 deletion LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@

END OF TERMS AND CONDITIONS

Copyright [2018-2023] the DeLFT contributors
Copyright [2018-2025] the DeLFT contributors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
7 changes: 4 additions & 3 deletions Readme.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
<img align="right" width="150" height="150" src="doc/cat-delft-small.jpg">

[![Documentation Status](https://readthedocs.org/projects/delft/badge/?version=latest)](https://readthedocs.org/projects/delft/?badge=latest)
[![Build Status](https://travis-ci.org/kermitt2/delft.svg?branch=master)](https://travis-ci.org/kermitt2/delft)
[![Build](https://github.com/kermitt2/delft/actions/workflows/ci-build-unstable.yml/badge.svg)](https://github.com/kermitt2/delft/actions/workflows/ci-build-unstable.yml)
[![PyPI version](https://badge.fury.io/py/delft.svg)](https://badge.fury.io/py/delft)
[![SWH](https://archive.softwareheritage.org/badge/origin/https://github.com/kermitt2/delft/)](https://archive.softwareheritage.org/browse/origin/https://github.com/kermitt2/delft/)
[![License](http://img.shields.io/:license-apache-blue.svg)](http://www.apache.org/licenses/LICENSE-2.0.html)
[![Downloads](https://static.pepy.tech/badge/delft)](https://pepy.tech/project/delft)


# DeLFT
Expand Down Expand Up @@ -63,7 +64,7 @@ cd delft
It is advised to first set up a virtual environment to avoid falling into one of these gloomy Python dependency marshlands:

```sh
virtualenv --system-site-packages -p python3.8 env
virtualenv --system-site-packages -p python3.10 env
source env/bin/activate
```

Expand Down Expand Up @@ -98,7 +99,7 @@ If you want to cite this work, please refer to the present GitHub project, together w
title = {DeLFT},
howpublished = {\url{https://github.com/kermitt2/delft}},
publisher = {GitHub},
year = {2018--2024},
year = {2018--2025},
archivePrefix = {swh},
eprint = {1:dir:54eb292e1c0af764e27dd179596f64679e44d06e}
}
Expand Down
3 changes: 3 additions & 0 deletions classifiers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Programming Language :: Python :: 3.10
License :: OSI Approved :: Apache Software License
Operating System :: OS Independent
1 change: 1 addition & 0 deletions data/sequenceLabelling/grobid/quantities/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The training data for the Deep Learning models used in Grobid-quantities can be found on the Grobid-quantities repository: https://github.com/lfoppiano/grobid-quantities.
193,035 changes: 0 additions & 193,035 deletions data/sequenceLabelling/grobid/quantities/quantities.all.train

This file was deleted.

10 changes: 8 additions & 2 deletions delft/sequenceLabelling/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,14 @@ def convert_single_text(self, text_tokens, chars_tokens, features_tokens, label_
chars_tokens.append(self.empty_char_vector)

# sub-tokenization
encoded_result = self.tokenizer(text_tokens, add_special_tokens=True, is_split_into_words=True,
max_length=max_seq_length, truncation=True, return_offsets_mapping=True)
encoded_result = self.tokenizer(
text_tokens,
add_special_tokens=True,
is_split_into_words=True,
max_length=max_seq_length,
truncation=True,
return_offsets_mapping=True
)

input_ids = encoded_result.input_ids
offsets = encoded_result.offset_mapping
Expand Down
4 changes: 2 additions & 2 deletions delft/sequenceLabelling/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ def train_model(self, local_model, x_train, y_train, f_train=None,

# multiple workers should work with transformer layers, but not with ELMo due to GPU memory limit (with GTX 1080Ti 11GB)
if self.model_config.transformer_name is not None or (self.embeddings and self.embeddings.use_ELMo):
# worker at 0 means the training will be executed in the main thread
nb_workers = 0
# worker at 1 means batches are prepared by a single background worker thread (0 would run the generator in the main thread)
nb_workers = 1
multiprocessing = False

local_model.fit(training_generator,
Expand Down
7 changes: 1 addition & 6 deletions delft/utilities/Transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,35 +128,30 @@ def init_preprocessor(self, max_sequence_length: int,
do_lower_case = False

if do_lower_case is not None:
if self.auth_token != None:
if self.auth_token is not None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
do_lower_case=do_lower_case)
else:
if self.auth_token != None:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space,
use_auth_token=self.auth_token)
else:
self.tokenizer = AutoTokenizer.from_pretrained(self.name,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)

elif self.loading_method == LOADING_METHOD_LOCAL_MODEL_DIR:
self.tokenizer = AutoTokenizer.from_pretrained(self.local_dir_path,
add_special_tokens=add_special_tokens,
max_length=max_sequence_length,
add_prefix_space=add_prefix_space)
elif self.loading_method == LOADING_METHOD_PLAIN_MODEL:
Expand Down
36 changes: 36 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Packaging metadata for DeLFT (PEP 621 layout, built with setuptools).

[build-system]
# 62.6 is the first setuptools release that accepts
# `dependencies = { file = [...] }` under [tool.setuptools.dynamic];
# the [project] table itself needs >= 61.
requires = ["setuptools>=62.6", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "delft"
description = "a Deep Learning Framework for Text"
readme = "Readme.md"
authors = [
    { name = "Patrice Lopez", email = "[email protected]" },
]
maintainers = [
    { name = "Patrice Lopez", email = "[email protected]" },
    { name = "Luca Foppiano", email = "[email protected]" },
]
requires-python = ">=3.10"
# Every field resolved through [tool.setuptools.dynamic] has to be declared
# here, otherwise its directive is not applied. `version` has no directive
# below; presumably it is still supplied by setup.py -- TODO confirm.
dynamic = ["version", "dependencies", "classifiers"]

[project.urls]
Homepage = "https://github.com/kermitt2/delft"
Repository = "https://github.com/kermitt2/delft"
Changelog = "https://github.com/kermitt2/delft"

[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }
classifiers = { file = ["classifiers.txt"] }

[tool.setuptools.packages.find]
exclude = ["test", "*.test", "*.test.*"]

[tool.bumpversion]
current_version = "0.3.4"
# Native booleans rather than the strings "true".
commit = true
tag = true
tag_name = "v{new_version}"
18 changes: 18 additions & 0 deletions requirements.macos.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
numpy==1.23.5
regex==2021.11.10
scikit-learn==1.1
tqdm==4.62.3
tensorflow==2.17.1
tf_keras==2.17.0
h5py==3.10.0
unidecode==1.3.2
pydot==1.4.0
lmdb==1.2.1
truecase
requests>=2.20
pandas==1.3.5
transformers==4.46.1
pytest
#tensorflow-addons==0.19.0
tfa-nightly
accelerate>=0.20.3
1 change: 1 addition & 0 deletions requirements.txt.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bump-my-version
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
url="https://github.com/kermitt2/delft",
packages=find_packages(exclude=['test', '*.test', '*.test.*']),
include_package_data=True,
python_requires='>=3.7',
python_requires='>=3.10',
classifiers=[
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.10",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
],
Expand Down

0 comments on commit 01c9a4f

Please sign in to comment.