Skip to content

Commit

Permalink
Further removed torchtext
Browse files (browse the repository at this point in the history)
  • Loading branch information
m.habedank committed Nov 25, 2024
1 parent 01533ef commit b2a1454
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 148 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest_slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
python --version
pip --version
python -m pip install -U pip
pip install torch==2.1.0 torchtext torchvision torchaudio
pip install torch==2.1.0 torchvision torchaudio
pip install ray==2.3.1
pip install '.[test]'
Expand Down
2 changes: 1 addition & 1 deletion docker/ludwig-ray-gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ RUN pip install -U pip

WORKDIR /ludwig

RUN pip install --no-cache-dir torch==2.1.0 torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
RUN pip install --no-cache-dir torch==2.1.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu118

COPY . .
RUN pip install --no-cache-dir '.[full]' --extra-index-url https://download.pytorch.org/whl/cu118
2 changes: 1 addition & 1 deletion docker/ludwig-ray/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ RUN pip install -U pip

WORKDIR /ludwig

RUN pip install --no-cache-dir torch==2.1.0 torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir torch==2.1.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu

COPY . .
RUN pip install --no-cache-dir '.[full]' --extra-index-url https://download.pytorch.org/whl/cpu
2 changes: 1 addition & 1 deletion docker/ludwig/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ RUN pip install -U pip

WORKDIR /ludwig

RUN pip install --no-cache-dir torch==2.0.0 torchtext torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir torch==2.0.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu

COPY . .
RUN pip install --no-cache-dir '.[full]'
Expand Down
2 changes: 1 addition & 1 deletion ludwig/decoders/llm_decoders.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# flake8: noqa: E501
import logging
import re
from typing import Any, Dict, List, Union
Expand Down Expand Up @@ -91,7 +92,6 @@ def __init__(
# Transformer Tokenizers
self.tokenizer_vocab_size = self.tokenizer.tokenizer.vocab_size
else:
# TorchText Tokenizers
self.tokenizer_vocab_size = len(self.tokenizer.vocab)

# Maximum number of new tokens that will be generated
Expand Down
138 changes: 35 additions & 103 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ keywords = [
"processing",
"vision",
]

dependencies = [
"absl-py",
"bitsandbytes<0.41.0",
Expand Down Expand Up @@ -69,7 +70,6 @@ dependencies = [
"torchaudio==2.4.1",
"torchinfo",
"torchmetrics>=0.11.0",
#"torchtext==0.17.2",
"torchvision==0.19.1",
"tqdm",
"transformers>=4.42.3",
Expand All @@ -78,48 +78,42 @@ dependencies = [
"xlsxwriter>=1.4.3",
"xlwt",
"tifffile==2024.9.20",
"onnx",
]

# Optional Dependencies
[project.optional-dependencies]
full = [
"accelerate",
"awscli",
"captum",
"cartonml-nightly",
"dask[dataframe]<2023.4.0",
"deepspeed!=0.11.0,<0.13.0",
"faiss-cpu",
"fastapi",
"getdaft[ray]==0.1.20",
"GPUtil",
"hiplot",
"httpx",
"hummingbird-ml>=0.4.8",
"hyperopt",
"lightgbm",
"lightgbm-ray",
"loralib",
"matplotlib>3.4,<3.9.0; python_version > '3.6'",
"matplotlib>=3.0,<3.4; python_version <= '3.6'",
"neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'",
"peft>=0.10.0",
"ptitprince",
"pyarrow",
"python-multipart",
"ray[default,data,serve,tune]==2.3.1",
"ray[default,tune]>=2.0.0",
"s3fs",
"seaborn>=0.7,<0.12",
"sentence-transformers",
"tblib",
"tensorboardX<2.3",
"uvicorn",
]

dev = ["flake8", "flake8-pyproject", "pre-commit", "setuptools"]
test = [
# Core testing
"pytest",
"pytest-timeout",
"pytest-cov",
"tifffile",
"wget",
"six>=1.13.0",

test = ["pytest", "pytest-timeout", "wget", "six>=1.13.0", "cloudpickle"]
# Logging and experiment tracking
"aim",
"wandb<0.12.11",
"comet_ml",
"mlflow",
"sqlalchemy<2", # Pinned for aimstack compatibility

# Ray Tune Search Algorithms
"hpbandster", # BOHB algorithm
"ConfigSpace==0.7.1",
"ax-platform", # AX algorithm
"bayesian-optimization", # Bayesian optimization
"flaml[blendsearch]", # CFO and blendsearch
"HEBO", # HEBO algorithm
"nevergrad", # Nevergrad algorithm
"optuna", # Optuna algorithm
"scikit-optimize", # SKopt algorithm
"zoopt", # ZOOpt algorithm

# Storage
"s3fs>=2022.8.2",
]
benchmarking = ["s3fs"]
distributed = [
"awscli",
Expand Down Expand Up @@ -153,6 +147,7 @@ serve = [
"neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'",
"python-multipart",
"uvicorn",
"starlette",
]
tree = ["hummingbird-ml>=0.4.8", "lightgbm", "lightgbm-ray"]
viz = [
Expand All @@ -171,69 +166,6 @@ Website = "https://ludwig.ai/latest/"
[project.scripts]
ludwig = "ludwig.cli:main"

[tool.hatch.envs.hatch-test]
dependencies = [
"gpy >=1.10.0",
"accelerate",
"aim",
"awscli",
"ax-platform",
"bayesian-optimization",
"captum",
"cartonml-nightly",
"comet_ml",
"ConfigSpace==0.7.1",
"dask[dataframe]<2023.4.0",
"deepspeed!=0.11.0,<0.13.0",
"faiss-cpu",
"fastapi",
"flaml[blendsearch]",
"getdaft[ray]==0.1.20",
"GPUtil",
"HEBO",
"hiplot",
"hpbandster",
"httpx",
"hummingbird-ml>=0.4.8",
"hyperopt",
"lightgbm",
"lightgbm-ray",
"loralib",
"matplotlib>3.4,<3.9.0; python_version > '3.6'",
"matplotlib>=3.0,<3.4; python_version <= '3.6'",
"mlflow",
"neuropod==0.3.0rc6 ; platform_system != \"Windows\" and python_version < '3.9'",
"nevergrad",
"optuna",
"peft>=0.10.0",
"ptitprince",
"pyarrow",
"pytest",
"pytest-timeout",
"pytest-cov",
"python-multipart",
#"ray[default,data,serve,tune]==2.3.1",
#"ray[default,tune]>=2.0.0",
"s3fs",
"s3fs>=2022.8.2",
"scikit-optimize",
"seaborn>=0.7,<0.12",
"sentence-transformers",
"six>=1.13.0",
"sqlalchemy<2",
"tblib",
"tensorboardX<2.3",
"uvicorn",
"wandb",
#"wandb<0.12.11",
"wget",
"zoopt",
]

#[tool.hatch.envs.hatch-test]
#setup = "pip install -e .[test]"
#run = "pytest {args:test}"

[tool.hatch.version]
path = "ludwig/__about__.py"

Expand Down Expand Up @@ -270,11 +202,11 @@ dependencies = ["flake8", "flake8-pyproject"]
style = "flake8 ."

[tool.hatch.envs.default]
python = "3.12"
python = "3.11"
dependencies = ["setuptools>=65.0"]

[tool.hatch.envs.dev]
python = "3.12"
python = "3.11"
dependencies = [".[dev]"]


Expand Down
29 changes: 2 additions & 27 deletions tests/integration_tests/test_torchscript.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# flake8: noqa: E501
# Copyright (c) 2023 Predibase, Inc., 2019 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -21,7 +22,6 @@
import pandas as pd
import pytest
import torch
import torchtext

from ludwig.api import LudwigModel
from ludwig.backend import RAY
Expand Down Expand Up @@ -408,32 +408,7 @@ def test_torchscript_e2e_text(tmpdir, csv_filename):
validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path)


@pytest.mark.skipif(
torch.torch_version.TorchVersion(torchtext.__version__) < (0, 14, 0),
reason="requires torchtext 0.14.0 or higher",
)
@pytest.mark.integration_tests_e
def test_torchscript_e2e_text_hf_tokenizer(tmpdir, csv_filename):
data_csv_path = os.path.join(tmpdir, csv_filename)
input_features = [text_feature(encoder={"vocab_size": 3, "type": "bert"})]
output_features = [
category_feature(),
]
backend = LocalTestBackend()
config = {
"input_features": input_features,
"output_features": output_features,
TRAINER: {"epochs": 2, BATCH_SIZE: 128, EVAL_BATCH_SIZE: 128},
}
training_data_csv_path = generate_data(input_features, output_features, data_csv_path)

validate_torchscript_outputs(tmpdir, config, backend, training_data_csv_path)


@pytest.mark.skipif(
torch.torch_version.TorchVersion(torchtext.__version__) < (0, 14, 0),
reason="requires torchtext 0.14.0 or higher",
)
@pytest.mark.skip()
@pytest.mark.integration_tests_e
def test_torchscript_e2e_text_hf_tokenizer_truncated_sequence(tmpdir, csv_filename):
data_csv_path = os.path.join(tmpdir, csv_filename)
Expand Down
18 changes: 5 additions & 13 deletions tests/ludwig/features/test_sequence_features.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# flake8: noqa: E501
from typing import List, Tuple

import numpy as np
import pytest
import torch
import torchtext

from ludwig.constants import ENCODER_OUTPUT, LAST_HIDDEN, LOGITS, SEQUENCE, TEXT, TYPE
from ludwig.features.sequence_feature import _SequencePreprocessing, SequenceInputFeature, SequenceOutputFeature
Expand Down Expand Up @@ -192,9 +192,7 @@ def test_text_preproc_module_space_punct_tokenizer():
)


@pytest.mark.skipif(
torch.torch_version.TorchVersion(torchtext.__version__) < (0, 12, 0), reason="requires torchtext 0.12.0 or higher"
)
@pytest.mark.skip()
def test_sequence_preproc_module_sentencepiece_tokenizer():
metadata = {
"preprocessing": {
Expand Down Expand Up @@ -227,9 +225,7 @@ def test_sequence_preproc_module_sentencepiece_tokenizer():
)


@pytest.mark.skipif(
torch.torch_version.TorchVersion(torchtext.__version__) < (0, 12, 0), reason="requires torchtext 0.12.0 or higher"
)
@pytest.mark.skip()
def test_sequence_preproc_module_clip_tokenizer():
metadata = {
"preprocessing": {
Expand Down Expand Up @@ -260,9 +256,7 @@ def test_sequence_preproc_module_clip_tokenizer():
)


@pytest.mark.skipif(
torch.torch_version.TorchVersion(torchtext.__version__) < (0, 12, 0), reason="requires torchtext 0.12.0 or higher"
)
@pytest.mark.skip()
def test_sequence_preproc_module_gpt2bpe_tokenizer():
metadata = {
"preprocessing": {
Expand Down Expand Up @@ -296,9 +290,7 @@ def test_sequence_preproc_module_gpt2bpe_tokenizer():
)


@pytest.mark.skipif(
torch.torch_version.TorchVersion(torchtext.__version__) < (0, 13, 0), reason="requires torchtext 0.13.0 or higher"
)
@pytest.mark.skip()
def test_sequence_preproc_module_bert_tokenizer():
metadata = {
"preprocessing": {
Expand Down

0 comments on commit b2a1454

Please sign in to comment.