3 changes: 0 additions & 3 deletions examples/legacy/run_language_modeling.py
**Collaborator (Author) commented:**
I want to remove this example script under legacy rather than handle the removed classes ...

I would love to remove the whole legacy directory, as its README already notes:

> # Legacy examples
>
> This folder contains examples which are not actively maintained (mostly contributed by the community).
>
> Using these examples together with a recent version of the library usually requires to make small (sometimes big) adaptations to get the scripts working.

May I? 🙏

**Member commented:**

cc @ArthurZucker here as well, but in my opinion it's fine to delete the folder. Everything there is very old, so the scripts are not useful examples anyway.

```diff
@@ -39,10 +39,7 @@
     DataCollatorForPermutationLanguageModeling,
     DataCollatorForWholeWordMask,
     HfArgumentParser,
-    LineByLineTextDataset,
-    LineByLineWithRefDataset,
     PreTrainedTokenizer,
-    TextDataset,
     Trainer,
     TrainingArguments,
     set_seed,
```
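For readers hitting this removal: below is a minimal sketch of what the removed `LineByLineTextDataset` did (read a text file, drop blank lines, tokenize each remaining line independently). This is an illustration, not the transformers API; `toy_tokenize`, `LineByLineSketch`, and the temp-file usage are hypothetical stand-ins.

```python
# Minimal sketch of the removed LineByLineTextDataset's behavior:
# read a text file, skip blank lines, tokenize each line on its own.
# `toy_tokenize` is a hypothetical stand-in for a real tokenizer.
import tempfile

def toy_tokenize(line, max_length=8):
    # Whitespace "tokenization", truncated to max_length tokens.
    return line.split()[:max_length]

class LineByLineSketch:
    def __init__(self, file_path, max_length=8):
        with open(file_path, encoding="utf-8") as f:
            lines = [ln.strip() for ln in f if ln.strip()]
        self.examples = [toy_tokenize(ln, max_length) for ln in lines]

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, i):
        return self.examples[i]

# Usage with a throwaway file: the blank middle line is skipped.
with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
    f.write("hello world\n\nsecond line here\n")
    path = f.name

ds = LineByLineSketch(path)
print(len(ds), ds[0])  # 2 ['hello', 'world']
```

In current example scripts the same effect is achieved with the 🤗 Datasets library rather than a bespoke dataset class.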
2 changes: 0 additions & 2 deletions examples/pytorch/language-modeling/README.md
```diff
@@ -24,8 +24,6 @@ objectives in our [model summary](https://huggingface.co/transformers/model_summ
 
 There are two sets of scripts provided. The first set leverages the Trainer API. The second set with `no_trainer` in the suffix uses a custom training loop and leverages the 🤗 Accelerate library . Both sets use the 🤗 Datasets library. You can easily customize them to your needs if you need extra processing on your datasets.
 
-**Note:** The old script `run_language_modeling.py` is still available [here](https://github.com/huggingface/transformers/blob/main/examples/legacy/run_language_modeling.py).
-
 The following examples, will run on datasets hosted on our [hub](https://huggingface.co/datasets) or with your own
 text files for training and validation. We give examples of both below.
```
10 changes: 0 additions & 10 deletions src/transformers/__init__.py
```diff
@@ -375,13 +375,8 @@
     _import_structure["data.datasets"] = [
         "GlueDataset",
         "GlueDataTrainingArguments",
-        "LineByLineTextDataset",
-        "LineByLineWithRefDataset",
-        "LineByLineWithSOPTextDataset",
         "SquadDataset",
         "SquadDataTrainingArguments",
-        "TextDataset",
-        "TextDatasetForNextSentencePrediction",
     ]
     _import_structure["generation"].extend(
         [
@@ -527,13 +522,8 @@
 from .data.data_collator import default_data_collator as default_data_collator
 from .data.datasets import GlueDataset as GlueDataset
 from .data.datasets import GlueDataTrainingArguments as GlueDataTrainingArguments
-from .data.datasets import LineByLineTextDataset as LineByLineTextDataset
-from .data.datasets import LineByLineWithRefDataset as LineByLineWithRefDataset
-from .data.datasets import LineByLineWithSOPTextDataset as LineByLineWithSOPTextDataset
 from .data.datasets import SquadDataset as SquadDataset
 from .data.datasets import SquadDataTrainingArguments as SquadDataTrainingArguments
-from .data.datasets import TextDataset as TextDataset
-from .data.datasets import TextDatasetForNextSentencePrediction as TextDatasetForNextSentencePrediction
 from .feature_extraction_sequence_utils import SequenceFeatureExtractor as SequenceFeatureExtractor
 
 # Feature Extractor
```
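The `_import_structure` dict edited above feeds transformers' lazy-import machinery: names are registered up front and the real submodule import only happens on first attribute access. Below is a simplified sketch of that pattern, not the actual `_LazyModule` implementation; the demo maps names to stdlib modules purely for illustration.

```python
# Simplified sketch of the lazy-import pattern behind _import_structure:
# names map to submodules and are imported only on first attribute access.
import importlib
import types

class LazyModuleSketch(types.ModuleType):
    def __init__(self, name, import_structure):
        super().__init__(name)
        # Invert {module: [names]} into {name: module} for fast lookup.
        self._name_to_module = {
            attr: mod
            for mod, attrs in import_structure.items()
            for attr in attrs
        }

    def __getattr__(self, attr):
        # Called only when `attr` is not yet in the instance __dict__.
        if attr not in self._name_to_module:
            raise AttributeError(f"module {self.__name__!r} has no attribute {attr!r}")
        module = importlib.import_module(self._name_to_module[attr])
        value = getattr(module, attr)
        setattr(self, attr, value)  # cache so resolution happens once per name
        return value

# Demo with stdlib modules standing in for transformers submodules:
lazy = LazyModuleSketch("demo", {"json": ["dumps"], "math": ["sqrt"]})
print(lazy.sqrt(9.0))  # 3.0
```

Removing entries from `_import_structure` (and the matching type-checking imports) is therefore the complete deletion on the package side: nothing else references the classes by name.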
7 changes: 0 additions & 7 deletions src/transformers/data/datasets/__init__.py
```diff
@@ -13,11 +13,4 @@
 # limitations under the License.
 
 from .glue import GlueDataset, GlueDataTrainingArguments
-from .language_modeling import (
-    LineByLineTextDataset,
-    LineByLineWithRefDataset,
-    LineByLineWithSOPTextDataset,
-    TextDataset,
-    TextDatasetForNextSentencePrediction,
-)
 from .squad import SquadDataset, SquadDataTrainingArguments
```