diff --git a/docs/source/examples/configs.rst b/docs/source/examples/configs.rst index ef13092d63..5c00459c05 100644 --- a/docs/source/examples/configs.rst +++ b/docs/source/examples/configs.rst @@ -102,7 +102,7 @@ keyword arguments not specified in the config if we'd like: # Tokenizer is needed for the dataset, configure it first tokenizer: - _component_: torchtune.models.llama2_tokenizer + _component_: torchtune.models.llama2.llama2_tokenizer path: /tmp/tokenizer.model dataset: diff --git a/docs/source/examples/finetune_llm.rst b/docs/source/examples/finetune_llm.rst index 440ed9d429..c7b9e09eb3 100644 --- a/docs/source/examples/finetune_llm.rst +++ b/docs/source/examples/finetune_llm.rst @@ -30,7 +30,7 @@ An example config for training the Llama 7B model using the Alpaca dataset looks # Tokenizer tokenizer: - _component_: torchtune.models.llama2_tokenizer + _component_: torchtune.models.llama2.llama2_tokenizer path: /tmp/tokenizer.model # Dataset @@ -40,7 +40,7 @@ An example config for training the Llama 7B model using the Alpaca dataset looks # Model Arguments model: - _component_: torchtune.models.llama2_7b + _component_: torchtune.models.llama2.llama2_7b model_checkpoint: /tmp/llama2-7b # Fine-tuning arguments diff --git a/docs/source/examples/first_finetune_tutorial.rst b/docs/source/examples/first_finetune_tutorial.rst index 5c0c4dd547..87b2cc302b 100644 --- a/docs/source/examples/first_finetune_tutorial.rst +++ b/docs/source/examples/first_finetune_tutorial.rst @@ -97,7 +97,7 @@ lowering the epochs to 1 so you can see results sooner, and updating the learnin # Tokenizer tokenizer: - _component_: torchtune.models.llama2_tokenizer + _component_: torchtune.models.llama2.llama2_tokenizer path: /tmp/tokenizer.model # Dataset @@ -108,7 +108,7 @@ lowering the epochs to 1 so you can see results sooner, and updating the learnin # Model Arguments model: - _component_: torchtune.models.llama2_7b + _component_: torchtune.models.llama2.llama2_7b model_checkpoint: /tmp/llama2/native_pytorch_model.pt # Fine-tuning arguments diff --git a/recipes/configs/alpaca_llama2_full_finetune.yaml b/recipes/configs/alpaca_llama2_full_finetune.yaml index a2318ac51d..5598ed5021 100644 --- a/recipes/configs/alpaca_llama2_full_finetune.yaml +++ b/recipes/configs/alpaca_llama2_full_finetune.yaml @@ -5,7 +5,7 @@ # Tokenizer tokenizer: - _component_: torchtune.models.llama2_tokenizer + _component_: torchtune.models.llama2.llama2_tokenizer path: /tmp/llama2/tokenizer.model # Dataset @@ -17,7 +17,7 @@ shuffle: True # Model Arguments model: - _component_: torchtune.models.llama2_7b + _component_: torchtune.models.llama2.llama2_7b model_checkpoint: /tmp/llama2_native # Fine-tuning arguments diff --git a/recipes/configs/alpaca_llama2_generate.yaml b/recipes/configs/alpaca_llama2_generate.yaml index 2dacb09125..2c4a3f9781 100644 --- a/recipes/configs/alpaca_llama2_generate.yaml +++ b/recipes/configs/alpaca_llama2_generate.yaml @@ -5,12 +5,12 @@ # Model arguments model: - _component_: torchtune.models.llama2_7b + _component_: torchtune.models.llama2.llama2_7b model_checkpoint: /tmp/llama2_native # Tokenizer arguments tokenizer: - _component_: torchtune.models.llama2_tokenizer + _component_: torchtune.models.llama2.llama2_tokenizer path: /tmp/llama2/tokenizer.model # Generation arguments diff --git a/recipes/configs/alpaca_llama2_lora_finetune.yaml b/recipes/configs/alpaca_llama2_lora_finetune.yaml index 8b0560fbde..02f41f8aa2 100644 --- a/recipes/configs/alpaca_llama2_lora_finetune.yaml +++ b/recipes/configs/alpaca_llama2_lora_finetune.yaml @@ -5,7 +5,7 @@ # Model Arguments model: - _component_: torchtune.models.lora_llama2_7b + _component_: torchtune.models.llama2.lora_llama2_7b lora_attn_modules: ['q_proj', 'v_proj'] lora_rank: 8 lora_alpha: 16 @@ -15,7 +15,7 @@ lora_checkpoint: null # Tokenizer tokenizer: - _component_: torchtune.models.llama2_tokenizer + _component_: torchtune.models.llama2.llama2_tokenizer path: /tmp/llama2/tokenizer.model # Dataset and Sampler diff --git a/recipes/tests/test_alpaca_generate.py b/recipes/tests/test_alpaca_generate.py index a2c0a2a65d..08a7d5fdd8 100644 --- a/recipes/tests/test_alpaca_generate.py +++ b/recipes/tests/test_alpaca_generate.py @@ -36,19 +36,19 @@ class TestAlpacaGenerateRecipe: def _fetch_ckpt_model_path(self, ckpt) -> str: if ckpt == "small_test_ckpt": return "/tmp/test-artifacts/small-ckpt-01242024" - if ckpt == "llama2_7b": + if ckpt == "llama2.llama2_7b": return "/tmp/test-artifacts/llama2-7b-01242024" raise ValueError(f"Unknown ckpt {ckpt}") def test_alpaca_generate(self, capsys, pytestconfig): large_scale = pytestconfig.getoption("--large-scale") - ckpt = "llama2_7b" if large_scale else "small_test_ckpt" + ckpt = "llama2.llama2_7b" if large_scale else "small_test_ckpt" kwargs_values = { "model": {"_component_": f"torchtune.models.{ckpt}"}, "model_checkpoint": self._fetch_ckpt_model_path(ckpt), "tokenizer": { - "_component_": "torchtune.models.llama2_tokenizer", + "_component_": "torchtune.models.llama2.llama2_tokenizer", "path": "/tmp/test-artifacts/tokenizer.model", }, "instruction": "Answer the question.", diff --git a/recipes/tests/test_full_finetune.py b/recipes/tests/test_full_finetune.py index d08ed24d79..7f4c849a85 100644 --- a/recipes/tests/test_full_finetune.py +++ b/recipes/tests/test_full_finetune.py @@ -51,13 +51,13 @@ def _fetch_expected_loss_values(self, ckpt) -> Dict[str, float]: } if ckpt == "small_test_ckpt": return small_test_ckpt_loss_values - if ckpt == "llama2_7b": + if ckpt == "llama2.llama2_7b": return llama2_7b_ckpt_loss_values raise ValueError(f"Unknown ckpt {ckpt}") def test_loss(self, capsys, pytestconfig): large_scale = pytestconfig.getoption("--large-scale") - ckpt = "llama2_7b" if large_scale else "small_test_ckpt" + ckpt = "llama2.llama2_7b" if large_scale else "small_test_ckpt" expected_loss_values = self._fetch_expected_loss_values(ckpt) kwargs_values = default_recipe_kwargs(ckpt) @@ -93,7 +93,7 @@ def test_training_state_on_resume(self): "model": {"_component_": f"torchtune.models.{model_ckpt}"}, "model_checkpoint": fetch_ckpt_model_path(model_ckpt), "tokenizer": { - "_component_": "torchtune.models.llama2_tokenizer", + "_component_": "torchtune.models.llama2.llama2_tokenizer", "path": "/tmp/test-artifacts/tokenizer.model", }, "epochs": 4, @@ -127,7 +127,7 @@ def test_training_state_on_resume(self): "model": {"_component_": f"torchtune.models.{model_ckpt}"}, "model_checkpoint": os.path.join(tmpdirname, "model_2.ckpt"), "tokenizer": { - "_component_": "torchtune.models.llama2_tokenizer", + "_component_": "torchtune.models.llama2.llama2_tokenizer", "path": "/tmp/test-artifacts/tokenizer.model", }, "epochs": 4, @@ -228,7 +228,7 @@ def test_gradient_accumulation( "model": {"_component_": f"torchtune.models.{model_ckpt}"}, "model_checkpoint": None, "tokenizer": { - "_component_": "torchtune.models.llama2_tokenizer", + "_component_": "torchtune.models.llama2.llama2_tokenizer", "path": "/tmp/test-artifacts/tokenizer.model", }, "batch_size": full_batch_size, diff --git a/recipes/tests/utils.py b/recipes/tests/utils.py index 8a4d742323..db6625841a 100644 --- a/recipes/tests/utils.py +++ b/recipes/tests/utils.py @@ -89,7 +89,7 @@ def default_recipe_kwargs(ckpt): "model": {"_component_": f"torchtune.models.{ckpt}"}, "model_checkpoint": fetch_ckpt_model_path(ckpt), "tokenizer": { - "_component_": "torchtune.models.llama2_tokenizer", + "_component_": "torchtune.models.llama2.llama2_tokenizer", "path": "/tmp/test-artifacts/tokenizer.model", }, "batch_size": 8, diff --git a/tests/torchtune/config/test_utils.py b/tests/torchtune/config/test_utils.py index c4e3fc044a..6a27bfdfd4 100644 --- a/tests/torchtune/config/test_utils.py +++ b/tests/torchtune/config/test_utils.py @@ -13,7 +13,7 @@ def test_get_component_from_path(self): good_paths = [ "torchtune", # Test single module without dot "torchtune.models", # Test dotpath for a module - "torchtune.models.llama2_7b", # Test dotpath for an object + "torchtune.models.llama2.llama2_7b", # Test dotpath for an object ] for path in good_paths: _ = _get_component_from_path(path) diff --git a/torchtune/models/__init__.py b/torchtune/models/__init__.py new file mode 100644 index 0000000000..a4fcc101f4 --- /dev/null +++ b/torchtune/models/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from torchtune.models import llama2 + +__all__ = [ + "llama2", +]