Skip to content

Commit

Permalink
Merge pull request #24 from aws-neuron/release_cut_2.21
Browse files Browse the repository at this point in the history
Release 2.21
  • Loading branch information
Arjunbala authored and awsjoshir committed Dec 22, 2024
1 parent 083d3ea commit 3aa65c6
Show file tree
Hide file tree
Showing 192 changed files with 13,756 additions and 4,644 deletions.
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Python .gitignore template

*.deb
*.pt

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down Expand Up @@ -78,6 +81,7 @@ target/

# Jupyter Notebook
.ipynb_checkpoints
*.ipynb

# IPython
profile_default/
Expand Down Expand Up @@ -140,3 +144,8 @@ src/neuronx_distributed.egg-info/
*.whl
**/.DS_Store
__pycache__
.vscode
/exp*
/tmp*
tmp.*
pyproject.toml
9 changes: 8 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ repos:
- id: clang-format
args: [--style=file, -i]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.5.0
rev: v0.6.2
hooks:
- id: ruff
name: ruff
Expand All @@ -23,3 +23,10 @@ repos:
types: [python]
language: system
exclude: cases_update
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.11.2
hooks:
- id: mypy
name: mypy
language: python
files: src/.*\.py
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ To build from source, run the following command:
```
bash ./build.sh
```

The built wheel will be placed in the `build/` directory.

## API Reference Guide
Expand Down
4 changes: 2 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ fi
# Run static code analysis
python3.8 -m pip install mypy
# Install type bindings
python3.8 -m pip install types-requests boto3-stubs[s3]
python3.8 -m pip install types-requests boto3-stubs[s3] types-PyYAML
# removing cache fails in ToD
python3.8 -m mypy --no-incremental || true
python3.8 -m mypy --no-incremental --cache-dir=/dev/null
# exit when asked to run `mypy` only
if [[ "$1" == "mypy" ]]
then
Expand Down
30 changes: 11 additions & 19 deletions examples/inference/dbrx/dbrx_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,54 +5,46 @@
NeuronDbrxModel,
)
from runner import InferenceRunner
from transformers import AutoTokenizer
from transformers import AutoTokenizer, DbrxConfig

from neuronx_distributed.parallel_layers.checkpointing import _invoke_preshard_hook


class DbrxRunner(InferenceRunner):
def load_hf_model(self):
config = NeuronDbrxConfig.from_pretrained(self.model_path)
return NeuronDbrxForCausalLM.load_hf_model(self.model_path, config)
hf_config = DbrxConfig.from_pretrained(self.model_path)
return NeuronDbrxForCausalLM.load_hf_model(self.model_path, hf_config)

def load_neuron_model_on_cpu(self, max_prompt_length, sequence_length, batch_size, **kwargs):
# On CPU we can only run tensor parallelism with degree 1
config = self.get_config_for_nxd(
hf_config = self.get_hf_config(sequence_length=sequence_length, **kwargs)
neuron_config = self.get_config_for_nxd(
hf_config,
batch_size,
1,
max_prompt_length=max_prompt_length,
sequence_length=sequence_length,
enable_bucketing=False,
**kwargs)
config.torch_dtype = torch.float32
hf_config.torch_dtype = torch.float32

self.init_ditributed_env()
neuron_model = NeuronDbrxModel(config)
neuron_model = NeuronDbrxModel(neuron_config)

state_dict = NeuronDbrxForCausalLM.get_state_dict(self.model_path, config)
state_dict = NeuronDbrxForCausalLM.get_state_dict(self.model_path, neuron_config)

_invoke_preshard_hook(neuron_model, state_dict)

neuron_model.load_state_dict(state_dict, strict=False)

if config.torch_dtype == torch.bfloat16:
if hf_config.torch_dtype == torch.bfloat16:
neuron_model.bfloat16()

model = NeuronDbrxForCausalLM(None, config)
model = NeuronDbrxForCausalLM(None, neuron_config)
model.context_encoding_model.model = neuron_model
model.token_generation_model.model = neuron_model
return model

def load_neuron_model(self, traced_model_path):
config = NeuronDbrxConfig.from_pretrained(traced_model_path)
model = NeuronDbrxForCausalLM.from_pretrained("", config)

model.load(traced_model_path)
if config.torch_dtype == torch.bfloat16:
model.bfloat16()

return model

def load_tokenizer(self, padding_side=None):
tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_path)
tokenizer.pad_token = tokenizer.unk_token
Expand Down
Loading

0 comments on commit 3aa65c6

Please sign in to comment.