From 870415262cf12f7144d93359317acb24629e6dfd Mon Sep 17 00:00:00 2001 From: HDCharles Date: Tue, 19 Mar 2024 19:53:29 -0700 Subject: [PATCH 1/3] Summary: redoing https://github.com/pytorch-labs/gpt-fast/commit/5bf70c114088a5133299609694a8c17b37de69c4 in a way that doesn't get reverted Test Plan: sh run.sh Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] --- GPTQ.py | 4 ++-- model.py | 6 ++---- quantize.py | 32 +++++++++++++++----------------- run.sh | 17 +++++++++++++++++ 4 files changed, 36 insertions(+), 23 deletions(-) create mode 100644 run.sh diff --git a/GPTQ.py b/GPTQ.py index 806ffad..e1279bd 100644 --- a/GPTQ.py +++ b/GPTQ.py @@ -150,9 +150,9 @@ def __init__( } # trace model for one input - one_input = [multi.values[0] for multi in inputs] + one_input = [multi.values[0].cpu() for multi in inputs] exported_model = torch._dynamo.export( - model, aten_graph=True, pre_dispatch=True, tracing_mode="fake" + model.cpu(), aten_graph=True, pre_dispatch=True, tracing_mode="fake" )(*one_input) super().__init__(exported_model.graph_module) self.new_state_dict = model.state_dict() diff --git a/model.py b/model.py index dbf24e5..e70e87a 100644 --- a/model.py +++ b/model.py @@ -78,10 +78,8 @@ def update(self, input_pos, k_val, v_val): # input_pos: [S], k_val: [B, H, S, D] assert input_pos.shape[0] == k_val.shape[2] - k_out = self.k_cache - v_out = self.v_cache - k_out[:, :, input_pos] = k_val - v_out[:, :, input_pos] = v_val + k_out = torch.ops.aten.index_put_(self.k_cache, [None, None, input_pos], k_val) + v_out = torch.ops.aten.index_put_(self.v_cache, [None, None, input_pos], v_val) return k_out, v_out diff --git a/quantize.py b/quantize.py index db47775..a9b3f79 100644 --- a/quantize.py +++ b/quantize.py @@ -365,6 +365,9 @@ def prepare_int4_weight_and_scales_and_zeros(weight_bf16, groupsize, inner_k_til weight_int4pack = torch.ops.aten._convert_weight_to_int4pack(weight_int32, inner_k_tiles) return weight_int4pack, scales_and_zeros +def _calc_padded_size(k, groupsize=1, innner_k_tiles=1): + from model import find_multiple + return find_multiple(k, 1024) def linear_forward_int4(x, weight_int4pack, scales_and_zeros, out_features, groupsize): origin_x_size = x.size() @@ -378,29 +381,24 @@ def linear_forward_int4(x, weight_int4pack, scales_and_zeros, out_features, grou def _check_linear_int4_k(k, groupsize = 1, inner_k_tiles = 1): return k % groupsize == 0 and k % (inner_k_tiles * 16) == 0 -def replace_linear_int4(module, groupsize, inner_k_tiles, padding, use_cuda): +def replace_linear_int4(module, groupsize, inner_k_tiles, padding_allowed, use_cuda): for name, child in module.named_children(): if isinstance(child, nn.Linear): - if _check_linear_int4_k(child.in_features, groupsize, inner_k_tiles): + if _check_linear_int4_k(child.in_features, groupsize, inner_k_tiles) or padding_allowed: setattr(module, name, WeightOnlyInt4Linear( child.in_features, child.out_features, bias=False, - groupsize=groupsize, inner_k_tiles=inner_k_tiles, padding=False, use_cuda=use_cuda - )) - elif padding: - setattr(module, name, WeightOnlyInt4Linear( - child.in_features, child.out_features, bias=False, - groupsize=groupsize, inner_k_tiles=inner_k_tiles, padding=True, use_cuda=use_cuda + groupsize=groupsize, inner_k_tiles=inner_k_tiles, use_cuda=use_cuda )) else: - replace_linear_int4(child, groupsize, inner_k_tiles, padding, use_cuda) + replace_linear_int4(child, groupsize, inner_k_tiles, padding_allowed, use_cuda) class WeightOnlyInt4QuantHandler: - def __init__(self, mod, groupsize=128, inner_k_tiles=8, padding=True): + def __init__(self, mod, groupsize=128, inner_k_tiles=8, padding_allowed=True): self.mod = mod self.groupsize = groupsize self.inner_k_tiles = inner_k_tiles - self.padding = padding + self.padding_allowed = padding_allowed assert groupsize in [32, 64, 128, 256] assert inner_k_tiles in [2, 4, 8] @@ -417,7 +415,7 @@ def create_quantized_state_dict(self): weight = mod.weight.data if not _check_linear_int4_k(in_features, self.groupsize, self.inner_k_tiles): - if self.padding: + if self.padding_allowed: from model import find_multiple import torch.nn.functional as F print(f"warning: {fqn} is padded to satisfy in_features % 1024 == 0") @@ -436,7 +434,7 @@ def create_quantized_state_dict(self): return cur_state_dict def convert_for_runtime(self, use_cuda): - replace_linear_int4(self.mod, self.groupsize, self.inner_k_tiles, self.padding, use_cuda) + replace_linear_int4(self.mod, self.groupsize, self.inner_k_tiles, self.padding_allowed, use_cuda) return self.mod class WeightOnlyInt4GPTQQuantHandler(GPTQQuantHandler): @@ -485,11 +483,11 @@ class WeightOnlyInt4Linear(torch.nn.Module): def __init__( self, in_features: int, out_features: int, - bias=True, device=None, dtype=None, groupsize: int = 128, inner_k_tiles: int = 8, padding: bool = True, use_cuda=True, + bias=True, device=None, dtype=None, groupsize: int = 128, inner_k_tiles: int = 8, use_cuda=True, ) -> None: super().__init__() - self.padding = padding - if padding: + self.padding = _check_linear_int4_k(in_features, groupsize, inner_k_tiles) + if self.padding: from model import find_multiple self.origin_in_features = in_features in_features = find_multiple(in_features, 1024) @@ -597,7 +595,7 @@ def quantize( dir_name = checkpoint_path.parent base_name = checkpoint_path.name - new_base_name = base_name.replace('.pth', f"{label}int4-gptq.g{groupsize}.pth") + new_base_name = base_name.replace('.pth', f"{label}int4-gptq.g{groupsize}.{device}.pth") else: raise ValueError(f"Invalid quantization mode {mode} needs to be one of [int8, int4, int4-gpptq]") diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..b772fa3 --- /dev/null +++ b/run.sh @@ -0,0 +1,17 @@ +export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf + +# python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --compile # working +# echo "base" + +python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 +python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4-gptq.g32.cuda.pth --tasks wikitext --limit 5 + +# python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.pth --compile +# echo "quant good" + +# python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4 +# python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth --tasks wikitext --limit 5 + +# ENABLE_INTRA_NODE_COMM=1 torchrun --standalone --nproc_per_node=8 generate.py --compile --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth + +# python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 From 749093eacb2181e02e1b79be81d94bd3119a7385 Mon Sep 17 00:00:00 2001 From: HDCharles Date: Tue, 26 Mar 2024 13:06:22 -0700 Subject: [PATCH 2/3] Update on "Summary: redoing" https://github.com/pytorch-labs/gpt-fast/commit/5bf70c114088a5133299609694a8c17b37de69c4 in a way that doesn't get reverted Test Plan: export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4-gptq.g32.cuda.pth --tasks wikitext --limit 5 Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] --- GPTQ.py | 2 +- log.log | 477 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ run.sh | 8 +- 3 files changed, 482 insertions(+), 5 deletions(-) create mode 100644 log.log diff --git a/GPTQ.py b/GPTQ.py index e1279bd..792259e 100644 --- a/GPTQ.py +++ b/GPTQ.py @@ -150,7 +150,7 @@ def __init__( } # trace model for one input - one_input = [multi.values[0].cpu() for multi in inputs] + one_input = tuple([multi.values[0].cpu() for multi in inputs]) exported_model = torch._dynamo.export( model.cpu(), aten_graph=True, pre_dispatch=True, tracing_mode="fake" )(*one_input) diff --git a/log.log b/log.log new file mode 100644 index 0000000..1d40c56 --- /dev/null +++ b/log.log @@ -0,0 +1,477 @@ +2024-03-25:17:50:41,903 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:50:41,904 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +2024-03-25:17:50:41,904 INFO [utils.py:160] NumExpr defaulting to 8 threads. +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +2024-03-25:17:50:49,270 INFO [huggingface.py:148] Using device 'cuda' +2024-03-25:17:50:54,575 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details. +2024-03-25:17:50:58,714 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details. +2024-03-25:17:50:58,714 WARNING [task.py:626] [Task: wikitext] metric word_perplexity is defined, but aggregation is not. using default aggregation=weighted_perplexity +2024-03-25:17:50:58,714 WARNING [task.py:638] [Task: wikitext] metric word_perplexity is defined, but higher_is_better is not. using default higher_is_better=False +2024-03-25:17:50:58,715 WARNING [task.py:626] [Task: wikitext] metric byte_perplexity is defined, but aggregation is not. using default aggregation=weighted_perplexity +2024-03-25:17:50:58,715 WARNING [task.py:638] [Task: wikitext] metric byte_perplexity is defined, but higher_is_better is not. using default higher_is_better=False +2024-03-25:17:50:58,715 WARNING [task.py:626] [Task: wikitext] metric bits_per_byte is defined, but aggregation is not. using default aggregation=bits_per_byte +2024-03-25:17:50:58,715 WARNING [task.py:638] [Task: wikitext] metric bits_per_byte is defined, but higher_is_better is not. using default higher_is_better=False +Repo card metadata block was not found. Setting CardData to empty. +2024-03-25:17:50:59,420 WARNING [repocard.py:107] Repo card metadata block was not found. Setting CardData to empty. +2024-03-25:17:50:59,466 INFO [task.py:363] Building contexts for task on rank 0... +2024-03-25:17:50:59,475 INFO [evaluator.py:324] Running loglikelihood_rolling requests +Loading model ... +Using int4 weight-only quantization! +Time to load model: 3.75 seconds. + 0%| | 0/5 [00:00 + main( + File "/home/cdhernandez/local/gpt-fast/eval.py", line 227, in main + model = _load_model(checkpoint_path, device, precision, False) + File "/home/cdhernandez/local/gpt-fast/generate.py", line 240, in _load_model + model.load_state_dict(checkpoint, assign=True) + File "/home/cdhernandez/local/pytorch/torch/nn/modules/module.py", line 2184, in load_state_dict + raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( +RuntimeError: Error(s) in loading state_dict for Transformer: + size mismatch for layers.0.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.0.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.1.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.1.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.2.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.2.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.3.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.3.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.4.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.4.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.5.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.5.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.6.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.6.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.7.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.7.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.8.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.8.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.9.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.9.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.10.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.10.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.11.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.11.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.12.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.12.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.13.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.13.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.14.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.14.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.15.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.15.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.16.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.16.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.17.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.17.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.18.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.18.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.19.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.19.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.20.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.20.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.21.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.21.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.22.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.22.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.23.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.23.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.24.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.24.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.25.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.25.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.26.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.26.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.27.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.27.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.28.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.28.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.29.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.29.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.30.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.30.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). + size mismatch for layers.31.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). + size mismatch for layers.31.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). +W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] +W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] ***************************************** +W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. +W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] ***************************************** +2024-03-25:17:52:08,088 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,088 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +2024-03-25:17:52:08,258 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,258 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +2024-03-25:17:52:08,311 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,311 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +2024-03-25:17:52:08,358 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,358 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +2024-03-25:17:52:08,427 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,427 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +2024-03-25:17:52:08,429 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,429 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +2024-03-25:17:52:08,433 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,433 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +2024-03-25:17:52:08,502 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. +2024-03-25:17:52:08,502 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. +/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). + from pandas.core import ( +Traceback (most recent call last): + File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in + main( + File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main + rank = maybe_init_dist() + File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist + torch.cuda.set_device(rank) + File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device + torch._C._cuda_setDevice(device) +RuntimeError: CUDA error: invalid device ordinal +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. + +Traceback (most recent call last): + File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in + main( + File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main + rank = maybe_init_dist() + File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist + torch.cuda.set_device(rank) + File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device + torch._C._cuda_setDevice(device) +RuntimeError: CUDA error: invalid device ordinal +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. + +Traceback (most recent call last): + File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in + main( + File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main + rank = maybe_init_dist() + File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist + torch.cuda.set_device(rank) + File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device + torch._C._cuda_setDevice(device) +RuntimeError: CUDA error: invalid device ordinal +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. + +Traceback (most recent call last): + File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in + main( + File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main + rank = maybe_init_dist() + File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist + torch.cuda.set_device(rank) + File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device + torch._C._cuda_setDevice(device) +RuntimeError: CUDA error: invalid device ordinal +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. + +Traceback (most recent call last): + File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in + main( + File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main + rank = maybe_init_dist() + File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist + torch.cuda.set_device(rank) + File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device + torch._C._cuda_setDevice(device) +RuntimeError: CUDA error: invalid device ordinal +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. + +Traceback (most recent call last): + File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in + main( + File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main + rank = maybe_init_dist() + File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist + torch.cuda.set_device(rank) + File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device + torch._C._cuda_setDevice(device) +RuntimeError: CUDA error: invalid device ordinal +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. + +Traceback (most recent call last): + File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in + main( + File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main + rank = maybe_init_dist() + File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist + torch.cuda.set_device(rank) + File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device + torch._C._cuda_setDevice(device) +RuntimeError: CUDA error: invalid device ordinal +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. + +W0325 17:52:11.561000 140417423238144 torch/distributed/elastic/multiprocessing/api.py:851] Sending process 1765652 closing signal SIGTERM +W0325 17:52:11.562000 140417423238144 torch/distributed/elastic/multiprocessing/api.py:851] Sending process 1765656 closing signal SIGTERM +E0325 17:52:11.675000 140417423238144 torch/distributed/elastic/multiprocessing/api.py:826] failed (exitcode: 1) local_rank: 1 (pid: 1765653) of binary: /home/cdhernandez/local/miniconda3/envs/pytorch/bin/python +Traceback (most recent call last): + File "/home/cdhernandez/local/miniconda3/envs/pytorch/bin/torchrun", line 33, in + sys.exit(load_entry_point('torch', 'console_scripts', 'torchrun')()) + File "/home/cdhernandez/local/pytorch/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 347, in wrapper + return f(*args, **kwargs) + File "/home/cdhernandez/local/pytorch/torch/distributed/run.py", line 879, in main + run(args) + File "/home/cdhernandez/local/pytorch/torch/distributed/run.py", line 870, in run + elastic_launch( + File "/home/cdhernandez/local/pytorch/torch/distributed/launcher/api.py", line 132, in __call__ + return launch_agent(self._config, self._entrypoint, list(args)) + File "/home/cdhernandez/local/pytorch/torch/distributed/launcher/api.py", line 263, in launch_agent + raise ChildFailedError( +torch.distributed.elastic.multiprocessing.errors.ChildFailedError: +============================================================ +generate.py FAILED +------------------------------------------------------------ +Failures: +[1]: + time : 2024-03-25_17:52:11 + host : devgpu001.ash8.facebook.com + rank : 2 (local_rank: 2) + exitcode : 1 (pid: 1765654) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +[2]: + time : 2024-03-25_17:52:11 + host : devgpu001.ash8.facebook.com + rank : 3 (local_rank: 3) + exitcode : 1 (pid: 1765655) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +[3]: + time : 2024-03-25_17:52:11 + host : devgpu001.ash8.facebook.com + rank : 5 (local_rank: 5) + exitcode : 1 (pid: 1765657) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +[4]: + time : 2024-03-25_17:52:11 + host : devgpu001.ash8.facebook.com + rank : 6 (local_rank: 6) + exitcode : 1 (pid: 1765658) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +[5]: + time : 2024-03-25_17:52:11 + host : devgpu001.ash8.facebook.com + rank : 7 (local_rank: 7) + exitcode : 1 (pid: 1765659) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +------------------------------------------------------------ +Root Cause (first observed failure): +[0]: + time : 2024-03-25_17:52:11 + host : devgpu001.ash8.facebook.com + rank : 1 (local_rank: 1) + exitcode : 1 (pid: 1765653) + error_file: + traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html +============================================================ diff --git a/run.sh b/run.sh index b772fa3..d657afe 100644 --- a/run.sh +++ b/run.sh @@ -3,15 +3,15 @@ export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf # python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --compile # working # echo "base" -python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 +# python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4-gptq.g32.cuda.pth --tasks wikitext --limit 5 # python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.pth --compile # echo "quant good" -# python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4 -# python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth --tasks wikitext --limit 5 +python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4 +python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth --tasks wikitext --limit 5 -# ENABLE_INTRA_NODE_COMM=1 torchrun --standalone --nproc_per_node=8 generate.py --compile --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth +ENABLE_INTRA_NODE_COMM=1 torchrun --standalone --nproc_per_node=8 generate.py --compile --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth # python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 From 2e7673714b8ffdb4062e6dd89764f8b7f16a936e Mon Sep 17 00:00:00 2001 From: HDCharles Date: Tue, 26 Mar 2024 14:09:32 -0700 Subject: [PATCH 3/3] Update on "int4 gptq shape fix" Summary: redoing https://github.com/pytorch-labs/gpt-fast/commit/5bf70c114088a5133299609694a8c17b37de69c4 in a way that doesn't get reverted. note, needed to fix a device issue as well. Test Plan: export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4-gptq.g32.cuda.pth --tasks wikitext --limit 5 Reviewers: Subscribers: Tasks: Tags: [ghstack-poisoned] --- GPTQ.py | 2 +- log.log | 477 ------------------------------------------------------- model.py | 6 +- run.sh | 17 -- 4 files changed, 5 insertions(+), 497 deletions(-) delete mode 100644 log.log delete mode 100644 run.sh diff --git a/GPTQ.py b/GPTQ.py index 792259e..e1279bd 100644 --- a/GPTQ.py +++ b/GPTQ.py @@ -150,7 +150,7 @@ def __init__( } # trace model for one input - one_input = tuple([multi.values[0].cpu() for multi in inputs]) + one_input = [multi.values[0].cpu() for multi in inputs] exported_model = torch._dynamo.export( model.cpu(), aten_graph=True, pre_dispatch=True, tracing_mode="fake" )(*one_input) diff --git a/log.log b/log.log deleted file mode 100644 index 1d40c56..0000000 --- a/log.log +++ /dev/null @@ -1,477 +0,0 @@ -2024-03-25:17:50:41,903 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:50:41,904 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -2024-03-25:17:50:41,904 INFO [utils.py:160] NumExpr defaulting to 8 threads. -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -2024-03-25:17:50:49,270 INFO [huggingface.py:148] Using device 'cuda' -2024-03-25:17:50:54,575 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details. -2024-03-25:17:50:58,714 WARNING [__init__.py:194] Some tasks could not be loaded due to missing dependencies. Run with `--verbosity DEBUG` for full details. -2024-03-25:17:50:58,714 WARNING [task.py:626] [Task: wikitext] metric word_perplexity is defined, but aggregation is not. using default aggregation=weighted_perplexity -2024-03-25:17:50:58,714 WARNING [task.py:638] [Task: wikitext] metric word_perplexity is defined, but higher_is_better is not. using default higher_is_better=False -2024-03-25:17:50:58,715 WARNING [task.py:626] [Task: wikitext] metric byte_perplexity is defined, but aggregation is not. using default aggregation=weighted_perplexity -2024-03-25:17:50:58,715 WARNING [task.py:638] [Task: wikitext] metric byte_perplexity is defined, but higher_is_better is not. using default higher_is_better=False -2024-03-25:17:50:58,715 WARNING [task.py:626] [Task: wikitext] metric bits_per_byte is defined, but aggregation is not. using default aggregation=bits_per_byte -2024-03-25:17:50:58,715 WARNING [task.py:638] [Task: wikitext] metric bits_per_byte is defined, but higher_is_better is not. using default higher_is_better=False -Repo card metadata block was not found. Setting CardData to empty. -2024-03-25:17:50:59,420 WARNING [repocard.py:107] Repo card metadata block was not found. Setting CardData to empty. -2024-03-25:17:50:59,466 INFO [task.py:363] Building contexts for task on rank 0... -2024-03-25:17:50:59,475 INFO [evaluator.py:324] Running loglikelihood_rolling requests -Loading model ... -Using int4 weight-only quantization! -Time to load model: 3.75 seconds. - 0%| | 0/5 [00:00 - main( - File "/home/cdhernandez/local/gpt-fast/eval.py", line 227, in main - model = _load_model(checkpoint_path, device, precision, False) - File "/home/cdhernandez/local/gpt-fast/generate.py", line 240, in _load_model - model.load_state_dict(checkpoint, assign=True) - File "/home/cdhernandez/local/pytorch/torch/nn/modules/module.py", line 2184, in load_state_dict - raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format( -RuntimeError: Error(s) in loading state_dict for Transformer: - size mismatch for layers.0.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.0.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.1.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.1.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.2.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.2.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.3.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.3.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.4.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.4.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.5.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.5.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.6.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.6.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.7.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.7.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.8.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.8.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.9.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.9.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.10.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.10.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.11.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.11.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.12.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.12.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.13.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.13.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.14.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.14.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.15.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.15.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.16.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.16.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.17.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.17.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.18.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.18.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.19.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.19.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.20.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.20.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.21.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.21.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.22.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.22.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.23.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.23.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.24.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.24.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.25.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.25.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.26.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.26.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.27.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.27.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.28.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.28.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.29.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.29.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.30.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.30.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). - size mismatch for layers.31.feed_forward.w2.weight: copying a param with shape torch.Size([512, 86, 32, 4]) from checkpoint, the shape in current model is torch.Size([512, 88, 32, 4]). - size mismatch for layers.31.feed_forward.w2.scales_and_zeros: copying a param with shape torch.Size([344, 4096, 2]) from checkpoint, the shape in current model is torch.Size([352, 4096, 2]). -W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] -W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] ***************************************** -W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. -W0325 17:52:01.365000 140417423238144 torch/distributed/run.py:757] ***************************************** -2024-03-25:17:52:08,088 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,088 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -2024-03-25:17:52:08,258 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,258 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -2024-03-25:17:52:08,311 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,311 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -2024-03-25:17:52:08,358 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,358 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -2024-03-25:17:52:08,427 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,427 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -2024-03-25:17:52:08,429 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,429 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -2024-03-25:17:52:08,433 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,433 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -2024-03-25:17:52:08,502 INFO [utils.py:145] Note: detected 192 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. -2024-03-25:17:52:08,502 INFO [utils.py:148] Note: NumExpr detected 192 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. -/home/cdhernandez/local/miniconda3/envs/pytorch/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). - from pandas.core import ( -Traceback (most recent call last): - File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in - main( - File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main - rank = maybe_init_dist() - File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist - torch.cuda.set_device(rank) - File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device - torch._C._cuda_setDevice(device) -RuntimeError: CUDA error: invalid device ordinal -CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. -For debugging consider passing CUDA_LAUNCH_BLOCKING=1. -Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. - -Traceback (most recent call last): - File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in - main( - File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main - rank = maybe_init_dist() - File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist - torch.cuda.set_device(rank) - File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device - torch._C._cuda_setDevice(device) -RuntimeError: CUDA error: invalid device ordinal -CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. -For debugging consider passing CUDA_LAUNCH_BLOCKING=1. -Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. - -Traceback (most recent call last): - File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in - main( - File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main - rank = maybe_init_dist() - File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist - torch.cuda.set_device(rank) - File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device - torch._C._cuda_setDevice(device) -RuntimeError: CUDA error: invalid device ordinal -CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. -For debugging consider passing CUDA_LAUNCH_BLOCKING=1. -Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. - -Traceback (most recent call last): - File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in - main( - File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main - rank = maybe_init_dist() - File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist - torch.cuda.set_device(rank) - File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device - torch._C._cuda_setDevice(device) -RuntimeError: CUDA error: invalid device ordinal -CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. -For debugging consider passing CUDA_LAUNCH_BLOCKING=1. -Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. - -Traceback (most recent call last): - File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in - main( - File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main - rank = maybe_init_dist() - File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist - torch.cuda.set_device(rank) - File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device - torch._C._cuda_setDevice(device) -RuntimeError: CUDA error: invalid device ordinal -CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. -For debugging consider passing CUDA_LAUNCH_BLOCKING=1. -Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. - -Traceback (most recent call last): - File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in - main( - File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main - rank = maybe_init_dist() - File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist - torch.cuda.set_device(rank) - File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device - torch._C._cuda_setDevice(device) -RuntimeError: CUDA error: invalid device ordinal -CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. -For debugging consider passing CUDA_LAUNCH_BLOCKING=1. -Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. - -Traceback (most recent call last): - File "/home/cdhernandez/local/gpt-fast/generate.py", line 423, in - main( - File "/home/cdhernandez/local/gpt-fast/generate.py", line 276, in main - rank = maybe_init_dist() - File "/home/cdhernandez/local/gpt-fast/tp.py", line 49, in maybe_init_dist - torch.cuda.set_device(rank) - File "/home/cdhernandez/local/pytorch/torch/cuda/__init__.py", line 399, in set_device - torch._C._cuda_setDevice(device) -RuntimeError: CUDA error: invalid device ordinal -CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. -For debugging consider passing CUDA_LAUNCH_BLOCKING=1. -Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. - -W0325 17:52:11.561000 140417423238144 torch/distributed/elastic/multiprocessing/api.py:851] Sending process 1765652 closing signal SIGTERM -W0325 17:52:11.562000 140417423238144 torch/distributed/elastic/multiprocessing/api.py:851] Sending process 1765656 closing signal SIGTERM -E0325 17:52:11.675000 140417423238144 torch/distributed/elastic/multiprocessing/api.py:826] failed (exitcode: 1) local_rank: 1 (pid: 1765653) of binary: /home/cdhernandez/local/miniconda3/envs/pytorch/bin/python -Traceback (most recent call last): - File "/home/cdhernandez/local/miniconda3/envs/pytorch/bin/torchrun", line 33, in - sys.exit(load_entry_point('torch', 'console_scripts', 'torchrun')()) - File "/home/cdhernandez/local/pytorch/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 347, in wrapper - return f(*args, **kwargs) - File "/home/cdhernandez/local/pytorch/torch/distributed/run.py", line 879, in main - run(args) - File "/home/cdhernandez/local/pytorch/torch/distributed/run.py", line 870, in run - elastic_launch( - File "/home/cdhernandez/local/pytorch/torch/distributed/launcher/api.py", line 132, in __call__ - return launch_agent(self._config, self._entrypoint, list(args)) - File "/home/cdhernandez/local/pytorch/torch/distributed/launcher/api.py", line 263, in launch_agent - raise ChildFailedError( -torch.distributed.elastic.multiprocessing.errors.ChildFailedError: -============================================================ -generate.py FAILED ------------------------------------------------------------- -Failures: -[1]: - time : 2024-03-25_17:52:11 - host : devgpu001.ash8.facebook.com - rank : 2 (local_rank: 2) - exitcode : 1 (pid: 1765654) - error_file: - traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html -[2]: - time : 2024-03-25_17:52:11 - host : devgpu001.ash8.facebook.com - rank : 3 (local_rank: 3) - exitcode : 1 (pid: 1765655) - error_file: - traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html -[3]: - time : 2024-03-25_17:52:11 - host : devgpu001.ash8.facebook.com - rank : 5 (local_rank: 5) - exitcode : 1 (pid: 1765657) - error_file: - traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html -[4]: - time : 2024-03-25_17:52:11 - host : devgpu001.ash8.facebook.com - rank : 6 (local_rank: 6) - exitcode : 1 (pid: 1765658) - error_file: - traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html -[5]: - time : 2024-03-25_17:52:11 - host : devgpu001.ash8.facebook.com - rank : 7 (local_rank: 7) - exitcode : 1 (pid: 1765659) - error_file: - traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html ------------------------------------------------------------- -Root Cause (first observed failure): -[0]: - time : 2024-03-25_17:52:11 - host : devgpu001.ash8.facebook.com - rank : 1 (local_rank: 1) - exitcode : 1 (pid: 1765653) - error_file: - traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html -============================================================ diff --git a/model.py b/model.py index e70e87a..dbf24e5 100644 --- a/model.py +++ b/model.py @@ -78,8 +78,10 @@ def update(self, input_pos, k_val, v_val): # input_pos: [S], k_val: [B, H, S, D] assert input_pos.shape[0] == k_val.shape[2] - k_out = torch.ops.aten.index_put_(self.k_cache, [None, None, input_pos], k_val) - v_out = torch.ops.aten.index_put_(self.v_cache, [None, None, input_pos], v_val) + k_out = self.k_cache + v_out = self.v_cache + k_out[:, :, input_pos] = k_val + v_out[:, :, input_pos] = v_val return k_out, v_out diff --git a/run.sh b/run.sh deleted file mode 100644 index d657afe..0000000 --- a/run.sh +++ /dev/null @@ -1,17 +0,0 @@ -export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf - -# python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --compile # working -# echo "base" - -# python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5 -python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4-gptq.g32.cuda.pth --tasks wikitext --limit 5 - -# python generate.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.pth --compile -# echo "quant good" - -python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4 -python eval.py --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth --tasks wikitext --limit 5 - -ENABLE_INTRA_NODE_COMM=1 torchrun --standalone --nproc_per_node=8 generate.py --compile --checkpoint_path checkpoints/$MODEL_REPO/model_int4.g32.cuda.pth - -# python quantize.py --checkpoint_path checkpoints/$MODEL_REPO/model.pth --mode int4-gptq --calibration_tasks wikitext --calibration_limit 5