Skip to content

Commit

Permalink
Merge branch 'main' into finetune
Browse files: browse the repository at this point in the history
Signed-off-by: Mamta Singh <[email protected]>
  • Loading branch information
quic-mamta committed Jan 10, 2025
2 parents fb95e47 + 40751a2 commit 04fa8f8
Show file tree
Hide file tree
Showing 10 changed files with 52 additions and 52 deletions.
6 changes: 3 additions & 3 deletions QEfficient/cloud/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ def main(**kwargs):
# TODO: may have to init qccl backend, next try run with torchrun command
torch_device = torch.device(device)
assert torch_device.type != "cpu", "Host doesn't support single-node DDP"
assert (
torch_device.index is None
), f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
assert torch_device.index is None, (
f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
)
dist.init_process_group(backend=train_config.dist_backend)
# from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank
getattr(torch, torch_device.type).set_device(dist.get_rank())
Expand Down
22 changes: 11 additions & 11 deletions QEfficient/finetune/utils/train_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def train(

# Start the training loop
for epoch in range(train_config.num_epochs):
print(f"Starting epoch {epoch+1}/{train_config.num_epochs}")
print(f"Starting epoch {epoch + 1}/{train_config.num_epochs}")
print(f"train_config.max_train_step: {train_config.max_train_step}")
# stop when the maximum number of training steps is reached
if max_steps_reached:
Expand All @@ -108,7 +108,7 @@ def train(
total_length = len(train_dataloader) // train_config.gradient_accumulation_steps
pbar = tqdm(
colour="blue",
desc=f"Training Epoch: {epoch+1}",
desc=f"Training Epoch: {epoch + 1}",
total=total_length,
dynamic_ncols=True,
)
Expand All @@ -123,9 +123,9 @@ def train(
break
batch = {k: v.to(device) for k, v in batch.items()} # move the batch elements to qaic device

with torch.autocast(
device_type=device, dtype=torch.float16
) if train_config.use_autocast else nullcontext():
with (
torch.autocast(device_type=device, dtype=torch.float16) if train_config.use_autocast else nullcontext()
):
# an additional condition can be put here to avoid opByOpVerifier getting triggered for each step
if train_config.opByOpVerifier:
with qaic_debug.OpByOpVerifierMode(
Expand Down Expand Up @@ -183,7 +183,7 @@ def train(
model.save_pretrained(train_config.output_dir + f"/trained_weights/step_{step}")

pbar.set_description(
f"Training Epoch: {epoch+1}/{train_config.num_epochs}, step {step+1}/{len(train_dataloader)} completed (loss: {loss.detach().float()})"
f"Training Epoch: {epoch + 1}/{train_config.num_epochs}, step {step + 1}/{len(train_dataloader)} completed (loss: {loss.detach().float()})"
)
if train_config.save_metrics:
save_to_json(
Expand Down Expand Up @@ -244,11 +244,11 @@ def train(
if train_config.run_validation:
if eval_epoch_loss < best_val_loss:
best_val_loss = eval_epoch_loss
print(f"best eval loss on epoch {epoch+1} is {best_val_loss}")
print(f"best eval loss on epoch {epoch + 1} is {best_val_loss}")
val_loss.append(float(eval_epoch_loss))
val_prep.append(float(eval_ppl))
print(
f"Epoch {epoch+1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}, epoch time {epoch_end_time}s"
f"Epoch {epoch + 1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}, epoch time {epoch_end_time}s"
)

# Saving the results every epoch to plot later
Expand Down Expand Up @@ -322,9 +322,9 @@ def evaluation(model, train_config, eval_dataloader, local_rank, tokenizer, devi
# Ensure no gradients are computed for this scope to save memory
with torch.no_grad():
# Forward pass and compute loss
with torch.autocast(
device_type=device, dtype=torch.float16
) if train_config.use_autocast else nullcontext():
with (
torch.autocast(device_type=device, dtype=torch.float16) if train_config.use_autocast else nullcontext()
):
outputs = model(**batch)
loss = outputs.loss

Expand Down
4 changes: 2 additions & 2 deletions scripts/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ pipeline {
}
stage('Run Non-CLI QAIC Tests') {
steps {
timeout(time: 60, unit: 'MINUTES') {
timeout(time: 70, unit: 'MINUTES') {
sh '''
sudo docker exec ${BUILD_TAG} bash -c "
cd /efficient-transformers &&
. preflight_qeff/bin/activate &&
mkdir -p $PWD/Non_qaic &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_qaic &&
pytest tests -m '(not cli) and (on_qaic) and (not qnn)' -n 4 --junitxml=tests/tests_log2.xml &&
pytest tests -m '(not cli) and (on_qaic) and (not qnn)' -n 3 --junitxml=tests/tests_log2.xml &&
deactivate"
'''
}
Expand Down
6 changes: 3 additions & 3 deletions scripts/perplexity_computation/calculate_perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ def torch_perplexity(

loop_time = time.time() - loop_s
logger.info(
f"E2E Sample Time: {(loop_time)/batch_size:.4f}s\t E2E TOKENS/S : {((ctx_len-prompt_len)*batch_size)/loop_time:.2f}"
f"E2E Sample Time: {(loop_time) / batch_size:.4f}s\t E2E TOKENS/S : {((ctx_len - prompt_len) * batch_size) / loop_time:.2f}"
)

del outputs
Expand Down Expand Up @@ -332,7 +332,7 @@ def calculate_perplexity(

loop_time = time.time() - loop_s
logger.info(
f"e2e sample time: {(loop_time)/batch_size:.4f}s\t e2e tokens/s : {((ctx_len-prompt_len)*batch_size)/loop_time:.2f}"
f"e2e sample time: {(loop_time) / batch_size:.4f}s\t e2e tokens/s : {((ctx_len - prompt_len) * batch_size) / loop_time:.2f}"
)

avg_loss = total_loss / total_tokens
Expand Down Expand Up @@ -415,7 +415,7 @@ def main():
print(f"Dataset Stride: {args.stride}", file=fp)
print(f"Overall Loss: {loss}", file=fp)
print(f"Perplexity: {perplexity}", file=fp)
print(f"Total time for evaluation: {(time.time()-start_time)/3600.0} hrs", file=fp)
print(f"Total time for evaluation: {(time.time() - start_time) / 3600.0} hrs", file=fp)
if isinstance(args.model_type, str) and args.model_type == "torch":
print("\n*******************************************************", file=fp)
print(f"Torch Original Perplexity: {perplexity}", file=fp)
Expand Down
6 changes: 3 additions & 3 deletions tests/base/test_onnx_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ def test_split_tensors_transform(tmp_path):
>
test_split () => ()
<
float[1, 32] tensor0 = [ "location": "{external_tensors_file}", "offset": "0", "length": "{32*4}" ],
float[1, 32] tensor1 = [ "location": "{external_tensors_file}", "offset": "{32*4}", "length": "{32*4}" ],
float[1, 16] tensor2 = [ "location": "{external_tensors_file}", "offset": "{64*4}", "length": "{16*4}" ]
float[1, 32] tensor0 = [ "location": "{external_tensors_file}", "offset": "0", "length": "{32 * 4}" ],
float[1, 32] tensor1 = [ "location": "{external_tensors_file}", "offset": "{32 * 4}", "length": "{32 * 4}" ],
float[1, 16] tensor2 = [ "location": "{external_tensors_file}", "offset": "{64 * 4}", "length": "{16 * 4}" ]
>
{{
}}
Expand Down
12 changes: 6 additions & 6 deletions tests/qnn_tests/test_causal_lm_models_qnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(

pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model)

assert (
pytorch_hf_tokens == pytorch_kv_tokens
).all(), "Tokens don't match for HF PyTorch model output and KV PyTorch model output"
assert (pytorch_hf_tokens == pytorch_kv_tokens).all(), (
"Tokens don't match for HF PyTorch model output and KV PyTorch model output"
)

onnx_model_path = qeff_model.export()
ort_tokens = api_runner.run_kv_model_on_ort(onnx_model_path)
Expand All @@ -109,9 +109,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
cloud_ai_100_tokens = exec_info.generated_ids[0] # Because we always run for single input and single batch size
gen_len = ort_tokens.shape[-1]
assert (
ort_tokens == cloud_ai_100_tokens[:, :gen_len]
).all(), "Tokens don't match for ONNXRT output and Cloud AI 100 output."
assert (ort_tokens == cloud_ai_100_tokens[:, :gen_len]).all(), (
"Tokens don't match for ONNXRT output and Cloud AI 100 output."
)

# testing for CB models
model_hf, _ = load_causal_lm_model(model_config)
Expand Down
6 changes: 3 additions & 3 deletions tests/text_generation/test_text_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,6 @@ def test_generate_text_stream(
for decoded_tokens in text_generator.generate_stream_tokens(Constants.INPUT_STR, generation_len=max_gen_len):
stream_tokens.extend(decoded_tokens)

assert (
cloud_ai_100_output == stream_tokens
), f"Deviation in output observed while comparing regular execution and streamed output: {cloud_ai_100_output} != {stream_tokens}"
assert cloud_ai_100_output == stream_tokens, (
f"Deviation in output observed while comparing regular execution and streamed output: {cloud_ai_100_output} != {stream_tokens}"
)
18 changes: 9 additions & 9 deletions tests/transformers/models/test_causal_lm_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(

pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model)

assert (
pytorch_hf_tokens == pytorch_kv_tokens
).all(), "Tokens don't match for HF PyTorch model output and KV PyTorch model output"
assert (pytorch_hf_tokens == pytorch_kv_tokens).all(), (
"Tokens don't match for HF PyTorch model output and KV PyTorch model output"
)

onnx_model_path = qeff_model.export()
ort_tokens = api_runner.run_kv_model_on_ort(onnx_model_path, is_tlm=is_tlm)
Expand All @@ -133,9 +133,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
cloud_ai_100_tokens = exec_info.generated_ids[0] # Because we always run for single input and single batch size
gen_len = ort_tokens.shape[-1]
assert (
ort_tokens == cloud_ai_100_tokens[:, :gen_len]
).all(), "Tokens don't match for ONNXRT output and Cloud AI 100 output."
assert (ort_tokens == cloud_ai_100_tokens[:, :gen_len]).all(), (
"Tokens don't match for ONNXRT output and Cloud AI 100 output."
)

# testing for CB models
model_hf, _ = load_causal_lm_model(model_config)
Expand Down Expand Up @@ -204,9 +204,9 @@ def test_causal_lm_export_with_deprecated_api(model_name):
new_api_ort_tokens = api_runner.run_kv_model_on_ort(new_api_onnx_model_path)
old_api_ort_tokens = api_runner.run_kv_model_on_ort(old_api_onnx_model_path)

assert (
new_api_ort_tokens == old_api_ort_tokens
).all(), "New API output does not match old API output for ONNX export function"
assert (new_api_ort_tokens == old_api_ort_tokens).all(), (
"New API output does not match old API output for ONNX export function"
)


@pytest.mark.on_qaic
Expand Down
12 changes: 6 additions & 6 deletions tests/transformers/spd/test_spd_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
"""
num_chunks = -(input_len // -prefill_seq_len) # ceil divide without float
input_len_padded = num_chunks * prefill_seq_len # Convert input_len to a multiple of prefill_seq_len
assert (
input_len_padded <= ctx_len
), "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
assert input_len_padded <= ctx_len, (
"input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
)
return input_len_padded


Expand Down Expand Up @@ -325,9 +325,9 @@ def test_spec_decode_inference(
for prompt, generation in zip(prompts, batch_decode):
print(f"{prompt=} {generation=}")
# validation check
assert mean_num_accepted_tokens == float(
num_speculative_tokens + 1
), f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens+1}"
assert mean_num_accepted_tokens == float(num_speculative_tokens + 1), (
f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
)
del target_model_session
del draft_model_session
generated_ids = np.asarray(generated_ids).flatten()
Expand Down
12 changes: 6 additions & 6 deletions tests/transformers/test_transformer_pytorch_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,9 @@ def test_awq_to_matmulnbits_transform(in_features, out_features):
assert transformed
new_out = new_module(rand_data)
assert isinstance(new_module, QuantLinearORT)
assert compare_original_vs_kv_model_pt_outputs(
old_out, new_out, tolerance=1e-8
), "Test failed because MAE is greater than tolerance"
assert compare_original_vs_kv_model_pt_outputs(old_out, new_out, tolerance=1e-8), (
"Test failed because MAE is greater than tolerance"
)


@pytest.mark.parametrize("in_features", [4096, 4096])
Expand All @@ -349,6 +349,6 @@ def test_gptq_to_matmulnbits_transform(in_features, out_features):
assert transformed
new_out = new_module(rand_data)
assert isinstance(new_module, QuantLinearORT)
assert compare_original_vs_kv_model_pt_outputs(
old_out, new_out, tolerance=1e-4
), "Test failed because MAE is greater than tolerance"
assert compare_original_vs_kv_model_pt_outputs(old_out, new_out, tolerance=1e-4), (
"Test failed because MAE is greater than tolerance"
)

0 comments on commit 04fa8f8

Please sign in to comment.