Minor Jenkins fix #212

Merged: 3 commits, Jan 9, 2025
6 changes: 3 additions & 3 deletions QEfficient/cloud/finetune.py
@@ -65,9 +65,9 @@ def main(**kwargs):
# TODO: may have to init qccl backend, next try run with torchrun command
torch_device = torch.device(device)
assert torch_device.type != "cpu", "Host doesn't support single-node DDP"
assert (
torch_device.index is None
), f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
assert torch_device.index is None, (
f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
)
dist.init_process_group(backend=train_config.dist_backend)
# from here onward "qaic/cuda" will automatically map to "qaic:i/cuda:i", where i = process rank
getattr(torch, torch_device.type).set_device(dist.get_rank())
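The rewrap above leaves the assert's semantics untouched: the message, rather than the condition, is now wrapped in parentheses, which looks like the output of a newer formatter release (e.g. a ruff/Black bump; that attribution is an assumption). A minimal sketch of the two equivalent forms, using an illustrative CPU device:

import torch

torch_device = torch.device("cpu")  # illustrative; the repo targets qaic/cuda devices

# Old wrapping: the condition is parenthesized across lines.
assert (
    torch_device.index is None
), f"DDP requires specification of device type only, however provided device index as well: {torch_device}"

# New wrapping: the message is parenthesized instead; behavior is identical.
assert torch_device.index is None, (
    f"DDP requires specification of device type only, however provided device index as well: {torch_device}"
)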
22 changes: 11 additions & 11 deletions QEfficient/finetune/utils/train_utils.py
@@ -96,7 +96,7 @@ def train(

# Start the training loop
for epoch in range(train_config.num_epochs):
print(f"Starting epoch {epoch+1}/{train_config.num_epochs}")
print(f"Starting epoch {epoch + 1}/{train_config.num_epochs}")
print(f"train_config.max_train_step: {train_config.max_train_step}")
# stop when the maximum number of training steps is reached
if max_steps_reached:
@@ -108,7 +108,7 @@ def train(
total_length = len(train_dataloader) // train_config.gradient_accumulation_steps
pbar = tqdm(
colour="blue",
desc=f"Training Epoch: {epoch+1}",
desc=f"Training Epoch: {epoch + 1}",
total=total_length,
dynamic_ncols=True,
)
@@ -123,9 +123,9 @@ def train(
break
batch = {k: v.to(device) for k, v in batch.items()} # move the batch elements to qaic device

with torch.autocast(
device_type=device, dtype=torch.float16
) if train_config.use_autocast else nullcontext():
with (
torch.autocast(device_type=device, dtype=torch.float16) if train_config.use_autocast else nullcontext()
):
# an additional condition can be put here to avoid opByOpVerifier getting triggered for each step
if train_config.opByOpVerifier:
with qaic_debug.OpByOpVerifierMode(
@@ -183,7 +183,7 @@ def train(
model.save_pretrained(train_config.output_dir + f"/trained_weights/step_{step}")

pbar.set_description(
f"Training Epoch: {epoch+1}/{train_config.num_epochs}, step {step+1}/{len(train_dataloader)} completed (loss: {loss.detach().float()})"
f"Training Epoch: {epoch + 1}/{train_config.num_epochs}, step {step + 1}/{len(train_dataloader)} completed (loss: {loss.detach().float()})"
)
if train_config.save_metrics:
save_to_json(
@@ -244,11 +244,11 @@ def train(
if train_config.run_validation:
if eval_epoch_loss < best_val_loss:
best_val_loss = eval_epoch_loss
print(f"best eval loss on epoch {epoch+1} is {best_val_loss}")
print(f"best eval loss on epoch {epoch + 1} is {best_val_loss}")
val_loss.append(float(eval_epoch_loss))
val_prep.append(float(eval_ppl))
print(
f"Epoch {epoch+1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}, epoch time {epoch_end_time}s"
f"Epoch {epoch + 1}: train_perplexity={train_perplexity:.4f}, train_epoch_loss={train_epoch_loss:.4f}, epoch time {epoch_end_time}s"
)

# Saving the results every epoch to plot later
@@ -322,9 +322,9 @@ def evaluation(model, train_config, eval_dataloader, local_rank, tokenizer, devi
# Ensure no gradients are computed for this scope to save memory
with torch.no_grad():
# Forward pass and compute loss
with torch.autocast(
device_type=device, dtype=torch.float16
) if train_config.use_autocast else nullcontext():
with (
torch.autocast(device_type=device, dtype=torch.float16) if train_config.use_autocast else nullcontext()
):
outputs = model(**batch)
loss = outputs.loss

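The autocast changes in this file only move the conditional context manager inside parentheses so the formatter can break the line; runtime behavior is unchanged. A small self-contained sketch of the pattern, assuming a CPU device and bfloat16 purely for illustration:

from contextlib import nullcontext

import torch

use_autocast = True  # stand-in for train_config.use_autocast
device = "cpu"       # stand-in for the qaic/cuda device string; bfloat16 so CPU autocast works

x = torch.randn(4, 4)
# Parentheses around the conditional expression let it span multiple lines
# without a backslash; nullcontext() is a no-op when autocast is disabled.
with (
    torch.autocast(device_type=device, dtype=torch.bfloat16) if use_autocast else nullcontext()
):
    y = x @ x
print(y.dtype)  # torch.bfloat16 when autocast is enabled, torch.float32 otherwise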
4 changes: 2 additions & 2 deletions scripts/Jenkinsfile
@@ -48,15 +48,15 @@ pipeline {
}
stage('Run Non-CLI QAIC Tests') {
steps {
timeout(time: 60, unit: 'MINUTES') {
timeout(time: 70, unit: 'MINUTES') {
sh '''
sudo docker exec ${BUILD_TAG} bash -c "
cd /efficient-transformers &&
. preflight_qeff/bin/activate &&
mkdir -p $PWD/Non_qaic &&
export TOKENIZERS_PARALLELISM=false &&
export QEFF_HOME=$PWD/Non_qaic &&
pytest tests -m '(not cli) and (on_qaic) and (not qnn)' -n 4 --junitxml=tests/tests_log2.xml &&
pytest tests -m '(not cli) and (on_qaic) and (not qnn)' -n 3 --junitxml=tests/tests_log2.xml &&
deactivate"
'''
}
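The Jenkins change trades a longer stage timeout (60 to 70 minutes) for fewer parallel pytest workers (-n 4 to -n 3); the -m expression is unchanged and relies on markers registered in the test suite. A minimal sketch of how such markers select tests (marker names mirror the command above; the test bodies are illustrative, and -n requires the pytest-xdist plugin):

# test_marker_selection.py -- run with:
#   pytest -m "(not cli) and (on_qaic) and (not qnn)" -n 3
import pytest

@pytest.mark.on_qaic
def test_selected_on_qaic():
    # Picked up: marked on_qaic, carries neither cli nor qnn.
    assert 1 + 1 == 2

@pytest.mark.cli
def test_deselected_cli():
    # Skipped by the -m expression because of the cli marker.
    assert True

@pytest.mark.on_qaic
@pytest.mark.qnn
def test_deselected_qnn():
    # Skipped: on_qaic but also qnn.
    assert True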
6 changes: 3 additions & 3 deletions scripts/perplexity_computation/calculate_perplexity.py
@@ -200,7 +200,7 @@ def torch_perplexity(

loop_time = time.time() - loop_s
logger.info(
f"E2E Sample Time: {(loop_time)/batch_size:.4f}s\t E2E TOKENS/S : {((ctx_len-prompt_len)*batch_size)/loop_time:.2f}"
f"E2E Sample Time: {(loop_time) / batch_size:.4f}s\t E2E TOKENS/S : {((ctx_len - prompt_len) * batch_size) / loop_time:.2f}"
)

del outputs
@@ -332,7 +332,7 @@ def calculate_perplexity(

loop_time = time.time() - loop_s
logger.info(
f"e2e sample time: {(loop_time)/batch_size:.4f}s\t e2e tokens/s : {((ctx_len-prompt_len)*batch_size)/loop_time:.2f}"
f"e2e sample time: {(loop_time) / batch_size:.4f}s\t e2e tokens/s : {((ctx_len - prompt_len) * batch_size) / loop_time:.2f}"
)

avg_loss = total_loss / total_tokens
@@ -415,7 +415,7 @@ def main():
print(f"Dataset Stride: {args.stride}", file=fp)
print(f"Overall Loss: {loss}", file=fp)
print(f"Perplexity: {perplexity}", file=fp)
print(f"Total time for evaluation: {(time.time()-start_time)/3600.0} hrs", file=fp)
print(f"Total time for evaluation: {(time.time() - start_time) / 3600.0} hrs", file=fp)
if isinstance(args.model_type, str) and args.model_type == "torch":
print("\n*******************************************************", file=fp)
print(f"Torch Original Perplexity: {perplexity}", file=fp)
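The f-strings in this file only gain spaces around operators; the throughput arithmetic itself is unchanged: per-sample time is the loop wall time divided by batch size, and tokens/s counts the decoded positions (ctx_len - prompt_len) across the batch. A toy recomputation with made-up numbers:

ctx_len, prompt_len, batch_size = 1024, 128, 2  # illustrative values
loop_time = 3.5                                 # seconds, illustrative

e2e_sample_time = loop_time / batch_size
tokens_per_sec = ((ctx_len - prompt_len) * batch_size) / loop_time
print(f"E2E Sample Time: {e2e_sample_time:.4f}s\tE2E TOKENS/S : {tokens_per_sec:.2f}")
# E2E Sample Time: 1.7500s    E2E TOKENS/S : 512.00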
6 changes: 3 additions & 3 deletions tests/base/test_onnx_transforms.py
@@ -77,9 +77,9 @@ def test_split_tensors_transform(tmp_path):
>
test_split () => ()
<
float[1, 32] tensor0 = [ "location": "{external_tensors_file}", "offset": "0", "length": "{32*4}" ],
float[1, 32] tensor1 = [ "location": "{external_tensors_file}", "offset": "{32*4}", "length": "{32*4}" ],
float[1, 16] tensor2 = [ "location": "{external_tensors_file}", "offset": "{64*4}", "length": "{16*4}" ]
float[1, 32] tensor0 = [ "location": "{external_tensors_file}", "offset": "0", "length": "{32 * 4}" ],
float[1, 32] tensor1 = [ "location": "{external_tensors_file}", "offset": "{32 * 4}", "length": "{32 * 4}" ],
float[1, 16] tensor2 = [ "location": "{external_tensors_file}", "offset": "{64 * 4}", "length": "{16 * 4}" ]
>
{{
}}
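The offsets and lengths in the expected ONNX text above are byte positions of float32 tensors packed back-to-back in the external-data file (4 bytes per element), which is why they are written as 32 * 4, 64 * 4, and so on. The arithmetic, spelled out for the same shapes:

import numpy as np

# Three float32 tensors written contiguously into one external-data file.
shapes = [(1, 32), (1, 32), (1, 16)]
offset = 0
for i, shape in enumerate(shapes):
    length = int(np.prod(shape)) * 4  # float32 -> 4 bytes per element
    print(f"tensor{i}: offset={offset}, length={length}")
    offset += length
# tensor0: offset=0,   length=128  (32 * 4)
# tensor1: offset=128, length=128  (32 * 4)
# tensor2: offset=256, length=64   (16 * 4)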
12 changes: 6 additions & 6 deletions tests/qnn_tests/test_causal_lm_models_qnn.py
@@ -86,9 +86,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(

pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model)

assert (
pytorch_hf_tokens == pytorch_kv_tokens
).all(), "Tokens don't match for HF PyTorch model output and KV PyTorch model output"
assert (pytorch_hf_tokens == pytorch_kv_tokens).all(), (
"Tokens don't match for HF PyTorch model output and KV PyTorch model output"
)

onnx_model_path = qeff_model.export()
ort_tokens = api_runner.run_kv_model_on_ort(onnx_model_path)
@@ -109,9 +109,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
cloud_ai_100_tokens = exec_info.generated_ids[0] # Because we always run for single input and single batch size
gen_len = ort_tokens.shape[-1]
assert (
ort_tokens == cloud_ai_100_tokens[:, :gen_len]
).all(), "Tokens don't match for ONNXRT output and Cloud AI 100 output."
assert (ort_tokens == cloud_ai_100_tokens[:, :gen_len]).all(), (
"Tokens don't match for ONNXRT output and Cloud AI 100 output."
)

# testing for CB models
model_hf, _ = load_causal_lm_model(model_config)
6 changes: 3 additions & 3 deletions tests/text_generation/test_text_generation.py
@@ -98,6 +98,6 @@ def test_generate_text_stream(
for decoded_tokens in text_generator.generate_stream_tokens(Constants.INPUT_STR, generation_len=max_gen_len):
stream_tokens.extend(decoded_tokens)

assert (
cloud_ai_100_output == stream_tokens
), f"Deviation in output observed while comparing regular execution and streamed output: {cloud_ai_100_output} != {stream_tokens}"
assert cloud_ai_100_output == stream_tokens, (
f"Deviation in output observed while comparing regular execution and streamed output: {cloud_ai_100_output} != {stream_tokens}"
)
18 changes: 9 additions & 9 deletions tests/transformers/models/test_causal_lm_models.py
@@ -110,9 +110,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(

pytorch_kv_tokens = api_runner.run_kv_model_on_pytorch(qeff_model.model)

assert (
pytorch_hf_tokens == pytorch_kv_tokens
).all(), "Tokens don't match for HF PyTorch model output and KV PyTorch model output"
assert (pytorch_hf_tokens == pytorch_kv_tokens).all(), (
"Tokens don't match for HF PyTorch model output and KV PyTorch model output"
)

onnx_model_path = qeff_model.export()
ort_tokens = api_runner.run_kv_model_on_ort(onnx_model_path, is_tlm=is_tlm)
@@ -133,9 +133,9 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
exec_info = qeff_model.generate(tokenizer, prompts=Constants.INPUT_STR)
cloud_ai_100_tokens = exec_info.generated_ids[0] # Because we always run for single input and single batch size
gen_len = ort_tokens.shape[-1]
assert (
ort_tokens == cloud_ai_100_tokens[:, :gen_len]
).all(), "Tokens don't match for ONNXRT output and Cloud AI 100 output."
assert (ort_tokens == cloud_ai_100_tokens[:, :gen_len]).all(), (
"Tokens don't match for ONNXRT output and Cloud AI 100 output."
)

# testing for CB models
model_hf, _ = load_causal_lm_model(model_config)
@@ -204,9 +204,9 @@ def test_causal_lm_export_with_deprecated_api(model_name):
new_api_ort_tokens = api_runner.run_kv_model_on_ort(new_api_onnx_model_path)
old_api_ort_tokens = api_runner.run_kv_model_on_ort(old_api_onnx_model_path)

assert (
new_api_ort_tokens == old_api_ort_tokens
).all(), "New API output does not match old API output for ONNX export function"
assert (new_api_ort_tokens == old_api_ort_tokens).all(), (
"New API output does not match old API output for ONNX export function"
)


@pytest.mark.on_qaic
12 changes: 6 additions & 6 deletions tests/transformers/spd/test_spd_inference.py
@@ -74,9 +74,9 @@ def get_padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int):
"""
num_chunks = -(input_len // -prefill_seq_len) # ceil divide without float
input_len_padded = num_chunks * prefill_seq_len # Convert input_len to a multiple of prefill_seq_len
assert (
input_len_padded <= ctx_len
), "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
assert input_len_padded <= ctx_len, (
"input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
)
return input_len_padded


@@ -325,9 +325,9 @@ def test_spec_decode_inference(
for prompt, generation in zip(prompts, batch_decode):
print(f"{prompt=} {generation=}")
# validation check
assert mean_num_accepted_tokens == float(
num_speculative_tokens + 1
), f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens+1}"
assert mean_num_accepted_tokens == float(num_speculative_tokens + 1), (
f"mean number of accepted tokens is {mean_num_accepted_tokens} but should be {num_speculative_tokens + 1}"
)
del target_model_session
del draft_model_session
generated_ids = np.asarray(generated_ids).flatten()
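get_padded_input_len above rounds the prompt length up to the next multiple of prefill_seq_len with the negative-floor-division trick, and the touched assert only guards that the padded length still fits in the context window. A standalone sketch of the rounding (the function name here is a local stand-in for the repo helper):

def padded_input_len(input_len: int, prefill_seq_len: int, ctx_len: int) -> int:
    # -(a // -b) equals ceil(a / b) for positive ints, so no float math is needed.
    num_chunks = -(input_len // -prefill_seq_len)
    input_len_padded = num_chunks * prefill_seq_len
    assert input_len_padded <= ctx_len, (
        "input_len rounded to nearest prefill_seq_len multiple should be less than ctx_len"
    )
    return input_len_padded

assert padded_input_len(100, prefill_seq_len=32, ctx_len=256) == 128  # 4 chunks of 32
assert padded_input_len(64, prefill_seq_len=32, ctx_len=256) == 64    # already a multiple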
12 changes: 6 additions & 6 deletions tests/transformers/test_transformer_pytorch_transforms.py
@@ -320,9 +320,9 @@ def test_awq_to_matmulnbits_transform(in_features, out_features):
assert transformed
new_out = new_module(rand_data)
assert isinstance(new_module, QuantLinearORT)
assert compare_original_vs_kv_model_pt_outputs(
old_out, new_out, tolerance=1e-8
), "Test failed because MAE is greater than tolerance"
assert compare_original_vs_kv_model_pt_outputs(old_out, new_out, tolerance=1e-8), (
"Test failed because MAE is greater than tolerance"
)


@pytest.mark.parametrize("in_features", [4096, 4096])
@@ -349,6 +349,6 @@ def test_gptq_to_matmulnbits_transform(in_features, out_features):
assert transformed
new_out = new_module(rand_data)
assert isinstance(new_module, QuantLinearORT)
assert compare_original_vs_kv_model_pt_outputs(
old_out, new_out, tolerance=1e-4
), "Test failed because MAE is greater than tolerance"
assert compare_original_vs_kv_model_pt_outputs(old_out, new_out, tolerance=1e-4), (
"Test failed because MAE is greater than tolerance"
)