
Comparing changes

This is a direct comparison between two commits made in this repository.

base repository: NVIDIA/Megatron-LM
base: 87dd51b45fb2e01953a5f42b7cd787c44ab3a1d9
..
head repository: NVIDIA/Megatron-LM
compare: 2a0775891e562d3443a992e822cd7caa43eb56e0
Showing with 0 additions and 5 deletions.
  1. +0 −3 tools/checkpoint/saver_qwen2_hf.py
  2. +0 −2 tools/checkpoint/schema_base.py
3 changes: 0 additions & 3 deletions tools/checkpoint/saver_qwen2_hf.py
@@ -123,7 +123,6 @@ def construct_qwen2moe_config(
         raise('Cannot import Qwen2MoeForCausalLM from transformers.')
 
     print("Converting from megatron to qwen2-moe ...")
-    print('Megatron config:', pprint.pformat(megatron_cfg.__dict__))
 
     if megatron_cfg.moe_shared_expert_intermediate_size is not None:
         moe_shared_expert_intermediate_size = megatron_cfg.moe_shared_expert_intermediate_size
@@ -175,7 +174,6 @@ def construct_qwen2_config(
         exit(1)
 
     print("Converting from megatron to qwen2 ...")
-    print('Megatron config:', pprint.pformat(megatron_cfg.__dict__))
 
     config_dict = dict(
         bos_token_id=151643,
@@ -214,7 +212,6 @@ def set_dense_mlp(qwen2_hf, prefix, msg):
 
 
 def set_moe_mlp(qwen2_hf, prefix, msg, md):
-
     shared_expert_mlp_l0_weight_W = msg.pop("shared mlp l0 weight W")
     shared_expert_mlp_l0_weight_V = msg.pop("shared mlp l0 weight V")
     shared_expert_mlp_l1_weight = msg.pop("shared mlp l1 weight")
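
The three deletions above drop the full-config debug dump (plus a stray blank line) from the two converter entry points, keeping only the short "Converting from megatron to ..." progress line. A minimal sketch of what the removed call printed, with a hypothetical verbose flag showing how such a dump could be gated instead of removed (the flag and the stand-in config object are assumptions, not part of this commit):

import argparse
import pprint

# Hypothetical stand-in for the Megatron args namespace passed as megatron_cfg.
megatron_cfg = argparse.Namespace(hidden_size=4096, num_layers=32,
                                  moe_shared_expert_intermediate_size=None)

verbose = False  # hypothetical flag; the commit itself simply drops the dump
if verbose:
    # pprint.pformat renders every attribute of the config on its own line,
    # which is the multi-screen wall of text these deletions remove from stdout.
    print('Megatron config:', pprint.pformat(megatron_cfg.__dict__))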
2 changes: 0 additions & 2 deletions tools/checkpoint/schema_base.py
@@ -77,8 +77,6 @@ def _set(cls, schema, model, params):
         for k, m in schema.items():
             if k in params:
                 cls._set_deep_tensor(model, m, params[k])
-            else:
-                print(f"miss key '{k}'")
 
     def set(self, key, model, params):
        self._set(self[key], model, params)
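
These two deletions change _set from printing a "miss key" warning for every schema key absent from params to skipping such keys silently. A minimal, runnable sketch of the resulting behavior (the stub _set_deep_tensor and the toy schema are assumptions for illustration, not the real schema classes):

def _set_deep_tensor(model, mapping, tensor):
    # Stub standing in for the real copy of `tensor` into the model at `mapping`.
    model[mapping] = tensor

def _set(schema, model, params):
    for k, m in schema.items():
        if k in params:
            _set_deep_tensor(model, m, params[k])
        # Keys absent from params now fall through with no "miss key" message.

model = {}
schema = {"embed": "embed.weight", "norm": "norm.weight"}
_set(schema, model, {"embed": [0.0]})  # "norm" is missing: previously warned, now silent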