From 8e2d9b2a9b49b99c3867650ca81c319d05cb98ed Mon Sep 17 00:00:00 2001
From: sallyjunjun <72725839+sallyjunjun@users.noreply.github.com>
Date: Tue, 30 Jul 2024 16:14:57 +0800
Subject: [PATCH] fix moe mistral num_hidden_layers error (#9)

---
 .../mixtral_7b_8expert/configuration_moe_mistral.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py b/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py
index 2f2c7fb..56bf1fb 100644
--- a/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py
+++ b/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py
@@ -106,7 +106,7 @@ def __init__(
         vocab_size=32000,
         hidden_size=4096,
         intermediate_size=14336,
-        num_hidden_layers=4,
+        num_hidden_layers=32,
         num_attention_heads=32,
         num_key_value_heads=8,
         hidden_act="silu",
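
Note on the change: the DiscoResearch mixtral_7b_8expert checkpoint follows the
Mistral-7B/Mixtral architecture, which uses 32 transformer layers. With the old
default of num_hidden_layers=4, any config constructed without an explicit value
(or loaded from a config.json missing that field) silently described a 4-layer
model whose shapes would not match the pretrained weights. A minimal sketch of
the post-patch behavior follows; the class name MoEMistralConfig and the import
path are assumptions for illustration, not taken from the patch itself:

    # Hypothetical usage sketch; the real class is defined in
    # configuration_moe_mistral.py and its name may differ.
    from configuration_moe_mistral import MoEMistralConfig

    cfg = MoEMistralConfig()        # num_hidden_layers not passed explicitly
    print(cfg.num_hidden_layers)    # 32 after this patch (previously 4)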