diff --git a/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py b/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py index 2f2c7fb..56bf1fb 100644 --- a/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py +++ b/huggingface_model/DiscoResearch/mixtral_7b_8expert/configuration_moe_mistral.py @@ -106,7 +106,7 @@ def __init__( vocab_size=32000, hidden_size=4096, intermediate_size=14336, - num_hidden_layers=4, + num_hidden_layers=32, num_attention_heads=32, num_key_value_heads=8, hidden_act="silu",