Skip to content

Commit

Permalink
Fix model saving bug for GQA
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhongkaifu committed Oct 7, 2024
1 parent ec3eeb6 commit 7e8fee2
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 3 deletions.
4 changes: 2 additions & 2 deletions Seq2SeqSharp/Layers/GroupQueryAttention.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public GroupQueryAttention(string name, int num_heads, int num_kv_groups, int d_
/// <returns>Transformed output tensor</returns>
public (IWeightTensor, IWeightTensor) Perform(IWeightTensor inputQ, IWeightTensor inputK, IWeightTensor inputV, IWeightTensor keyMask, int batchSize, IComputeGraph graph, bool outputAttenWeights = false, Dictionary<string, IWeightTensor> cachedTensors = null)
{
string keyName = $"{m_name}_MultiHeadAttention_3";
string keyName = $"{m_name}_GroupQueryAttention_3";
using IComputeGraph g = graph.CreateSubGraph(keyName);
int seqLenQ = inputQ.Rows / batchSize;

Expand Down Expand Up @@ -340,7 +340,7 @@ public void Load(IModel stream)

public IWeightTensor Perform(IWeightTensor inputQ, IWeightTensor keyMask, int batchSize, IComputeGraph graph, Dictionary<string, IWeightTensor> cachedTensors = null)
{
string keyName = $"{m_name}_MultiHeadAttention_3";
string keyName = $"{m_name}_GroupQueryAttention_1";
using IComputeGraph g = graph.CreateSubGraph(keyName);
int seqLenQ = inputQ.Rows / batchSize;

Expand Down
2 changes: 2 additions & 0 deletions Seq2SeqSharp/Models/Model_4_ProtoBufSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,8 @@ public Model_4_ProtoBufSerializer(Model m)
ExpertsPerTokenFactor = m.ExpertsPerTokenFactor;
PEType= m.PEType;
NormType = m.NormType;
MultiHeadAttentionType = m.MultiHeadAttentionType;
KVGroupNum = m.KVGroupNum;
}
public static Model_4_ProtoBufSerializer Create(Model m) => new Model_4_ProtoBufSerializer(m);

Expand Down
2 changes: 1 addition & 1 deletion Seq2SeqSharp/Seq2SeqSharp.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
<PlatformTarget>AnyCPU</PlatformTarget>
<AppendTargetFrameworkToOutputPath>false</AppendTargetFrameworkToOutputPath>
<OutputPath>bin\</OutputPath>
<Version>2.8.13</Version>
<Version>2.8.14</Version>
<Description>Seq2SeqSharp is a tensor based fast &amp; flexible encoder-decoder deep neural network framework written by .NET (C#). It can be used for sequence-to-sequence task, sequence-labeling task and sequence-classification task and other NLP tasks. Seq2SeqSharp supports both CPUs (x86, x64 and ARM64) and GPUs. It's powered by .NET core, so Seq2SeqSharp can run on both Windows and Linux without any modification and recompilation.</Description>
<PackageReadmeFile>README.md</PackageReadmeFile>
<Title>Seq2SeqSharp</Title>
Expand Down

0 comments on commit 7e8fee2

Please sign in to comment.