-
Notifications
You must be signed in to change notification settings - Fork 366
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed decoding of large tokens (over 16 bytes) in streaming text decoder
- Loading branch information
1 parent
54dffe7
commit 98635a0
Showing
4 changed files
with
61 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
using System.Text; | ||
using LLama.Common; | ||
using Xunit.Abstractions; | ||
|
||
namespace LLama.Unittest; | ||
|
||
// xUnit tests for StreamingTokenDecoder: each test tokenizes a string with the loaded model, | ||
// feeds every token to the decoder, and checks that the decoded text matches the original string. | ||
// The class implements IDisposable; Dispose releases the model weights loaded in the constructor. | ||
public class StreamingTextDecoderTests | ||
: IDisposable | ||
{ | ||
// Model weights loaded in the constructor and used by every test; disposed in Dispose(). | ||
private readonly LLamaWeights _model; | ||
// Output helper passed to the constructor. NOTE(review): stored but not read anywhere in this class. | ||
private readonly ITestOutputHelper _testOutputHelper; | ||
// Parameters the model was loaded with (built from Constants.ModelPath). | ||
private readonly ModelParams _params; | ||
|
||
// Stores the output helper, builds model parameters from Constants.ModelPath, | ||
// and loads the model weights with those parameters. | ||
public StreamingTextDecoderTests(ITestOutputHelper testOutputHelper) | ||
{ | ||
_testOutputHelper = testOutputHelper; | ||
_params = new ModelParams(Constants.ModelPath); | ||
_model = LLamaWeights.LoadFromFile(_params); | ||
} | ||
|
||
// Disposes the model weights loaded in the constructor. | ||
public void Dispose() | ||
{ | ||
_model.Dispose(); | ||
} | ||
|
||
// Round-trips a plain ASCII sentence: tokenize, add each token to the decoder, read the text back. | ||
[Fact] | ||
public void DecodesSimpleText() | ||
{ | ||
var decoder = new StreamingTokenDecoder(Encoding.UTF8, _model); | ||
|
||
const string text = "The cat sat on the mat"; | ||
// The meaning of the two 'false' flags is defined by Tokenize and is not visible here. | ||
var tokens = _model.NativeHandle.Tokenize(text, false, false, Encoding.UTF8); | ||
|
||
// Feed the tokens to the decoder one at a time, in order. | ||
foreach (var lLamaToken in tokens) | ||
decoder.Add(lLamaToken); | ||
|
||
// Surrounding whitespace is trimmed before comparing. | ||
// NOTE(review): presumably tokenize/decode adds a leading space - confirm. | ||
Assert.Equal(text, decoder.Read().Trim()); | ||
} | ||
|
||
// Same round trip as DecodesSimpleText, but with Chinese characters and emoji in the input. | ||
// NOTE(review): presumably exercises tokens or characters that span several bytes - confirm. | ||
[Fact] | ||
public void DecodesComplexText() | ||
{ | ||
var decoder = new StreamingTokenDecoder(Encoding.UTF8, _model); | ||
|
||
const string text = "猫坐在垫子上 😀🤨🤐😏"; | ||
// The meaning of the two 'false' flags is defined by Tokenize and is not visible here. | ||
var tokens = _model.NativeHandle.Tokenize(text, false, false, Encoding.UTF8); | ||
|
||
// Feed the tokens to the decoder one at a time, in order. | ||
foreach (var lLamaToken in tokens) | ||
decoder.Add(lLamaToken); | ||
|
||
// Surrounding whitespace is trimmed before comparing. | ||
// NOTE(review): presumably tokenize/decode adds a leading space - confirm. | ||
Assert.Equal(text, decoder.Read().Trim()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters