Skip to content

Commit

Permalink
Merge pull request #197 from creachadair/muckthebits
Browse files: browse the repository at this point in the history
Prevent tokenization from modifying its input.
  • Loading branch information
creachadair authored Jan 29, 2019
2 parents 3748186 + 7d277b1 commit 260dcfe
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 0 deletions.
4 changes: 4 additions & 0 deletions internal/tokenizer/tokenize.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ func Tokenize(content []byte) []string {
content = content[:byteLimit]
}

// Copy the input so that changes wrought by the tokenization steps do not
// modify the caller's copy of the input. See #196.
content = append([]byte(nil), content...)

tokens := make([][]byte, 0, 50)
for _, extract := range extractTokens {
var extractedTokens [][]byte
Expand Down
3 changes: 3 additions & 0 deletions internal/tokenizer/tokenize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ func TestTokenize(t *testing.T) {

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
before := string(test.content)
tokens := Tokenize(test.content)
after := string(test.content)
assert.Equal(t, before, after, "the input slice was modified")
assert.Equal(t, len(test.expected), len(tokens), fmt.Sprintf("token' slice length = %v, want %v", len(test.expected), len(tokens)))
for i, expectedToken := range test.expected {
assert.Equal(t, expectedToken, tokens[i], fmt.Sprintf("token = %v, want %v", tokens[i], expectedToken))
Expand Down

0 comments on commit 260dcfe

Please sign in to comment.