From 4027b494b3b969736d0dd12f9e3c09ff776b9384 Mon Sep 17 00:00:00 2001
From: "M. J. Fromberger"
Date: Tue, 29 Jan 2019 10:41:06 -0800
Subject: [PATCH 1/3] Add documentation comments to package tokenizer.

Although this package is internal, it still exports an API and deserves some
comments. Serves in partial satisfaction of #195.

Signed-off-by: M. J. Fromberger
---
 internal/tokenizer/tokenize.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go
index dadbccda..d7f1c431 100644
--- a/internal/tokenizer/tokenize.go
+++ b/internal/tokenizer/tokenize.go
@@ -1,3 +1,6 @@
+// Package tokenizer implements file tokenization used by the enry file
+// classifier. This package is an implementation detail of enry and should not
+// be imported by other packages.
 package tokenizer
 
 import (
@@ -8,6 +11,9 @@ import (
 
 const byteLimit = 100000
 
+// Tokenize returns classification tokens from content. The tokens returned
+// should match what the Linguist library returns. At most the first 100KB of
+// content are tokenized.
 func Tokenize(content []byte) []string {
 	if len(content) > byteLimit {
 		content = content[:byteLimit]

From dabb41527f55e6ef88f3a8302d6d3f243ce1f972 Mon Sep 17 00:00:00 2001
From: "M. J. Fromberger"
Date: Tue, 29 Jan 2019 11:25:53 -0800
Subject: [PATCH 2/3] Apply suggestions from review.

Signed-off-by: M. J. Fromberger
---
 internal/tokenizer/tokenize.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go
index d7f1c431..5c5094ba 100644
--- a/internal/tokenizer/tokenize.go
+++ b/internal/tokenizer/tokenize.go
@@ -11,9 +11,9 @@ import (
 
 const byteLimit = 100000
 
-// Tokenize returns classification tokens from content. The tokens returned
-// should match what the Linguist library returns. At most the first 100KB of
-// content are tokenized.
+// Tokenize returns language-agnostic lexical tokens from content. The tokens
+// returned should match what the Linguist library returns. At most the first
+// 100KB of content are tokenized.
 func Tokenize(content []byte) []string {
 	if len(content) > byteLimit {
 		content = content[:byteLimit]

From 5245079744af89d52622364758a486d8ff5b7b2f Mon Sep 17 00:00:00 2001
From: "M. J. Fromberger"
Date: Tue, 29 Jan 2019 11:27:45 -0800
Subject: [PATCH 3/3] Apply suggestions from review.

Signed-off-by: M. J. Fromberger
---
 internal/tokenizer/tokenize.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/tokenizer/tokenize.go b/internal/tokenizer/tokenize.go
index 5c5094ba..6a721c46 100644
--- a/internal/tokenizer/tokenize.go
+++ b/internal/tokenizer/tokenize.go
@@ -1,4 +1,4 @@
-// Package tokenizer implements file tokenization used by the enry file
+// Package tokenizer implements file tokenization used by the enry content
 // classifier. This package is an implementation detail of enry and should not
 // be imported by other packages.
 package tokenizer