Commit 1951a3f
update workflows and code format
sunhailin-Leo committed Aug 29, 2023
1 parent ae1ee97 commit 1951a3f
Showing 7 changed files with 91 additions and 30 deletions.
.github/workflows/lint.yml (2 changes: 1 addition & 1 deletion)
@@ -11,7 +11,7 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-go@v4
         with:
-          go-version: 1.20.x
+          go-version: 1.21.x
       - run: go version
       #- run: diff -u <(echo -n) <(gofmt -d .)
       - name: Run golangci-lint
models/bert/model.go (8 changes: 4 additions & 4 deletions)
@@ -215,12 +215,12 @@ func (m *ModelService) generateHTTPOutputs(
     for i := range inferOutputs {
         requestOutputs[i] = HTTPOutput{Name: inferOutputs[i].Name}
         if _, ok := inferOutputs[i].Parameters[ModelRespBodyOutputBinaryDataKey]; ok {
-            requestOutputs[i].Parameters.BinaryData =
-                inferOutputs[i].Parameters[ModelRespBodyOutputBinaryDataKey].GetBoolParam()
+            requestOutputs[i].Parameters.BinaryData = inferOutputs[i].Parameters[ModelRespBodyOutputBinaryDataKey].
+                GetBoolParam()
         }
         if _, ok := inferOutputs[i].Parameters[ModelRespBodyOutputClassificationDataKey]; ok {
-            requestOutputs[i].Parameters.Classification =
-                inferOutputs[i].Parameters[ModelRespBodyOutputClassificationDataKey].GetInt64Param()
+            requestOutputs[i].Parameters.Classification = inferOutputs[i].Parameters[ModelRespBodyOutputClassificationDataKey].
+                GetInt64Param()
         }
     }
     return requestOutputs
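A note on the new wrapping style in model.go: Go's automatic semicolon insertion means a long selector chain can only break after a dot, never before one, so the reformatted assignments end the first line on `Parameters[...].` and carry the method call to the next line. A minimal standalone sketch of the same idiom, using only stdlib types rather than the project's:

package main

import (
    "fmt"
    "strings"
)

func main() {
    // Breaking after the dot keeps a long chain inside a line-length
    // limit; ending the first line on the identifier instead would get
    // a semicolon inserted and fail to compile.
    out := strings.NewReplacer("a", "A", "b", "B").
        Replace("abc")
    fmt.Println(out) // ABc
}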
models/bert/tokenizer.go (30 changes: 22 additions & 8 deletions)
@@ -180,12 +180,16 @@ func (t *BaseTokenizer) splitOn(text string, shouldSplit func(rune) bool, includ
             wordLen := len(word)
             if wordLen > 0 {
                 words = append(words, StringOffsetsPair{
-                    String: string(word), Offsets: OffsetsType{Start: offset - wordLen, End: offset}})
+                    String:  string(word),
+                    Offsets: OffsetsType{Start: offset - wordLen, End: offset},
+                })
                 word = make([]rune, 0, cap(word))
             }
             if includeSplitToken {
                 words = append(words, StringOffsetsPair{
-                    String: string(r), Offsets: OffsetsType{Start: offset, End: offset + 1}})
+                    String:  string(r),
+                    Offsets: OffsetsType{Start: offset, End: offset + 1},
+                })
             }
         } else {
             word = append(word, r)
@@ -196,7 +200,9 @@ func (t *BaseTokenizer) splitOn(text string, shouldSplit func(rune) bool, includ
     // Don't forget the potential last word
     if len(word) > 0 {
         words = append(words, StringOffsetsPair{
-            String: string(word), Offsets: OffsetsType{Start: offset - len(word), End: offset}})
+            String:  string(word),
+            Offsets: OffsetsType{Start: offset - len(word), End: offset},
+        })
     }
     return words
 }
@@ -213,12 +219,16 @@ func (t *BaseTokenizer) splitOnChinese(text string, shouldSplit func(rune) bool,
             wordLen := len(word)
             if wordLen > 0 {
                 words = append(words, StringOffsetsPair{
-                    String: string(word), Offsets: OffsetsType{Start: offset - wordLen, End: offset}})
+                    String:  string(word),
+                    Offsets: OffsetsType{Start: offset - wordLen, End: offset},
+                })
                 word = make([]rune, 0, cap(word))
             }
             if includeSplitToken || includeSplitFunc(r) {
                 words = append(words, StringOffsetsPair{
-                    String: string(r), Offsets: OffsetsType{Start: offset, End: offset + 1}})
+                    String:  string(r),
+                    Offsets: OffsetsType{Start: offset, End: offset + 1},
+                })
             }
         } else {
             word = append(word, r)
@@ -229,7 +239,9 @@ func (t *BaseTokenizer) splitOnChinese(text string, shouldSplit func(rune) bool,
     // Don't forget the potential last word
     if len(word) > 0 {
         words = append(words, StringOffsetsPair{
-            String: string(word), Offsets: OffsetsType{Start: offset - len(word), End: offset}})
+            String:  string(word),
+            Offsets: OffsetsType{Start: offset - len(word), End: offset},
+        })
     }
     return words
 }
@@ -275,7 +287,7 @@ func (t *WordPieceTokenizer) TokenizeChineseCharMode(text string) []StringOffset
 }
 
 // WordPieceTokenize
-//transforms the input token in a new slice of words or sub-words units based on the supplied vocabulary.
+// transforms the input token in a new slice of words or sub-words units based on the supplied vocabulary.
 // The resulting tokens preserve the alignment with the portion of the original text they belong to.
 func (t *WordPieceTokenizer) WordPieceTokenize(tokens []StringOffsetsPair) []StringOffsetsPair {
     outputTokens := make([]StringOffsetsPair, 0)
@@ -308,7 +320,9 @@ func (t *WordPieceTokenizer) WordPieceTokenize(tokens []StringOffsetsPair) []Str
                 found = true
                 curStrToken.String = subStr
                 curStrToken.Offsets = OffsetsType{
-                    Start: tokens[i].Offsets.Start + start, End: tokens[i].Offsets.Start + end}
+                    Start: tokens[i].Offsets.Start + start,
+                    End:   tokens[i].Offsets.Start + end,
+                }
                 break
             }
             end--
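All six hunks above apply the same gofmt-style multi-line composite literal layout: one field per line, the closing brace on its own line, and a trailing comma after the last field, which Go's grammar requires once the brace moves to its own line. A self-contained sketch reusing the field names from the diff (local stand-in types, not imports from the project):

package main

import "fmt"

// Stand-ins mirroring the tokenizer's types so the sketch compiles alone.
type OffsetsType struct {
    Start int
    End   int
}

type StringOffsetsPair struct {
    String  string
    Offsets OffsetsType
}

func main() {
    // The comma after the last field is mandatory here because the
    // closing brace sits on the next line.
    pair := StringOffsetsPair{
        String:  "hello",
        Offsets: OffsetsType{Start: 0, End: 5},
    }
    fmt.Printf("%+v\n", pair)
}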
nvidia_inferenceserver/triton_service_interface.go (4 changes: 2 additions & 2 deletions)
@@ -525,7 +525,7 @@ func (t *TritonClientService) ModelInferStats(
 
 // ModelLoadWithHTTP Load Model with http
 // modelConfigBody ==>
-//https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_repository.md#examples
+// https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_model_repository.md#examples
 func (t *TritonClientService) ModelLoadWithHTTP(
     modelName string, modelConfigBody []byte, timeout time.Duration,
 ) (*RepositoryModelLoadResponse, error) {
@@ -796,7 +796,7 @@ func (t *TritonClientService) GetModelTracingSetting(
 
 // SetModelTracingSetting set model tracing setting.
 // Param: settingMap ==>
-//https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_trace.md#trace-setting-response-json-object
+// https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_trace.md#trace-setting-response-json-object
 func (t *TritonClientService) SetModelTracingSetting(
     modelName string, settingMap map[string]*TraceSettingRequest_SettingValue, timeout time.Duration,
 ) (*TraceSettingResponse, error) {
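Both hunks here only add a space after `//`. gofmt itself does not require that space, so this most likely satisfies a comment-formatting lint check enabled through golangci-lint (gocritic's commentFormatting rule, for instance, flags comments with no space between `//` and the text). A toy illustration:

package main

//flagged: no space between the slashes and the text
// fine: a single space separates them
func main() {}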
test/tokenizer_test.go (68 changes: 53 additions & 15 deletions)
@@ -15,8 +15,8 @@ func TestFullTokenizerNotChinese(t *testing.T) {
     }
     tokenizer := bert.NewWordPieceTokenizer(voc)
     tokenResult := tokenizer.Tokenize("นครปฐม เมืองนครปฐม ถนนขาด เลขที่ 69 หมู่ 1 ซ. - - ถ. -")
-    var tokenStrSlice = make([]string, len(tokenResult))
-    var tokenOffsetSlice = make([]bert.OffsetsType, len(tokenResult))
+    tokenStrSlice := make([]string, len(tokenResult))
+    tokenOffsetSlice := make([]bert.OffsetsType, len(tokenResult))
     for i, token := range tokenResult {
         tokenStrSlice[i] = token.String
         tokenOffsetSlice[i] = token.Offsets
@@ -31,19 +31,47 @@ func TestFullTokenizerNotChinese(t *testing.T) {
     expectedTokenSlice := []string{
         "น", "##คร", "##ป", "##ฐ", "##ม", "เ", "##มือง", "##น", "##คร", "##ป", "##ฐ", "##ม",
         "ถ", "##น", "##น", "##ข", "##า", "##ด", "เ", "##ล", "##ข", "##ที่", "69", "ห", "##ม",
-        "##ู่", "1", "ซ", ".", "-", "-", "ถ", ".", "-"}
+        "##ู่", "1", "ซ", ".", "-", "-", "ถ", ".", "-",
+    }
     if !reflect.DeepEqual(tokenStrSlice, expectedTokenSlice) {
         t.Errorf("Expected '%v', but got '%v'", expectedTokenSlice, tokenStrSlice)
     }
 
     expectedTokenOffsetSlice := []bert.OffsetsType{
-        {Start: 0, End: 1}, {Start: 1, End: 3}, {Start: 3, End: 4}, {Start: 4, End: 5}, {Start: 5, End: 6},
-        {Start: 7, End: 8}, {Start: 8, End: 12}, {Start: 12, End: 13}, {Start: 13, End: 15}, {Start: 15, End: 16},
-        {Start: 16, End: 17}, {Start: 17, End: 18}, {Start: 19, End: 20}, {Start: 20, End: 21}, {Start: 21, End: 22},
-        {Start: 22, End: 23}, {Start: 23, End: 24}, {Start: 24, End: 25}, {Start: 26, End: 27}, {Start: 27, End: 28},
-        {Start: 28, End: 29}, {Start: 29, End: 32}, {Start: 33, End: 35}, {Start: 36, End: 37}, {Start: 37, End: 38},
-        {Start: 38, End: 40}, {Start: 41, End: 42}, {Start: 43, End: 44}, {Start: 44, End: 45}, {Start: 46, End: 47},
-        {Start: 48, End: 49}, {Start: 50, End: 51}, {Start: 51, End: 52}, {Start: 53, End: 54},
+        {Start: 0, End: 1},
+        {Start: 1, End: 3},
+        {Start: 3, End: 4},
+        {Start: 4, End: 5},
+        {Start: 5, End: 6},
+        {Start: 7, End: 8},
+        {Start: 8, End: 12},
+        {Start: 12, End: 13},
+        {Start: 13, End: 15},
+        {Start: 15, End: 16},
+        {Start: 16, End: 17},
+        {Start: 17, End: 18},
+        {Start: 19, End: 20},
+        {Start: 20, End: 21},
+        {Start: 21, End: 22},
+        {Start: 22, End: 23},
+        {Start: 23, End: 24},
+        {Start: 24, End: 25},
+        {Start: 26, End: 27},
+        {Start: 27, End: 28},
+        {Start: 28, End: 29},
+        {Start: 29, End: 32},
+        {Start: 33, End: 35},
+        {Start: 36, End: 37},
+        {Start: 37, End: 38},
+        {Start: 38, End: 40},
+        {Start: 41, End: 42},
+        {Start: 43, End: 44},
+        {Start: 44, End: 45},
+        {Start: 46, End: 47},
+        {Start: 48, End: 49},
+        {Start: 50, End: 51},
+        {Start: 51, End: 52},
+        {Start: 53, End: 54},
     }
     if !reflect.DeepEqual(tokenOffsetSlice, expectedTokenOffsetSlice) {
         t.Errorf("Expected '%v', but got '%v'", expectedTokenOffsetSlice, tokenOffsetSlice)
@@ -72,8 +100,8 @@ func TestFullTokenizerChinese(t *testing.T) {
     }
     tokenizer := bert.NewWordPieceTokenizer(voc)
     tokenResult := tokenizer.TokenizeChinese(strings.ToLower("广东省深圳市南山区腾讯大厦"))
-    var tokenStrSlice = make([]string, len(tokenResult))
-    var tokenOffsetSlice = make([]bert.OffsetsType, len(tokenResult))
+    tokenStrSlice := make([]string, len(tokenResult))
+    tokenOffsetSlice := make([]bert.OffsetsType, len(tokenResult))
     for i, token := range tokenResult {
         tokenStrSlice[i] = token.String
         tokenOffsetSlice[i] = token.Offsets
@@ -91,9 +119,19 @@
     }
 
     expectedTokenOffsetSlice := []bert.OffsetsType{
-        {Start: 0, End: 1}, {Start: 1, End: 2}, {Start: 2, End: 3}, {Start: 3, End: 4}, {Start: 4, End: 5},
-        {Start: 5, End: 6}, {Start: 6, End: 7}, {Start: 7, End: 8}, {Start: 8, End: 9}, {Start: 9, End: 10},
-        {Start: 10, End: 11}, {Start: 11, End: 12}, {Start: 12, End: 13},
+        {Start: 0, End: 1},
+        {Start: 1, End: 2},
+        {Start: 2, End: 3},
+        {Start: 3, End: 4},
+        {Start: 4, End: 5},
+        {Start: 5, End: 6},
+        {Start: 6, End: 7},
+        {Start: 7, End: 8},
+        {Start: 8, End: 9},
+        {Start: 9, End: 10},
+        {Start: 10, End: 11},
+        {Start: 11, End: 12},
+        {Start: 12, End: 13},
     }
     if !reflect.DeepEqual(tokenOffsetSlice, expectedTokenOffsetSlice) {
         t.Errorf("Expected '%v', but got '%v'", expectedTokenOffsetSlice, tokenOffsetSlice)
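Two mechanical changes run through this test file: `var x = make(...)` becomes the short declaration `x := make(...)` (the rewrite stricter formatters such as gofumpt suggest for initialized locals), and the packed expected-value slices are reflowed to one element per line, so a failing comparison points at a single element. A small sketch of the declaration change:

package main

import "fmt"

func main() {
    // Before: var tokenStrSlice = make([]string, 0, 4)
    // After, using a short variable declaration inside the function:
    tokenStrSlice := make([]string, 0, 4)
    tokenStrSlice = append(tokenStrSlice, "น", "##คร")
    fmt.Println(tokenStrSlice, len(tokenStrSlice), cap(tokenStrSlice))
}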
utils/funcs.go (7 changes: 7 additions & 0 deletions)
@@ -10,6 +10,7 @@ import (
 )
 
 // IsWhitespace checks whether rune c is a BERT whitespace character.
+//
 //go:inline
 func IsWhitespace(c rune) bool {
     if c <= 0xFF {
@@ -20,6 +21,7 @@ func IsWhitespace(c rune) bool {
 }
 
 // IsControl checks whether rune c is a BERT control character.
+//
 //go:inline
 func IsControl(c rune) bool {
     switch c {
@@ -35,6 +37,7 @@ func IsControl(c rune) bool {
 }
 
 // IsPunctuation checks whether rune c is a BERT punctuation character.
+//
 //go:inline
 func IsPunctuation(c rune) bool {
     // return unicode.In(c, utils.Bp, unicode.P)
@@ -43,25 +46,29 @@ func IsPunctuation(c rune) bool {
 }
 
 // IsChinese validates that rune c is in the CJK range according to BERT spec.
+//
 //go:inline
 func IsChinese(c rune) bool {
     // unicode.Is(unicode.Han, c)
     return unicode.In(c, BertChineseChar, unicode.P)
 }
 
 // IsChineseOrNumber validates that rune c is in the CJK range according to BERT spec or Number.
+//
 //go:inline
 func IsChineseOrNumber(c rune) bool {
     return unicode.In(c, BertChineseChar, unicode.P) || unicode.IsNumber(c)
 }
 
 // IsWhiteSpaceOrChinese validates that rune c is whitespace or is Chinese.
+//
 //go:inline
 func IsWhiteSpaceOrChinese(c rune) bool {
     return IsWhitespace(c) || IsChinese(c)
 }
 
 // IsWhiteSpaceOrChineseOrNumber validates that rune c is whitespace or is Chinese or is Number.
+//
 //go:inline
 func IsWhiteSpaceOrChineseOrNumber(c rune) bool {
     return IsWhitespace(c) || IsChineseOrNumber(c)
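Every hunk in this file inserts a blank `//` line between a function's doc comment and its `//go:inline` directive. That matches the doc-comment layout gofmt has produced since Go 1.19: any `//go:` comment is treated as a directive and separated from the prose so it is not rendered as documentation (whether or not the toolchain actually acts on `//go:inline`). A compilable sketch of the layout; the helper itself is hypothetical:

package main

import (
    "fmt"
    "unicode"
)

// isASCIIDigit reports whether r is an ASCII digit. The blank comment
// line below keeps the directive out of the rendered documentation.
//
//go:inline
func isASCIIDigit(r rune) bool {
    return r < 0x80 && unicode.IsDigit(r)
}

func main() {
    fmt.Println(isASCIIDigit('7'), isASCIIDigit('๗')) // true false
}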
utils/times.go (2 changes: 2 additions & 0 deletions)
@@ -3,12 +3,14 @@ package utils
 import "time"
 
 // GetNanoTimeFromSys get nano timestamp.
+//
 //go:inline
 func GetNanoTimeFromSys() int64 {
     return time.Now().UnixNano()
 }
 
 // CalTimeGapWithNS get nano timestamp gap.
+//
 //go:inline
 func CalTimeGapWithNS(begin int64) int64 {
     return GetNanoTimeFromSys() - begin
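The same directive-separation fix, applied to the two timing helpers. For reference, a usage sketch of the pair, with local copies of utils.GetNanoTimeFromSys and utils.CalTimeGapWithNS so the example runs standalone:

package main

import (
    "fmt"
    "time"
)

// Local copies of the two helpers from the diff.
func getNanoTimeFromSys() int64 { return time.Now().UnixNano() }

func calTimeGapWithNS(begin int64) int64 { return getNanoTimeFromSys() - begin }

func main() {
    begin := getNanoTimeFromSys()
    time.Sleep(5 * time.Millisecond)
    fmt.Printf("elapsed: %d ns\n", calTimeGapWithNS(begin))
}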
