Skip to content

Commit

Permalink
Merge pull request #571 from adamdecaf/misc-updates-2024-10-22
Browse files Browse the repository at this point in the history
cmd/server: pool x/text/transform.Transformer instances
  • Loading branch information
adamdecaf authored Oct 22, 2024
2 parents 2dba69f + 5210a9d commit 1eca0f6
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 19 deletions.
4 changes: 2 additions & 2 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ func main() {

var pipeline *pipeliner
if debug, err := strconv.ParseBool(os.Getenv("DEBUG_NAME_PIPELINE")); debug && err == nil {
pipeline = newPipeliner(logger)
pipeline = newPipeliner(logger, true)
} else {
pipeline = newPipeliner(log.NewNopLogger())
pipeline = newPipeliner(log.NewNopLogger(), false)
}

searchWorkers := readInt(os.Getenv("SEARCH_MAX_WORKERS"), *flagWorkers)
Expand Down
20 changes: 13 additions & 7 deletions cmd/server/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,15 +210,21 @@ func (ds *debugStep) apply(in *Name) error {
return nil
}

func newPipeliner(logger log.Logger) *pipeliner {
func newPipeliner(logger log.Logger, debug bool) *pipeliner {
steps := []step{
&reorderSDNStep{},
&companyNameCleanupStep{},
&stopwordsStep{},
&normalizeStep{},
}
if debug {
for i := range steps {
steps[i] = &debugStep{logger: logger, step: steps[i]}
}
}
return &pipeliner{
logger: logger,
steps: []step{
&debugStep{logger: logger, step: &reorderSDNStep{}},
&debugStep{logger: logger, step: &companyNameCleanupStep{}},
&debugStep{logger: logger, step: &stopwordsStep{}},
&debugStep{logger: logger, step: &normalizeStep{}},
},
steps: steps,
}
}

Expand Down
33 changes: 31 additions & 2 deletions cmd/server/pipeline_normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package main

import (
"strings"
"sync"
"unicode"

"golang.org/x/text/runes"
Expand Down Expand Up @@ -34,7 +35,35 @@ func precompute(s string) string {
trimmed := strings.TrimSpace(strings.ToLower(punctuationReplacer.Replace(s)))

// UTF-8 normalization
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) // Mn: nonspacing marks
result, _, _ := transform.String(t, trimmed)
chain := getTransformChain()
defer saveBuffer(chain)

result, _, _ := transform.String(chain, trimmed)
return result
}

// transformChainPool caches transform.Transformer chains for reuse so that
// callers do not have to construct a new chain for every string they
// normalize. When the pool is empty it creates one via newTransformChain.
var transformChainPool = sync.Pool{
	New: func() any { return newTransformChain() },
}

// newTransformChain returns a newly constructed transformer that chains three
// stages in order: norm.NFD, removal of every rune in unicode.Mn (nonspacing
// marks), and norm.NFC. Each call builds an independent instance.
func newTransformChain() transform.Transformer {
	return transform.Chain(
		norm.NFD,
		runes.Remove(runes.In(unicode.Mn)), // Mn: nonspacing marks
		norm.NFC,
	)
}

// getTransformChain hands out a transformer taken from transformChainPool.
// If the pooled value is not a transform.Transformer, a fresh chain is built
// with newTransformChain instead.
func getTransformChain() transform.Transformer {
	if pooled, ok := transformChainPool.Get().(transform.Transformer); ok {
		return pooled
	}
	return newTransformChain()
}

// saveBuffer resets t and puts it back into transformChainPool so a later
// getTransformChain call can reuse it. Despite the name, the value being
// saved is a transformer, not a buffer.
//
// The caller should not use t after this returns, since the pool may hand it
// to another goroutine.
func saveBuffer(t transform.Transformer) {
// Clear any state left over from the previous use before pooling.
t.Reset()
transformChainPool.Put(t)
}
3 changes: 2 additions & 1 deletion cmd/server/pipeline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ var (
steps: []step{},
}

noLogPipeliner = newPipeliner(log.NewNopLogger())
noLogPipeliner = newPipeliner(log.NewNopLogger(), false)
)

func TestPipelineNoop(t *testing.T) {
Expand Down Expand Up @@ -75,6 +75,7 @@ func TestFullPipeline(t *testing.T) {
// Normalize ("-" -> " ")
{company("ANGLO-CARIBBEAN CO., LTD."), "anglo caribbean"},
}

for i := range cases {
if err := noLogPipeliner.Do(cases[i].in); err != nil {
t.Error(err)
Expand Down
8 changes: 7 additions & 1 deletion cmd/server/search_benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,14 @@ func BenchmarkSearch__All(b *testing.B) {
searcher := createBenchmarkSearcher(b)
b.ResetTimer()

var filters filterRequest

for i := 0; i < b.N; i++ {
buildFullSearchResponse(searcher, filterRequest{}, 10, 0.0, fake.Person().Name())
b.StopTimer()
name := fake.Person().Name()
b.StartTimer()

buildFullSearchResponse(searcher, filters, 10, 0.0, name)
}
}

Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ require (
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673
go4.org v0.0.0-20230225012048-214862532bf5
golang.org/x/oauth2 v0.14.0
golang.org/x/sync v0.6.0
golang.org/x/text v0.14.0
golang.org/x/sync v0.8.0
golang.org/x/text v0.19.0
)

require (
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down Expand Up @@ -224,8 +224,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM=
golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
Expand Down

0 comments on commit 1eca0f6

Please sign in to comment.