From 88386f13901b6e9a6333b9c884b025ae6439f3cb Mon Sep 17 00:00:00 2001 From: Dmitrii Laptev Date: Sat, 13 Jun 2020 11:08:07 +0300 Subject: [PATCH 1/3] normalize-space & concat benchmarks and tests --- func_test.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ operator.go | 2 +- xpath_test.go | 2 +- 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 func_test.go diff --git a/func_test.go b/func_test.go new file mode 100644 index 0000000..2ee13fe --- /dev/null +++ b/func_test.go @@ -0,0 +1,48 @@ +package xpath + +import "testing" + +type testQuery string + +func (t testQuery) Select(_ iterator) NodeNavigator { + panic("implement me") +} + +func (t testQuery) Clone() query { + return t +} + +func (t testQuery) Evaluate(_ iterator) interface{} { + return string(t) +} + +const strForNormalization = "\t \rloooooooonnnnnnngggggggg \r \n tes \u00a0 t strinĀ \n\n \r g " +const expectedStrAfterNormalization = `loooooooonnnnnnngggggggg tes t strin g` + +func Test_NormalizeSpaceFunc(t *testing.T) { + result := normalizespaceFunc(testQuery(strForNormalization), nil).(string) + if expectedStrAfterNormalization != result { + t.Fatalf("unexpected result '%s'", result) + } +} + +func Test_ConcatFunc(t *testing.T) { + result := concatFunc(testQuery("a"), testQuery("b"))(nil, nil).(string) + if "ab" != result { + t.Fatalf("unexpected result '%s'", result) + } +} + +func Benchmark_NormalizeSpaceFunc(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = normalizespaceFunc(testQuery(strForNormalization), nil) + } +} + +func Benchmark_ConcatFunc(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _ = concatFunc(testQuery("a"), testQuery("b"))(nil, nil) + } +} diff --git a/operator.go b/operator.go index f9c10bc..8c2f31f 100644 --- a/operator.go +++ b/operator.go @@ -173,7 +173,7 @@ func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool { if y == nil { return false } - return cmpStringStringF(op,x.Value(),y.Value()) + 
return cmpStringStringF(op, x.Value(), y.Value()) } func cmpStringNumeric(t iterator, op string, m, n interface{}) bool { diff --git a/xpath_test.go b/xpath_test.go index 5b62cca..a864665 100644 --- a/xpath_test.go +++ b/xpath_test.go @@ -278,7 +278,7 @@ func TestFunction(t *testing.T) { func TestTransformFunctionReverse(t *testing.T) { nodes := selectNodes(html, "reverse(//li)") - expectedReversedNodeValues := []string { "", "login", "about", "Home" } + expectedReversedNodeValues := []string{"", "login", "about", "Home"} if len(nodes) != len(expectedReversedNodeValues) { t.Fatalf("reverse(//li) should return %d <li> nodes", len(expectedReversedNodeValues)) } From be94ad656e1a7abbd52bbda53153a08d40546024 Mon Sep 17 00:00:00 2001 From: Dmitrii Laptev Date: Sat, 13 Jun 2020 11:09:30 +0300 Subject: [PATCH 2/3] normalize-space & concat performance optimization + fix for unicode spaces --- func.go | 53 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/func.go b/func.go index 3873e33..f751a61 100644 --- a/func.go +++ b/func.go @@ -4,11 +4,16 @@ import ( "errors" "fmt" "math" - "regexp" "strconv" "strings" + "sync" + "unicode" ) +var builderPool = sync.Pool{New: func() interface{} { + return &strings.Builder{} +}} + // The XPath function list. func predicate(q query) func(NodeNavigator) bool { @@ -201,7 +206,7 @@ func asBool(t iterator, v interface{}) bool { case *NodeIterator: return v.MoveNext() case bool: - return bool(v) + return v case float64: return v != 0 case string: @@ -338,11 +343,6 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} { } } -var ( - regnewline = regexp.MustCompile(`[\r\n\t]`) - regseqspace = regexp.MustCompile(`\s{2,}`) -) - // normalizespaceFunc is XPath functions normalize-space(string?) func normalizespaceFunc(q query, t iterator) interface{} { var m string @@ -356,10 +356,26 @@ func normalizespaceFunc(q query, t iterator) interface{} { } m = node.Value() } - m = strings.TrimSpace(m) - m = regnewline.ReplaceAllString(m, " ") - m = regseqspace.ReplaceAllString(m, " ") - return m + var b = builderPool.Get().(*strings.Builder) + b.Grow(len(m)) + + runeStr := []rune(strings.TrimSpace(m)) + l := len(runeStr) + for i := range runeStr { + r := runeStr[i] + isSpace := unicode.IsSpace(r) + if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) { + if isSpace { + r = ' ' + } + b.WriteRune(r) + } + } + result := b.String() + b.Reset() + builderPool.Put(b) + + return result } // substringFunc is XPath functions substring function returns a part of a given string. 
@@ -466,7 +482,7 @@ func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { src := asString(t, functionArgs(arg2).Evaluate(t)) dst := asString(t, functionArgs(arg3).Evaluate(t)) - var replace []string + replace := make([]string, 0, len(src)) for i, s := range src { d := "" if i < len(dst) { @@ -507,20 +523,25 @@ func notFunc(q query, t iterator) interface{} { // concat( string1 , string2 [, stringn]* ) func concatFunc(args ...query) func(query, iterator) interface{} { return func(q query, t iterator) interface{} { - var a []string + b := builderPool.Get().(*strings.Builder) for _, v := range args { v = functionArgs(v) + switch v := v.Evaluate(t).(type) { case string: - a = append(a, v) + b.WriteString(v) case query: node := v.Select(t) if node != nil { - a = append(a, node.Value()) + b.WriteString(node.Value()) } } } - return strings.Join(a, "") + result := b.String() + b.Reset() + builderPool.Put(b) + + return result } } From 44e7a6bdb3fbdbc713c80425f8b73ee790570308 Mon Sep 17 00:00:00 2001 From: Dmitrii Laptev Date: Sat, 13 Jun 2020 15:35:20 +0300 Subject: [PATCH 3/3] avoid nil pointer panic (thread safe) --- func.go | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/func.go b/func.go index 3873e33..98db568 100644 --- a/func.go +++ b/func.go @@ -58,6 +58,7 @@ func lastFunc(q query, t iterator) interface{} { // countFunc is a XPath Node Set functions count(node-set). func countFunc(q query, t iterator) interface{} { var count = 0 + q = functionArgs(q) test := predicate(q) switch typ := q.Evaluate(t).(type) { case query: @@ -73,7 +74,7 @@ func countFunc(q query, t iterator) interface{} { // sumFunc is a XPath Node Set functions sum(node-set). 
func sumFunc(q query, t iterator) interface{} { var sum float64 - switch typ := q.Evaluate(t).(type) { + switch typ := functionArgs(q).Evaluate(t).(type) { case query: for node := typ.Select(t); node != nil; node = typ.Select(t) { if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { @@ -116,19 +117,19 @@ func asNumber(t iterator, o interface{}) float64 { // ceilingFunc is a XPath Node Set functions ceiling(node-set). func ceilingFunc(q query, t iterator) interface{} { - val := asNumber(t, q.Evaluate(t)) + val := asNumber(t, functionArgs(q).Evaluate(t)) return math.Ceil(val) } // floorFunc is a XPath Node Set functions floor(node-set). func floorFunc(q query, t iterator) interface{} { - val := asNumber(t, q.Evaluate(t)) + val := asNumber(t, functionArgs(q).Evaluate(t)) return math.Floor(val) } // roundFunc is a XPath Node Set functions round(node-set). func roundFunc(q query, t iterator) interface{} { - val := asNumber(t, q.Evaluate(t)) + val := asNumber(t, functionArgs(q).Evaluate(t)) //return math.Round(val) return round(val) } @@ -239,19 +240,19 @@ func asString(t iterator, v interface{}) string { // booleanFunc is a XPath functions boolean([node-set]). func booleanFunc(q query, t iterator) interface{} { - v := q.Evaluate(t) + v := functionArgs(q).Evaluate(t) return asBool(t, v) } // numberFunc is a XPath functions number([node-set]). func numberFunc(q query, t iterator) interface{} { - v := q.Evaluate(t) + v := functionArgs(q).Evaluate(t) return asNumber(t, v) } // stringFunc is a XPath functions string([node-set]). func stringFunc(q query, t iterator) interface{} { - v := q.Evaluate(t) + v := functionArgs(q).Evaluate(t) return asString(t, v) } @@ -346,7 +347,7 @@ var ( // normalizespaceFunc is XPath functions normalize-space(string?) 
func normalizespaceFunc(q query, t iterator) interface{} { var m string - switch typ := q.Evaluate(t).(type) { + switch typ := functionArgs(q).Evaluate(t).(type) { case string: m = typ case query: @@ -491,7 +492,7 @@ func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { // notFunc is XPATH functions not(expression) function operation. func notFunc(q query, t iterator) interface{} { - switch v := q.Evaluate(t).(type) { + switch v := functionArgs(q).Evaluate(t).(type) { case bool: return !v case query: