Skip to content

Commit

Permalink
Tiny Weights (vitessio#14402)
Browse files Browse the repository at this point in the history
Signed-off-by: Vicent Marti <[email protected]>
  • Loading branch information
vmg authored Nov 7, 2023
1 parent 3aead9e commit a15ef42
Show file tree
Hide file tree
Showing 33 changed files with 939 additions and 615 deletions.
7 changes: 5 additions & 2 deletions go/hack/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,11 @@ func RuntimeAllocSize(size int64) int64 {
return int64(roundupsize(uintptr(size)))
}

//go:linkname ParseFloatPrefix strconv.parseFloatPrefix
func ParseFloatPrefix(s string, bitSize int) (float64, int, error)
// Atof64 is bound through go:linkname to the unexported strconv.atof64,
// so it has no body here. It takes the string to parse and returns a
// float64, an int and an error.
// NOTE(review): the ParseFloat64 caller in this commit uses the int as an
// offset into s (the number of leading bytes consumed) — confirm against
// the strconv sources of the Go version in use.
//
//go:linkname Atof64 strconv.atof64
func Atof64(s string) (float64, int, error)

// Atof32 is bound through go:linkname to the unexported strconv.atof32,
// so it has no body here. It takes the string to parse and returns a
// float32, an int and an error.
// NOTE(review): the int is presumably the number of leading bytes of s
// that were consumed — confirm against the strconv sources.
//
//go:linkname Atof32 strconv.atof32
func Atof32(s string) (float32, int, error)

// FastRand is bound through go:linkname to the unexported runtime.fastrand
// and returns a uint32.
// NOTE(review): presumably a cheap, non-cryptographic random value; the
// linked symbol must exist in the Go runtime being built against — confirm.
//
//go:linkname FastRand runtime.fastrand
func FastRand() uint32
18 changes: 18 additions & 0 deletions go/mysql/collations/colldata/8bit.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
package colldata

import (
"encoding/binary"

"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/mysql/collations/charset"
"vitess.io/vitess/go/vt/vthash"
Expand Down Expand Up @@ -168,6 +170,16 @@ func (c *Collation_8bit_simple_ci) Collate(left, right []byte, rightIsPrefix boo
return len(left) - len(right)
}

// TinyWeightString packs the sort weights of at most the first four bytes of
// src into a uint32, most significant byte first. Each byte is translated
// through the collation's sort table; inputs shorter than four bytes leave
// the remaining low-order bytes as zero.
func (c *Collation_8bit_simple_ci) TinyWeightString(src []byte) uint32 {
	table := c.sort

	// Only the leading four bytes can contribute to a 32-bit weight.
	head := src
	if len(head) > 4 {
		head = head[:4]
	}

	var packed [4]byte
	for pos, ch := range head {
		packed[pos] = table[ch]
	}
	return binary.BigEndian.Uint32(packed[:])
}

func (c *Collation_8bit_simple_ci) WeightString(dst, src []byte, numCodepoints int) []byte {
padToMax := false
sortOrder := c.sort
Expand Down Expand Up @@ -272,6 +284,12 @@ func (c *Collation_binary) Collate(left, right []byte, isPrefix bool) int {
return collationBinary(left, right, isPrefix)
}

// TinyWeightString returns the first four bytes of src interpreted as a
// big-endian uint32. Inputs shorter than four bytes are zero-padded on the
// right; anything past the fourth byte is ignored.
func (c *Collation_binary) TinyWeightString(src []byte) uint32 {
	var prefix [4]byte
	// copy stops at the shorter of the two slices, so the array keeps its
	// zero padding when src has fewer than four bytes.
	copy(prefix[:], src)
	return binary.BigEndian.Uint32(prefix[:])
}

func (c *Collation_binary) WeightString(dst, src []byte, numCodepoints int) []byte {
padToMax := false
copyCodepoints := len(src)
Expand Down
8 changes: 8 additions & 0 deletions go/mysql/collations/colldata/collation.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,14 @@ type CaseAwareCollation interface {
ToLower(dst []byte, src []byte) []byte
}

// TinyWeightCollation implements the TinyWeightString API for collations.
// It embeds Collation, so an implementation offers the full collation API
// plus a fixed-size 32-bit weight.
type TinyWeightCollation interface {
Collation
// TinyWeightString returns a 32-bit weight string for a source string based on this collation.
// This is usually the 4-byte prefix of the full weight string, calculated more efficiently.
// NOTE(review): TestTinyWeightStrings only checks consistency with Collate
// for the utf8mb4_0900 collations (strings that collate equal get equal
// weights; a string that collates lower never gets a higher weight). Equal
// tiny weights therefore do not imply equal strings — confirm callers fall
// back to a full comparison in that case.
TinyWeightString(src []byte) uint32
}

func Lookup(id collations.ID) Collation {
if int(id) >= len(collationsById) {
return nil
Expand Down
23 changes: 23 additions & 0 deletions go/mysql/collations/colldata/uca.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package colldata

import (
"bytes"
"encoding/binary"
"math/bits"

"vitess.io/vitess/go/mysql/collations"
Expand Down Expand Up @@ -119,6 +120,28 @@ nextLevel:
return int(l) - int(r)
}

// TinyWeightString returns a 32-bit weight for src under this collation.
//
// When the collation's iterator is a *uca.FastIterator900, one weight block
// is requested into a 16-byte scratch buffer and its first four bytes are
// returned as a big-endian uint32. Otherwise up to two weights are pulled
// from the iterator: the first fills the high 16 bits and the second, if
// present, the low 16 bits. An input that yields no weights produces 0.
func (c *Collation_utf8mb4_uca_0900) TinyWeightString(src []byte) uint32 {
	iter := c.uca.Iterator(src)
	defer iter.Done()

	if fastIter, isFast := iter.(*uca.FastIterator900); isFast {
		var block [16]byte
		fastIter.NextWeightBlock64(block[:])
		return binary.BigEndian.Uint32(block[:4])
	}

	hi, found := iter.Next()
	if !found {
		return 0
	}

	tiny := uint32(hi) << 16
	if lo, found := iter.Next(); found {
		tiny |= uint32(lo)
	}
	return tiny
}

func (c *Collation_utf8mb4_uca_0900) WeightString(dst, src []byte, numCodepoints int) []byte {
it := c.uca.Iterator(src)
defer it.Done()
Expand Down
56 changes: 56 additions & 0 deletions go/mysql/collations/colldata/uca_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,62 @@ func TestCompareWithWeightString(t *testing.T) {
}
}

// TestTinyWeightStrings checks that TinyWeightString is consistent with
// Collate for the utf8mb4_0900 collations. For every pair of inputs, strings
// that collate equal must have equal tiny weights, and a string that collates
// before another must never have the larger tiny weight.
func TestTinyWeightStrings(t *testing.T) {
	colls := []Collation{
		testcollation(t, "utf8mb4_0900_as_cs"),
		testcollation(t, "utf8mb4_0900_as_ci"),
		testcollation(t, "utf8mb4_0900_ai_ci"),
	}

	inputs := []string{
		"a", "A", "aa", "AA", "aaa", "AAA", "aaaa", "AAAA",
		// "bb" replaces a duplicated "BB" so the lowercase two-letter case is
		// covered, mirroring the "aa"/"AA" pair above.
		"b", "B", "bb", "BB", "bbb", "BBB", "bbbb", "BBBB",
		"Abc", "aBC",
		"ǍḄÇ", "ÁḆĈ",
		"\uA73A", "\uA738",
		"\uAC00", "\u326E",
		ExampleString,
		ExampleStringLong,
		JapaneseString,
		WhitespaceString,
		HungarianString,
		JapaneseString2,
		ChineseString,
		ChineseString2,
		SpanishString,
		EnglishString,
	}

	for _, coll := range colls {
		// Report a missing implementation as a test failure naming the
		// collation instead of panicking on the type assertion.
		tw, ok := coll.(TinyWeightCollation)
		if !ok {
			t.Errorf("[%s] collation does not implement TinyWeightCollation", coll.Name())
			continue
		}

		for _, a := range inputs {
			aw := tw.TinyWeightString([]byte(a))

			for _, b := range inputs {
				bw := tw.TinyWeightString([]byte(b))
				cmp := tw.Collate([]byte(a), []byte(b), false)

				switch {
				case cmp == 0:
					if aw != bw {
						t.Errorf("[%s] %q vs %q: should be equal, got %08x / %08x", coll.Name(), a, b, aw, bw)
					}
				case cmp < 0:
					if aw > bw {
						t.Errorf("[%s] %q vs %q: should be <=, got %08x / %08x", coll.Name(), a, b, aw, bw)
					}
				case cmp > 0:
					if aw < bw {
						t.Errorf("[%s] %q vs %q: should be >=, got %08x / %08x", coll.Name(), a, b, aw, bw)
					}
				}
			}
		}
	}
}

func TestFastIterators(t *testing.T) {
allASCIICharacters := make([]byte, 128)
for n := range allASCIICharacters {
Expand Down
2 changes: 1 addition & 1 deletion go/mysql/fastparse/fastparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ func ParseFloat64(s string) (float64, error) {
// We only care to parse as many of the initial float characters of the
// string as possible. This functionality is implemented in the `strconv` package
// of the standard library, but not exposed, so we hook into it.
val, l, err := hack.ParseFloatPrefix(s[ws:], 64)
val, l, err := hack.Atof64(s[ws:])
for l < len(s[ws:]) {
if !isSpace(s[ws+uint(l)]) {
break
Expand Down
4 changes: 2 additions & 2 deletions go/sqltypes/bind_variables.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func TupleToProto(v []Value) *querypb.Value {

// ValueToProto converts Value to a *querypb.Value.
func ValueToProto(v Value) *querypb.Value {
return &querypb.Value{Type: v.typ, Value: v.val}
return &querypb.Value{Type: v.Type(), Value: v.val}
}

// ProtoToValue converts a *querypb.Value to a Value.
Expand Down Expand Up @@ -143,7 +143,7 @@ func BytesBindVariable(v []byte) *querypb.BindVariable {

// ValueBindVariable converts a Value to a bind var.
func ValueBindVariable(v Value) *querypb.BindVariable {
return &querypb.BindVariable{Type: v.typ, Value: v.val}
return &querypb.BindVariable{Type: v.Type(), Value: v.val}
}

// BuildBindVariable builds a *querypb.BindVariable from a valid input type.
Expand Down
10 changes: 8 additions & 2 deletions go/sqltypes/parse_rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package sqltypes
import (
"fmt"
"io"
"reflect"
"slices"
"strconv"
"strings"
"text/scanner"
Expand Down Expand Up @@ -127,6 +127,12 @@ func (e *RowMismatchError) Error() string {
return fmt.Sprintf("results differ: %v\n\twant: %v\n\tgot: %v", e.err, e.want, e.got)
}

// RowEqual reports whether want and got have the same length and every pair
// of values at the same position compares equal according to Value.Equal.
func RowEqual(want, got Row) bool {
	sameValue := func(w, g Value) bool {
		return w.Equal(g)
	}
	return slices.EqualFunc(want, got, sameValue)
}

func RowsEquals(want, got []Row) error {
if len(want) != len(got) {
return &RowMismatchError{
Expand All @@ -143,7 +149,7 @@ func RowsEquals(want, got []Row) error {
if matched[i] {
continue
}
if reflect.DeepEqual(aa, bb) {
if RowEqual(aa, bb) {
matched[i] = true
ok = true
break
Expand Down
10 changes: 6 additions & 4 deletions go/sqltypes/result.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package sqltypes
import (
"crypto/sha256"
"fmt"
"reflect"
"slices"

"google.golang.org/protobuf/proto"

Expand Down Expand Up @@ -69,8 +69,8 @@ func (result *Result) Repair(fields []*querypb.Field) {
// Usage of j is intentional.
for j, f := range fields {
for _, r := range result.Rows {
if r[j].typ != Null {
r[j].typ = f.Type
if r[j].Type() != Null {
r[j].typ = uint16(f.Type)
}
}
}
Expand Down Expand Up @@ -198,7 +198,9 @@ func (result *Result) Equal(other *Result) bool {
return FieldsEqual(result.Fields, other.Fields) &&
result.RowsAffected == other.RowsAffected &&
result.InsertID == other.InsertID &&
reflect.DeepEqual(result.Rows, other.Rows)
slices.EqualFunc(result.Rows, other.Rows, func(a, b Row) bool {
return RowEqual(a, b)
})
}

// ResultsEqual compares two arrays of Result.
Expand Down
2 changes: 1 addition & 1 deletion go/sqltypes/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func TestValue(typ querypb.Type, val string) Value {
// This function should only be used for testing.
func TestTuple(vals ...Value) Value {
return Value{
typ: Tuple,
typ: uint16(Tuple),
val: encodeTuple(vals),
}
}
Expand Down
Loading

0 comments on commit a15ef42

Please sign in to comment.