Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support protobuf struct type #206

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions arrow/util/messages/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

syntax = "proto3";
import "google/protobuf/any.proto";
import "google/protobuf/struct.proto";

option go_package = "../util_message";

Expand Down Expand Up @@ -48,6 +49,7 @@ message AllTheTypes {
map<string, ExampleMessage> complex_map = 20;
repeated string simple_list = 21;
repeated ExampleMessage complex_list = 22;
google.protobuf.Struct jsonlike_field = 23;

enum ExampleEnum {
OPTION_0 = 0;
Expand Down
18 changes: 18 additions & 0 deletions arrow/util/protobuf_reflect.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package util

import (
"fmt"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/types/known/structpb"
Comment on lines +21 to +22
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move these imports so they are grouped with the others that share the same prefix below?

"reflect"

"github.com/apache/arrow-go/v18/arrow"
Expand Down Expand Up @@ -118,6 +120,9 @@ func (pfr *ProtobufFieldReflection) arrowType() arrow.Type {
return arrow.DICTIONARY
}
}
if pfr.isJson() {
return arrow.STRING
}
if pfr.isStruct() {
return arrow.STRUCT
}
Expand Down Expand Up @@ -170,6 +175,13 @@ func (pfr *ProtobufFieldReflection) isEnum() bool {
return pfr.descriptor.Kind() == protoreflect.EnumKind
}

// isJson reports whether the field is of message kind and its message type
// has the full name "google.protobuf.Struct".
func (pfr *ProtobufFieldReflection) isJson() bool {
	fd := pfr.descriptor
	// The && short-circuits, so Message() is only consulted for message-kind fields.
	return fd.Kind() == protoreflect.MessageKind &&
		fd.Message().FullName() == "google.protobuf.Struct"
}

// isStruct reports whether the field is of message kind and is neither a map
// field nor a field for which isList returns true.
func (pfr *ProtobufFieldReflection) isStruct() bool {
	// Only message-kind fields are candidates.
	if pfr.descriptor.Kind() != protoreflect.MessageKind {
		return false
	}
	// Map fields are excluded before the list check is consulted.
	if pfr.descriptor.IsMap() {
		return false
	}
	return !pfr.isList()
}
Expand Down Expand Up @@ -603,6 +615,7 @@ type protobufReflection interface {
GetDescriptor() protoreflect.FieldDescriptor
isNull() bool
isEnum() bool
isJson() bool
asDictionary() protobufDictReflection
isList() bool
asList() protobufListReflection
Expand Down Expand Up @@ -754,6 +767,11 @@ func (f ProtobufMessageFieldReflection) AppendValueOrNull(b array.Builder, mem m
case arrow.STRING:
if f.protobufReflection.isEnum() {
b.(*array.StringBuilder).Append(string(fd.Enum().Values().ByNumber(pv.Enum()).Name()))
} else if f.protobufReflection.isJson() {
valueAsStructPb := f.reflectValue().Interface().(structpb.Struct)
jsonData, _ := protojson.Marshal(&valueAsStructPb)

b.(*array.StringBuilder).Append(string(jsonData))
} else {
b.(*array.StringBuilder).Append(pv.String())
}
Expand Down
68 changes: 38 additions & 30 deletions arrow/util/protobuf_reflect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package util
import (
"encoding/json"
"fmt"
"google.golang.org/protobuf/types/known/structpb"
Comment on lines 21 to +22
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above, please place this with the sorted imports below

"testing"

"github.com/apache/arrow-go/v18/arrow"
Expand All @@ -43,28 +44,31 @@ type J map[string]any
func AllTheTypesFixture() Fixture {
e := J{"field1": "Example"}

s, _ := structpb.NewStruct(e)

m := J{
"str": "Hello",
"int32": 10,
"int64": 100,
"sint32": -10,
"sin64": -100,
"uint32": 10,
"uint64": 100,
"fixed32": 10,
"fixed64": 1000,
"sfixed32": 10,
"bool": false,
"bytes": "SGVsbG8sIHdvcmxkIQ==",
"double": 1.1,
"enum": "OPTION_1",
"message": e,
"oneof": []any{0, "World"},
"any": J{"field1": "Example"},
"simple_map": []J{{"key": 99, "value": "Hello"}},
"complex_map": []J{{"key": "complex", "value": e}},
"simple_list": []any{"Hello", "World"},
"complex_list": []J{e},
"str": "Hello",
"int32": 10,
"int64": 100,
"sint32": -10,
"sin64": -100,
"uint32": 10,
"uint64": 100,
"fixed32": 10,
"fixed64": 1000,
"sfixed32": 10,
"bool": false,
"bytes": "SGVsbG8sIHdvcmxkIQ==",
"double": 1.1,
"enum": "OPTION_1",
"message": e,
"oneof": []any{0, "World"},
"any": J{"field1": "Example"},
"simple_map": []J{{"key": 99, "value": "Hello"}},
"complex_map": []J{{"key": "complex", "value": e}},
"simple_list": []any{"Hello", "World"},
"complex_list": []J{e},
"jsonlike_field": "{\"field1\":\"Example\"}",
}
jm, err := json.Marshal(m)
if err != nil {
Expand Down Expand Up @@ -97,14 +101,15 @@ func AllTheTypesFixture() Fixture {
Any: anyMsg,
//Breaks the test as the Golang maps have a non-deterministic order
//SimpleMap: map[int32]string{99: "Hello", 100: "World", 98: "How", 101: "Are", 1: "You"},
SimpleMap: map[int32]string{99: "Hello"},
ComplexMap: map[string]*util_message.ExampleMessage{"complex": &exampleMsg},
SimpleList: []string{"Hello", "World"},
ComplexList: []*util_message.ExampleMessage{&exampleMsg},
SimpleMap: map[int32]string{99: "Hello"},
ComplexMap: map[string]*util_message.ExampleMessage{"complex": &exampleMsg},
SimpleList: []string{"Hello", "World"},
ComplexList: []*util_message.ExampleMessage{&exampleMsg},
JsonlikeField: s,
tscottcoombes1 marked this conversation as resolved.
Show resolved Hide resolved
}

schema := `schema:
fields: 22
fields: 23
- str: type=utf8, nullable
- int32: type=int32, nullable
- int64: type=int64, nullable
Expand All @@ -126,7 +131,8 @@ func AllTheTypesFixture() Fixture {
- simple_map: type=map<int32, utf8, items_nullable>, nullable
- complex_map: type=map<utf8, struct<field1: utf8>, items_nullable>, nullable
- simple_list: type=list<item: utf8, nullable>, nullable
- complex_list: type=list<item: struct<field1: utf8>, nullable>, nullable`
- complex_list: type=list<item: struct<field1: utf8>, nullable>, nullable
- jsonlike_field: type=utf8, nullable`

return Fixture{
msg: &msg,
Expand Down Expand Up @@ -240,7 +246,7 @@ func TestGetSchema(t *testing.T) {

pmr = NewProtobufMessageReflection(f.msg, WithOneOfHandler(OneOfDenseUnion))
want := `schema:
fields: 21
fields: 22
- str: type=utf8, nullable
- int32: type=int32, nullable
- int64: type=int64, nullable
Expand All @@ -261,7 +267,8 @@ func TestGetSchema(t *testing.T) {
- simple_map: type=map<int32, utf8, items_nullable>, nullable
- complex_map: type=map<utf8, struct<field1: utf8>, items_nullable>, nullable
- simple_list: type=list<item: utf8, nullable>, nullable
- complex_list: type=list<item: struct<field1: utf8>, nullable>, nullable`
- complex_list: type=list<item: struct<field1: utf8>, nullable>, nullable
- jsonlike_field: type=utf8, nullable`
CheckSchema(t, pmr, want)

excludeComplex := func(pfr *ProtobufFieldReflection) bool {
Expand Down Expand Up @@ -376,7 +383,8 @@ func TestNullRecordFromProtobuf(t *testing.T) {
"simple_map":[],
"complex_map":[],
"simple_list":[],
"complex_list":[]
"complex_list":[],
"jsonlike_field":null
}]`)
}

Expand Down
Loading
Loading