Skip to content

Commit

Permalink
[Go] Fixes record builder support for not-nullable fixed-size lists
Browse files Browse the repository at this point in the history
Prior to this commit, the builder constructor for fixed-length lists
would lose information as to whether the list elements were marked
not-nullable, which meant the data type available from the builder would
always reflect "nullable". When NewRecord was invoked, this type would
get checked against the type present in the original schema. If the
schema requested a fixed-size list with not-nullable elements, this check
would always fail and cause a panic.

This commit introduces an alternative builder constructor
"NewFixedSizeListBuilderWithField" for fixed-size lists that takes the
entire field context, similar to what already exists for lists and large
lists.
  • Loading branch information
wkalt committed Jun 23, 2024
1 parent c27c710 commit bbb802e
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 13 deletions.
2 changes: 1 addition & 1 deletion go/arrow/array/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
return bldr
case arrow.FIXED_SIZE_LIST:
typ := dtype.(*arrow.FixedSizeListType)
return NewFixedSizeListBuilder(mem, typ.Len(), typ.Elem())
return NewFixedSizeListBuilderWithField(mem, typ.Len(), typ.ElemField())
case arrow.DURATION:
typ := dtype.(*arrow.DurationType)
return NewDurationBuilder(mem, typ)
Expand Down
39 changes: 27 additions & 12 deletions go/arrow/array/fixed_size_list.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,25 +162,36 @@ func (a *FixedSizeList) MarshalJSON() ([]byte, error) {
}

type FixedSizeListBuilder struct {
builder

etype arrow.DataType // data type of the list's elements.
n int32 // number of elements in the fixed-size list.
values Builder // value builder for the list's elements.
baseListBuilder
n int32
}

// NewFixedSizeListBuilder returns a builder, using the provided memory allocator.
// The created list builder will create a list whose elements will be of type etype.
func NewFixedSizeListBuilder(mem memory.Allocator, n int32, etype arrow.DataType) *FixedSizeListBuilder {
return &FixedSizeListBuilder{
builder: builder{refCount: 1, mem: mem},
etype: etype,
n: n,
values: NewBuilder(mem, etype),
baseListBuilder{
builder: builder{refCount: 1, mem: mem},
values: NewBuilder(mem, etype),
dt: arrow.FixedSizeListOf(n, etype),
},
n,
}
}

func (b *FixedSizeListBuilder) Type() arrow.DataType { return arrow.FixedSizeListOf(b.n, b.etype) }
// NewFixedSizeListBuilderWithField returns a builder like
// NewFixedSizeListBuilder, but it takes the complete element field instead of
// only the element data type. The list type stored on the builder is created
// from that field, so the field's nullability is kept.
func NewFixedSizeListBuilderWithField(mem memory.Allocator, n int32, field arrow.Field) *FixedSizeListBuilder {
	// The child builder is created before the list type, in the same order
	// as the struct literal form would evaluate them.
	elemBuilder := NewBuilder(mem, field.Type)
	listType := arrow.FixedSizeListOfField(n, field)

	return &FixedSizeListBuilder{
		baseListBuilder: baseListBuilder{
			builder: builder{refCount: 1, mem: mem},
			values:  elemBuilder,
			dt:      listType,
		},
		n: n,
	}
}

// Release decreases the reference count by 1.
// When the reference count goes to zero, the memory is freed.
Expand Down Expand Up @@ -228,6 +239,10 @@ func (b *FixedSizeListBuilder) AppendEmptyValue() {
}
}

// Type returns the fixed-size list data type stored on the builder when it
// was constructed.
func (b *FixedSizeListBuilder) Type() arrow.DataType { return b.dt }

func (b *FixedSizeListBuilder) AppendEmptyValues(n int) {
for i := 0; i < n; i++ {
b.AppendEmptyValue()
Expand Down Expand Up @@ -296,7 +311,7 @@ func (b *FixedSizeListBuilder) newData() (data *Data) {
defer values.Release()

data = NewData(
arrow.FixedSizeListOf(b.n, b.etype), b.length,
b.dt, b.length,
[]*memory.Buffer{b.nullBitmap},
[]arrow.ArrayData{values.Data()},
b.nulls,
Expand Down Expand Up @@ -336,7 +351,7 @@ func (b *FixedSizeListBuilder) UnmarshalOne(dec *json.Decoder) error {
default:
return &json.UnmarshalTypeError{
Value: fmt.Sprint(t),
Struct: arrow.FixedSizeListOf(b.n, b.etype).String(),
Struct: b.dt.String(),
}
}

Expand Down
39 changes: 39 additions & 0 deletions go/arrow/array/record_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,45 @@ func TestRecordReader(t *testing.T) {
}
}

// TestRecordBuilderRespectsFixedSizeArrayNullability builds a one-row record
// from a schema holding a single fixed-size list column, once with nullable
// and once with non-nullable elements, and checks the rendered column.
func TestRecordBuilderRespectsFixedSizeArrayNullability(t *testing.T) {
	mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
	defer mem.AssertSize(t, 0)

	testCases := []struct {
		name   string
		fields []arrow.Field
	}{
		{
			name: "nullable",
			fields: []arrow.Field{
				{Name: "data", Type: arrow.FixedSizeListOf(1, arrow.PrimitiveTypes.Int32)},
			},
		},
		{
			name: "not nullable",
			fields: []arrow.Field{
				{Name: "data", Type: arrow.FixedSizeListOfNonNullable(1, arrow.PrimitiveTypes.Int32)},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			recBldr := array.NewRecordBuilder(mem, arrow.NewSchema(tc.fields, nil))
			defer recBldr.Release()

			// Append one valid list slot, then its single int32 element.
			listBldr := recBldr.Field(0).(*array.FixedSizeListBuilder)
			listBldr.Append(true)
			listBldr.ValueBuilder().(*array.Int32Builder).Append(10)

			rec := recBldr.NewRecord()
			defer rec.Release()

			const want = "[[10]]"
			got := rec.Column(0).String()
			if got != want {
				t.Fatalf("invalid record: got=%q, want=%q", got, want)
			}
		})
	}
}

func TestRecordBuilder(t *testing.T) {
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
defer mem.AssertSize(t, 0)
Expand Down

0 comments on commit bbb802e

Please sign in to comment.