Skip to content

Commit

Permalink
apacheGH-43168: Add buffer and array builders for Struct type
Browse files Browse the repository at this point in the history
  • Loading branch information
abandy committed Jul 9, 2024
1 parent 8fc40fc commit 4ba8d8c
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 13 deletions.
5 changes: 3 additions & 2 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ public class ArrowArray<T>: AsString, AnyArray {
self.arrowData = arrowData
}

/// Post-construction hook with an empty default body; subclasses override it to do work that can throw.
public func initialize() throws {}

public func isNull(_ at: UInt) throws -> Bool {
if at >= self.length {
throw ArrowError.outOfBounds(index: Int64(at))
Expand Down Expand Up @@ -281,14 +283,13 @@ public class StructArray: ArrowArray<[Any?]> {
super.init(arrowData)
}

public func initialize() throws -> StructArray {
public override func initialize() throws {
var fields = [ArrowArrayHolder]()
for child in arrowData.children {
fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
}

self.arrowFields = fields
return self
}

public override subscript(_ index: UInt) -> [Any?]? {
Expand Down
120 changes: 119 additions & 1 deletion swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ public class ArrowArrayBuilder<T: ArrowBufferBuilder, U: ArrowArray<T.ItemType>>
/// Finishes the builder: takes the buffers from `bufferBuilder`, wraps them in an `ArrowData`
/// carrying this builder's type and null count, and constructs the array type `U` from it.
/// - Returns: The constructed array.
/// - Throws: Any error thrown while creating the `ArrowData` or by `initialize()`.
// NOTE(review): `return U(arrowData)` is followed by a `let array … return array` sequence; this
// looks like the removed and added lines of a diff shown together — confirm which one is current.
public func finish() throws -> ArrowArray<T.ItemType> {
let buffers = self.bufferBuilder.finish()
let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount)
return U(arrowData)
let array = U(arrowData)
try array.initialize()
return array
}

public func getStride() -> Int {
Expand Down Expand Up @@ -118,6 +120,56 @@ public class Time64ArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Time64>, T
}
}

/// Builds a `StructArray` from rows of `[Any?]`.
///
/// The inherited `StructBufferBuilder` records which rows are null, while one child builder per
/// field accumulates that field's values.
public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
    /// Child builders; `append` feeds element `i` of each row to `builders[i]`.
    let builders: [any ArrowArrayHolderBuilder]
    /// Field descriptions used to construct the nested struct type.
    let fields: [ArrowField]

    /// Creates a builder from explicit field descriptions and caller-supplied child builders.
    /// - Parameters:
    ///   - fields: The struct's fields.
    ///   - builders: Child builders, addressed by position when rows are appended.
    /// - Throws: Any error thrown while initializing the underlying array builder.
    public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
        self.fields = fields
        self.builders = builders
        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
        self.bufferBuilder.initializeTypeInfo(fields)
    }

    /// Creates a builder from field descriptions, loading a child builder for each field's type.
    /// - Parameter fields: The struct's fields.
    /// - Throws: Any error from `ArrowArrayBuilders.loadBuilder(arrowType:)` or from initializing
    ///   the underlying array builder.
    public init(_ fields: [ArrowField]) throws {
        self.fields = fields
        self.builders = try fields.map { try ArrowArrayBuilders.loadBuilder(arrowType: $0.type) }
        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
        // Keep both initializers consistent: the buffer builder records the type info either way.
        self.bufferBuilder.initializeTypeInfo(fields)
    }

    /// Appends one row.
    ///
    /// A `nil` row is recorded as null in the buffer builder and appends `nil` to every child
    /// builder. A non-nil row must supply at least one element per child builder.
    /// - Parameter values: The row's values in child-builder order, or `nil` for a null row.
    public override func append(_ values: [Any?]?) {
        if let row = values {
            // Fail before touching any builder so a short row cannot leave the children with
            // differing lengths; the same input previously trapped on an out-of-range index.
            precondition(
                row.count >= builders.count,
                "StructArrayBuilder.append expected at least \(builders.count) values but got \(row.count)")
        }

        self.bufferBuilder.append(values)
        for (index, builder) in builders.enumerated() {
            builder.appendAny(values?[index] ?? nil)
        }
    }

    /// Finishes every child builder and assembles the struct array.
    /// - Returns: A `StructArray` on which `initialize()` has already been called.
    /// - Throws: Any error from finishing a child builder, creating the `ArrowData`, or
    ///   `StructArray.initialize()`.
    public override func finish() throws -> StructArray {
        let buffers = self.bufferBuilder.finish()
        let childData = try builders.map { try $0.toHolder().array.arrowData }
        let arrowData = try ArrowData(self.type, buffers: buffers,
                                      children: childData, nullCount: self.nullCount,
                                      length: self.length)
        let structArray = StructArray(arrowData)
        try structArray.initialize()
        return structArray
    }
}

public class ArrowArrayBuilders {
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
_ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
Expand Down Expand Up @@ -168,6 +220,72 @@ public class ArrowArrayBuilders {
type == Float.self || type == Date.self
}

/// Creates a `StructArrayBuilder` whose fields follow the labelled children that
/// `Mirror(reflecting:)` reports for `obj`.
///
/// Each labelled child becomes a nullable field named after its label, with an arrow type derived
/// from the runtime type of the child's value. Children without a label are skipped.
/// - Parameter obj: The value to reflect.
/// - Returns: A struct builder with one child builder per labelled child.
/// - Throws: Any error from `loadBuilder(arrowType:)` or from creating the `StructArrayBuilder`.
public static func loadStructArrayBuilderForType<T>(_ obj: T) throws -> StructArrayBuilder {
    var structFields = [ArrowField]()
    var fieldBuilders = [ArrowArrayHolderBuilder]()
    for case let (label?, value) in Mirror(reflecting: obj).children {
        let fieldType = ArrowType(ArrowType.infoForType(type(of: value)))
        structFields.append(ArrowField(label, type: fieldType, isNullable: true))
        fieldBuilders.append(try loadBuilder(arrowType: fieldType))
    }

    return try StructArrayBuilder(structFields, builders: fieldBuilders)
}

/// Creates the array builder that corresponds to an arrow type.
/// - Parameter arrowType: The type whose `id` selects the builder. For `.time32` and `.time64`
///   it must be an `ArrowTypeTime32` / `ArrowTypeTime64`, whose `unit` is passed to the builder.
/// - Returns: A new builder for the requested type.
/// - Throws: `ArrowError.invalid` when a time type cannot be cast to its expected class,
///   `ArrowError.unknownType` for ids not handled here, or any error from creating the builder.
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
    arrowType: ArrowType) throws -> ArrowArrayHolderBuilder {
    let builder: ArrowArrayHolderBuilder
    switch arrowType.id {
    case .uint8:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<UInt8>
    case .uint16:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<UInt16>
    case .uint32:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<UInt32>
    case .uint64:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<UInt64>
    case .int8:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<Int8>
    case .int16:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<Int16>
    case .int32:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<Int32>
    case .int64:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<Int64>
    case .double:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<Double>
    case .float:
        builder = try loadNumberArrayBuilder() as NumberArrayBuilder<Float>
    case .string:
        builder = try StringArrayBuilder()
    case .boolean:
        builder = try BoolArrayBuilder()
    case .binary:
        builder = try BinaryArrayBuilder()
    case .date32:
        builder = try Date32ArrayBuilder()
    case .date64:
        builder = try Date64ArrayBuilder()
    case .time32:
        // The unit lives on the concrete time type, so the cast has to succeed first.
        guard let time32Type = arrowType as? ArrowTypeTime32 else {
            throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
        }
        builder = try Time32ArrayBuilder(time32Type.unit)
    case .time64:
        guard let time64Type = arrowType as? ArrowTypeTime64 else {
            throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
        }
        builder = try Time64ArrayBuilder(time64Type.unit)
    default:
        throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)")
    }

    return builder
}

public static func loadNumberArrayBuilder<T>() throws -> NumberArrayBuilder<T> {
let type = T.self
if type == Int8.self {
Expand Down
72 changes: 62 additions & 10 deletions swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,14 @@ public protocol ArrowBufferBuilder {
func finish() -> [ArrowBuffer]
}

public class BaseBufferBuilder<T> {
var values: ArrowBuffer
public class BaseBufferBuilder {
var nulls: ArrowBuffer
var stride: Int
public var offset: UInt = 0
public var capacity: UInt {return self.values.capacity}
public var capacity: UInt {return self.nulls.capacity}
public var length: UInt = 0
public var nullCount: UInt = 0

init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
self.stride = stride
self.values = values
init(_ nulls: ArrowBuffer) {
self.nulls = nulls
}

Expand All @@ -61,7 +57,19 @@ public class BaseBufferBuilder<T> {
}
}

public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
/// A `BaseBufferBuilder` that also holds a values buffer and a stride for the element type `T`.
public class ValuesBufferBuilder<T>: BaseBufferBuilder {
    /// Buffer holding the values.
    var values: ArrowBuffer
    /// Stride for one value; the initializer defaults it to `MemoryLayout<T>.stride`.
    var stride: Int
    /// Reports the capacity of the values buffer.
    public override var capacity: UInt { values.capacity }

    /// - Parameters:
    ///   - values: Initial values buffer.
    ///   - nulls: Initial nulls buffer, forwarded to `BaseBufferBuilder`.
    ///   - stride: Stride for one value.
    init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
        self.values = values
        self.stride = stride
        super.init(nulls)
    }
}

public class FixedBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
public typealias ItemType = T
private let defaultVal: ItemType
public required init() throws {
Expand Down Expand Up @@ -138,7 +146,7 @@ public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
}
}

public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
public class BoolBufferBuilder: ValuesBufferBuilder<Bool>, ArrowBufferBuilder {
public typealias ItemType = Bool
public required init() throws {
let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
Expand Down Expand Up @@ -190,7 +198,7 @@ public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
}
}

public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
public class VariableBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
public typealias ItemType = T
var offsets: ArrowBuffer
let binaryStride = MemoryLayout<UInt8>.stride
Expand Down Expand Up @@ -327,3 +335,47 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder<Date, Int64> {
}
}
}

/// Buffer builder for struct rows.
///
/// It keeps only the nulls bitmap and the length / null counters; it does not store the row
/// values passed to `append`.
public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
    public typealias ItemType = [Any?]
    /// Nested struct type recorded by `initializeTypeInfo(_:)`; `nil` until that is called.
    var info: ArrowNestedType?

    /// Creates the builder with a nulls buffer created for zero entries.
    public init() throws {
        super.init(ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride)))
    }

    /// Records the struct type described by `fields`.
    public func initializeTypeInfo(_ fields: [ArrowField]) {
        info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
    }

    /// Appends one row marker: sets the row's bit for a non-nil row, or clears it and bumps
    /// `nullCount` for a nil row. The nulls buffer is resized first when it is too short.
    public func append(_ newValue: [Any?]?) {
        // Bit position of the new row: the length before incrementing, shifted by the offset.
        let bitIndex = length + offset
        length += 1
        if length > nulls.length {
            resize(length)
        }

        if newValue == nil {
            nullCount += 1
            BitUtility.clearBit(bitIndex, buffer: nulls)
        } else {
            BitUtility.setBit(bitIndex, buffer: nulls)
        }
    }

    /// Replaces the nulls buffer with a larger one when `length` exceeds its current length,
    /// copying the existing contents across.
    public func resize(_ length: UInt) {
        guard length > nulls.length else {
            return
        }

        let grownLength = resizeLength(nulls)
        var grown = ArrowBuffer.createBuffer(grownLength / 8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        ArrowBuffer.copyCurrent(nulls, to: &grown, len: nulls.capacity)
        nulls = grown
    }

    /// Copies the nulls bitmap into a buffer created for `length / 8 + 1` entries.
    /// - Returns: A single-element array containing that nulls buffer.
    public func finish() -> [ArrowBuffer] {
        var finished = ArrowBuffer.createBuffer(length / 8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        ArrowBuffer.copyCurrent(nulls, to: &finished, len: finished.capacity)
        return [finished]
    }
}
61 changes: 61 additions & 0 deletions swift/Arrow/Tests/ArrowTests/ArrayTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,67 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length
XCTAssertEqual(microArray[2], 987654321)
}

/// Builds a three-row struct array (value row, null row, value row) through
/// `loadStructArrayBuilderForType` and checks that the values read back match what was appended.
func testStructArray() throws { // swiftlint:disable:this function_body_length
    // Only the property names and types matter here: they define the struct's fields.
    class StructTest {
        var fieldBool: Bool = false
        var fieldInt8: Int8 = 0
        var fieldInt16: Int16 = 0
        var fieldInt32: Int32 = 0
        var fieldInt64: Int64 = 0
        var fieldUInt8: UInt8 = 0
        var fieldUInt16: UInt16 = 0
        var fieldUInt32: UInt32 = 0
        var fieldUInt64: UInt64 = 0
        var fieldDouble: Double = 0
        var fieldFloat: Float = 0
        var fieldString: String = ""
        var fieldData = Data()
        var fieldDate: Date = Date.now
    }

    // Column positions, in the same order as the properties of `StructTest`.
    enum STIndex: Int {
        case bool, int8, int16, int32, int64
        case uint8, uint16, uint32, uint64, double
        case float, string, data, date
    }

    let testData = StructTest()
    let dateNow = Date.now
    let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType(testData)
    structBuilder.append([true, Int8(1), Int16(2), Int32(3), Int64(4),
                          UInt8(5), UInt16(6), UInt32(7), UInt64(8), Double(9.9),
                          Float(10.10), "11", Data("12".utf8), dateNow])
    structBuilder.append(nil)
    structBuilder.append([true, Int8(13), Int16(14), Int32(15), Int64(16),
                          UInt8(17), UInt16(18), UInt32(19), UInt64(20), Double(21.21),
                          Float(22.22), "23", Data("24".utf8), dateNow])
    XCTAssertEqual(structBuilder.length, 3)

    let structArray = try structBuilder.finish()
    XCTAssertEqual(structArray.length, 3)
    XCTAssertNil(structArray[1])

    // XCTUnwrap reports a test failure where a force-unwrap would crash the whole test run.
    let arrowFields = try XCTUnwrap(structArray.arrowFields)
    XCTAssertEqual(arrowFields[0].length, 3)
    XCTAssertNil(arrowFields[0].array.asAny(1))

    let firstRow = try XCTUnwrap(structArray[0])
    let lastRow = try XCTUnwrap(structArray[2])
    XCTAssertEqual(firstRow[STIndex.bool.rawValue] as? Bool, true)
    XCTAssertEqual(firstRow[STIndex.int8.rawValue] as? Int8, 1)
    XCTAssertEqual(firstRow[STIndex.int16.rawValue] as? Int16, 2)
    XCTAssertEqual(firstRow[STIndex.int32.rawValue] as? Int32, 3)
    XCTAssertEqual(firstRow[STIndex.int64.rawValue] as? Int64, 4)
    XCTAssertEqual(firstRow[STIndex.uint8.rawValue] as? UInt8, 5)
    XCTAssertEqual(firstRow[STIndex.uint16.rawValue] as? UInt16, 6)
    XCTAssertEqual(firstRow[STIndex.uint32.rawValue] as? UInt32, 7)
    XCTAssertEqual(firstRow[STIndex.uint64.rawValue] as? UInt64, 8)
    XCTAssertEqual(firstRow[STIndex.double.rawValue] as? Double, 9.9)
    XCTAssertEqual(firstRow[STIndex.float.rawValue] as? Float, 10.10)
    XCTAssertEqual(lastRow[STIndex.string.rawValue] as? String, "23")

    let storedData = try XCTUnwrap(firstRow[STIndex.data.rawValue] as? Data)
    XCTAssertEqual(String(decoding: storedData, as: UTF8.self), "12")

    // Dates are compared through their formatted strings rather than as raw `Date` values.
    let storedDate = try XCTUnwrap(firstRow[STIndex.date.rawValue] as? Date)
    let dateFormatter = DateFormatter()
    dateFormatter.timeStyle = .full
    XCTAssertEqual(dateFormatter.string(from: storedDate), dateFormatter.string(from: dateNow))
}

func checkHolderForType(_ checkType: ArrowType) throws {
let buffers = [ArrowBuffer(length: 0, capacity: 0,
rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)),
Expand Down

0 comments on commit 4ba8d8c

Please sign in to comment.