diff --git a/swift/Arrow/Sources/Arrow/ArrowBuffer.swift b/swift/Arrow/Sources/Arrow/ArrowBuffer.swift index 4ac4eb93c91db..1be6ba205af34 100644 --- a/swift/Arrow/Sources/Arrow/ArrowBuffer.swift +++ b/swift/Arrow/Sources/Arrow/ArrowBuffer.swift @@ -39,6 +39,13 @@ public class ArrowBuffer { data.append(ptr, count: Int(capacity)) } + static func createEmptyBuffer() -> ArrowBuffer { + return ArrowBuffer( + length: 0, + capacity: 0, + rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)) + } + static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer { let byteCount = UInt(data.count) let capacity = alignTo64(byteCount) @@ -48,14 +55,10 @@ public class ArrowBuffer { return ArrowBuffer(length: length, capacity: capacity, rawPointer: rawPointer) } - static func createBuffer(_ length: UInt, size: UInt, doAlign: Bool = true) -> ArrowBuffer { + static func createBuffer(_ length: UInt, size: UInt) -> ArrowBuffer { let actualLen = max(length, ArrowBuffer.minLength) let byteCount = size * actualLen - var capacity = byteCount - if doAlign { - capacity = alignTo64(byteCount) - } - + let capacity = alignTo64(byteCount) let memory = MemoryAllocator(64) let rawPointer = memory.allocateArray(Int(capacity)) rawPointer.initializeMemory(as: UInt8.self, repeating: 0, count: Int(capacity)) @@ -66,7 +69,11 @@ public class ArrowBuffer { to.rawPointer.copyMemory(from: from.rawPointer, byteCount: Int(len)) } - private static func alignTo64(_ length: UInt) -> UInt { + static func copyCurrent(_ from: ArrowBuffer, to: inout ArrowNullBuffer, len: UInt) { + to.rawPointer.copyMemory(from: from.rawPointer, byteCount: Int(len)) + } + + fileprivate static func alignTo64(_ length: UInt) -> UInt { let bufAlignment = length % 64 if bufAlignment != 0 { return length + (64 - bufAlignment) + 8 @@ -75,3 +82,30 @@ public class ArrowBuffer { return length + 8 } } + +public class ArrowNullBuffer: ArrowBuffer { + var nullCount: UInt + init(_ nullCount: UInt, length: UInt, 
capacity: UInt, rawPointer: UnsafeMutableRawPointer) { + self.nullCount = nullCount + super.init(length: length, capacity: capacity, rawPointer: rawPointer) + } + + static func createBuffer(_ data: [UInt8], length: UInt, nullCount: UInt) -> ArrowNullBuffer { + let byteCount = UInt(data.count) + let capacity = alignTo64(byteCount) + let memory = MemoryAllocator(64) + let rawPointer = memory.allocateArray(Int(capacity)) + rawPointer.copyMemory(from: data, byteCount: data.count) + return ArrowNullBuffer(nullCount, length: length, capacity: capacity, rawPointer: rawPointer) + } + + static func createBuffer(_ length: UInt, size: UInt, nullCount: UInt) -> ArrowNullBuffer { + let actualLen = max(length, ArrowBuffer.minLength) + let byteCount = size * actualLen + let capacity = alignTo64(byteCount) + let memory = MemoryAllocator(64) + let rawPointer = memory.allocateArray(Int(capacity)) + rawPointer.initializeMemory(as: UInt8.self, repeating: 0, count: Int(capacity)) + return ArrowNullBuffer(nullCount, length: length, capacity: capacity, rawPointer: rawPointer) + } +} diff --git a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift index e4c8036c327d1..982cd8d12b3f8 100644 --- a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift @@ -32,14 +32,14 @@ public protocol ArrowBufferBuilder { public class BaseBufferBuilder { var values: ArrowBuffer - var nulls: ArrowBuffer + var nulls: ArrowNullBuffer var stride: Int public var offset: UInt = 0 public var capacity: UInt {return self.values.capacity} public var length: UInt = 0 - public var nullCount: UInt = 0 + public var nullCount: UInt {return self.nulls.nullCount} - init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout.stride) { + init(values: ArrowBuffer, nulls: ArrowNullBuffer, stride: Int = MemoryLayout.stride) { self.stride = stride self.values = values self.nulls = nulls @@ -67,7 +67,7 @@ public 
class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { public required init() throws { self.defaultVal = try FixedBufferBuilder.defaultValueForType() let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) - let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) + let nulls = ArrowNullBuffer.createBuffer(0, size: UInt(MemoryLayout.stride), nullCount: 0) super.init(values: values, nulls: nulls) } @@ -83,7 +83,7 @@ public class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { BitUtility.setBit(index + self.offset, buffer: self.nulls) self.values.rawPointer.advanced(by: byteIndex).storeBytes(of: val, as: T.self) } else { - self.nullCount += 1 + self.nulls.nullCount += 1 BitUtility.clearBit(index + self.offset, buffer: self.nulls) self.values.rawPointer.advanced(by: byteIndex).storeBytes(of: defaultVal, as: T.self) } @@ -93,7 +93,8 @@ public class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { if length > self.values.length { let resizeLength = resizeLength(self.values) var values = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout.size)) - var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout.size)) + var nulls = ArrowNullBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout.size), + nullCount: self.nullCount) ArrowBuffer.copyCurrent(self.values, to: &values, len: self.values.capacity) ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity) self.values = values @@ -104,7 +105,8 @@ public class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { public func finish() -> [ArrowBuffer] { let length = self.length var values = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout.size)) - var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout.size)) + var nulls = ArrowNullBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout.size), + nullCount: self.nullCount) ArrowBuffer.copyCurrent(self.values, to: 
&values, len: values.capacity) ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity) return [nulls, values] @@ -142,7 +144,8 @@ public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { public typealias ItemType = Bool public required init() throws { let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) - let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) + let nulls = ArrowNullBuffer.createBuffer(0, size: UInt(MemoryLayout.stride), + nullCount: 0) super.init(values: values, nulls: nulls) } @@ -162,7 +165,7 @@ public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { } } else { - self.nullCount += 1 + self.nulls.nullCount += 1 BitUtility.clearBit(index + self.offset, buffer: self.nulls) BitUtility.clearBit(index + self.offset, buffer: self.values) } @@ -172,7 +175,8 @@ public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { if (length/8) > self.values.length { let resizeLength = resizeLength(self.values) var values = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout.size)) - var nulls = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout.size)) + var nulls = ArrowNullBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout.size), + nullCount: nullCount) ArrowBuffer.copyCurrent(self.values, to: &values, len: self.values.capacity) ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity) self.values = values @@ -183,7 +187,8 @@ public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { public func finish() -> [ArrowBuffer] { let length = self.length var values = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout.size)) - var nulls = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout.size)) + var nulls = ArrowNullBuffer.createBuffer(length, size: UInt(MemoryLayout.size), + nullCount: nullCount) ArrowBuffer.copyCurrent(self.values, to: &values, len: values.capacity) ArrowBuffer.copyCurrent(self.nulls, to: 
&nulls, len: nulls.capacity) return [nulls, values] @@ -196,7 +201,7 @@ public class VariableBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder let binaryStride = MemoryLayout.stride public required init() throws { let values = ArrowBuffer.createBuffer(0, size: UInt(binaryStride)) - let nulls = ArrowBuffer.createBuffer(0, size: UInt(binaryStride)) + let nulls = ArrowNullBuffer.createBuffer(0, size: UInt(binaryStride), nullCount: 0) self.offsets = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) super.init(values: values, nulls: nulls, stride: binaryStride) } @@ -229,7 +234,7 @@ public class VariableBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder } if isNull { - self.nullCount += 1 + self.nulls.nullCount += 1 BitUtility.clearBit(index + self.offset, buffer: self.nulls) } else { BitUtility.setBit(index + self.offset, buffer: self.nulls) @@ -257,7 +262,8 @@ public class VariableBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder public func resize(_ length: UInt) { if length > self.offsets.length { let resizeLength = resizeLength(self.offsets, len: length) - var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout.size)) + var nulls = ArrowNullBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout.size), + nullCount: self.nullCount) var offsets = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout.size)) ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity) ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: self.offsets.capacity) @@ -269,7 +275,8 @@ public class VariableBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder public func finish() -> [ArrowBuffer] { let length = self.length var values = ArrowBuffer.createBuffer(self.values.length, size: UInt(MemoryLayout.size)) - var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout.size)) + var nulls = ArrowNullBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout.size), + nullCount: self.nullCount) var 
offsets = ArrowBuffer.createBuffer(length, size: UInt(MemoryLayout.size)) ArrowBuffer.copyCurrent(self.values, to: &values, len: values.capacity) ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity) diff --git a/swift/Arrow/Sources/Arrow/ArrowReader.swift b/swift/Arrow/Sources/Arrow/ArrowReader.swift index d9dc1bdb470e6..76dc8edb8cfbf 100644 --- a/swift/Arrow/Sources/Arrow/ArrowReader.swift +++ b/swift/Arrow/Sources/Arrow/ArrowReader.swift @@ -57,10 +57,12 @@ public class ArrowReader { private func loadPrimitiveData(_ loadInfo: DataLoadInfo) -> Result { do { let node = loadInfo.recordBatch.nodes(at: loadInfo.nodeIndex)! + let nullLength = UInt(ceil(Double(node.length) / 8)) try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex) let nullBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex)! let arrowNullBuffer = makeBuffer(nullBuffer, fileData: loadInfo.fileData, - length: UInt(node.nullCount), messageOffset: loadInfo.messageOffset) + length: nullLength, messageOffset: loadInfo.messageOffset, + nullCount: UInt(node.nullCount)) try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex + 1) let valueBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex + 1)! let arrowValueBuffer = makeBuffer(valueBuffer, fileData: loadInfo.fileData, @@ -76,10 +78,12 @@ public class ArrowReader { private func loadVariableData(_ loadInfo: DataLoadInfo) -> Result { let node = loadInfo.recordBatch.nodes(at: loadInfo.nodeIndex)! do { + let nullLength = UInt(ceil(Double(node.length) / 8)) try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex) let nullBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex)! 
let arrowNullBuffer = makeBuffer(nullBuffer, fileData: loadInfo.fileData, - length: UInt(node.nullCount), messageOffset: loadInfo.messageOffset) + length: nullLength, messageOffset: loadInfo.messageOffset, + nullCount: UInt(node.nullCount)) try validateBufferIndex(loadInfo.recordBatch, index: loadInfo.bufferIndex + 1) let offsetBuffer = loadInfo.recordBatch.buffers(at: loadInfo.bufferIndex + 1)! let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData: loadInfo.fileData, diff --git a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift index fa52160478f24..c0def8d2e7cbc 100644 --- a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -18,10 +18,18 @@ import FlatBuffers import Foundation +private func getNullCount(_ buffer: ArrowBuffer) -> UInt { + if let nullBuffer = buffer as? ArrowNullBuffer { + return nullBuffer.nullCount + } + + fatalError("null buffer expected but not found") +} + private func makeBinaryHolder(_ buffers: [ArrowBuffer]) -> Result { do { let arrowData = try ArrowData(ArrowType(ArrowType.ArrowBinary), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(BinaryArray(arrowData))) } catch let error as ArrowError { return .failure(error) @@ -33,7 +41,7 @@ private func makeBinaryHolder(_ buffers: [ArrowBuffer]) -> Result Result { do { let arrowData = try ArrowData(ArrowType(ArrowType.ArrowString), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(StringArray(arrowData))) } catch let error as ArrowError { return .failure(error) @@ -42,31 +50,31 @@ private func makeStringHolder(_ buffers: [ArrowBuffer]) -> Result Result { - switch floatType.precision { - case .single: + switch floatType.id { + 
case .float: return makeFixedHolder(Float.self, buffers: buffers, arrowType: ArrowType.ArrowFloat) case .double: return makeFixedHolder(Double.self, buffers: buffers, arrowType: ArrowType.ArrowDouble) default: - return .failure(.unknownType("Float precision \(floatType.precision) currently not supported")) + return .failure(.unknownType("\(floatType) currently not supported")) } } -private func makeDateHolder(_ dateType: org_apache_arrow_flatbuf_Date, +private func makeDateHolder(_ dateType: ArrowType, buffers: [ArrowBuffer] ) -> Result { do { - if dateType.unit == .day { - let arrowData = try ArrowData(ArrowType(ArrowType.ArrowString), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + if dateType.id == .date32 { + let arrowData = try ArrowData(dateType, buffers: buffers, + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(Date32Array(arrowData))) } - let arrowData = try ArrowData(ArrowType(ArrowType.ArrowString), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + let arrowData = try ArrowData(dateType, buffers: buffers, + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(Date64Array(arrowData))) } catch let error as ArrowError { return .failure(error) @@ -75,20 +83,18 @@ private func makeDateHolder(_ dateType: org_apache_arrow_flatbuf_Date, } } -private func makeTimeHolder(_ timeType: org_apache_arrow_flatbuf_Time, +private func makeTimeHolder(_ timeType: ArrowType, buffers: [ArrowBuffer] ) -> Result { do { - if timeType.unit == .second || timeType.unit == .millisecond { - let arrowUnit: ArrowTime32Unit = timeType.unit == .second ? 
.seconds : .milliseconds - let arrowData = try ArrowData(ArrowTypeTime32(arrowUnit), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + if timeType is ArrowTypeTime32 { + let arrowData = try ArrowData(timeType, buffers: buffers, + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(FixedArray(arrowData))) } - let arrowUnit: ArrowTime64Unit = timeType.unit == .microsecond ? .microseconds : .nanoseconds - let arrowData = try ArrowData(ArrowTypeTime64(arrowUnit), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + let arrowData = try ArrowData(timeType, buffers: buffers, + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(FixedArray(arrowData))) } catch let error as ArrowError { return .failure(error) @@ -100,7 +106,7 @@ private func makeTimeHolder(_ timeType: org_apache_arrow_flatbuf_Time, private func makeBoolHolder(_ buffers: [ArrowBuffer]) -> Result { do { let arrowData = try ArrowData(ArrowType(ArrowType.ArrowBool), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(BoolArray(arrowData))) } catch let error as ArrowError { return .failure(error) @@ -115,7 +121,7 @@ private func makeFixedHolder( ) -> Result { do { let arrowData = try ArrowData(ArrowType(arrowType), buffers: buffers, - nullCount: buffers[0].length, stride: MemoryLayout.stride) + nullCount: getNullCount(buffers[0]), stride: MemoryLayout.stride) return .success(ArrowArrayHolder(FixedArray(arrowData))) } catch let error as ArrowError { return .failure(error) @@ -125,65 +131,117 @@ private func makeFixedHolder( } func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity + arrowType: ArrowType, + buffers: [ArrowBuffer] +) -> Result { + switch arrowType.id { + case .int8: + return makeFixedHolder(Int8.self, 
buffers: buffers, arrowType: ArrowType.ArrowInt8) + case .int16: + return makeFixedHolder(Int16.self, buffers: buffers, arrowType: ArrowType.ArrowInt16) + case .int32: + return makeFixedHolder(Int32.self, buffers: buffers, arrowType: ArrowType.ArrowInt32) + case .int64: + return makeFixedHolder(Int64.self, buffers: buffers, arrowType: ArrowType.ArrowInt64) + case .uint8: + return makeFixedHolder(UInt8.self, buffers: buffers, arrowType: ArrowType.ArrowUInt8) + case .uint16: + return makeFixedHolder(UInt16.self, buffers: buffers, arrowType: ArrowType.ArrowUInt16) + case .uint32: + return makeFixedHolder(UInt32.self, buffers: buffers, arrowType: ArrowType.ArrowUInt32) + case .uint64: + return makeFixedHolder(UInt64.self, buffers: buffers, arrowType: ArrowType.ArrowUInt64) + case .binary: + return makeBinaryHolder(buffers) + case .boolean: + return makeBoolHolder(buffers) + case .date32: + return makeDateHolder(ArrowType(ArrowType.ArrowDate32), buffers: buffers) + case .date64: + return makeDateHolder(ArrowType(ArrowType.ArrowDate64), buffers: buffers) + case .float: + return makeFixedHolder(Float.self, buffers: buffers, arrowType: ArrowType.ArrowFloat) + case .double: + return makeFixedHolder(Double.self, buffers: buffers, arrowType: ArrowType.ArrowDouble) + case .string: + return makeStringHolder(buffers) + case .time32: + return makeTimeHolder(arrowType, buffers: buffers) + case .time64: + return makeTimeHolder(arrowType, buffers: buffers) + default: + return .failure(.unknownType("Type \(arrowType) currently not supported")) + } +} + +func makeArrayHolder( // swiftlint:disable:this cyclomatic_complexity function_body_length _ field: org_apache_arrow_flatbuf_Field, buffers: [ArrowBuffer] ) -> Result { let type = field.typeType + var arrowType: ArrowType? switch type { case .int: let intType = field.type(type: org_apache_arrow_flatbuf_Int.self)! 
let bitWidth = intType.bitWidth if bitWidth == 8 { - if intType.isSigned { - return makeFixedHolder(Int8.self, buffers: buffers, arrowType: ArrowType.ArrowInt8) - } else { - return makeFixedHolder(UInt8.self, buffers: buffers, arrowType: ArrowType.ArrowUInt8) - } + arrowType = ArrowType(intType.isSigned ? ArrowType.ArrowInt8 : ArrowType.ArrowUInt8) } else if bitWidth == 16 { - if intType.isSigned { - return makeFixedHolder(Int16.self, buffers: buffers, arrowType: ArrowType.ArrowInt16) - } else { - return makeFixedHolder(UInt16.self, buffers: buffers, arrowType: ArrowType.ArrowUInt16) - } + arrowType = ArrowType(intType.isSigned ? ArrowType.ArrowInt16 : ArrowType.ArrowUInt16) } else if bitWidth == 32 { - if intType.isSigned { - return makeFixedHolder(Int32.self, buffers: buffers, arrowType: ArrowType.ArrowInt32) - } else { - return makeFixedHolder(UInt32.self, buffers: buffers, arrowType: ArrowType.ArrowUInt32) - } + arrowType = ArrowType(intType.isSigned ? ArrowType.ArrowInt32 : ArrowType.ArrowUInt32) } else if bitWidth == 64 { - if intType.isSigned { - return makeFixedHolder(Int64.self, buffers: buffers, arrowType: ArrowType.ArrowInt64) - } else { - return makeFixedHolder(UInt64.self, buffers: buffers, arrowType: ArrowType.ArrowUInt64) - } + arrowType = ArrowType(intType.isSigned ? ArrowType.ArrowInt64 : ArrowType.ArrowUInt64) + } else { + return .failure(.unknownType("Int width \(bitWidth) currently not supported")) } - return .failure(.unknownType("Int width \(bitWidth) currently not supported")) case .bool: - return makeBoolHolder(buffers) + arrowType = ArrowType(ArrowType.ArrowBool) case .floatingpoint: let floatType = field.type(type: org_apache_arrow_flatbuf_FloatingPoint.self)! 
- return makeFloatHolder(floatType, buffers: buffers) + switch floatType.precision { + case .single: + arrowType = ArrowType(ArrowType.ArrowFloat) + case .double: + arrowType = ArrowType(ArrowType.ArrowDouble) + default: + return .failure(.unknownType("Float precision \(floatType.precision) currently not supported")) + } case .utf8: - return makeStringHolder(buffers) + arrowType = ArrowType(ArrowType.ArrowString) case .binary: - return makeBinaryHolder(buffers) + arrowType = ArrowType(ArrowType.ArrowBinary) case .date: let dateType = field.type(type: org_apache_arrow_flatbuf_Date.self)! - return makeDateHolder(dateType, buffers: buffers) + if dateType.unit == .day { + arrowType = ArrowType(ArrowType.ArrowDate32) + } else { + arrowType = ArrowType(ArrowType.ArrowDate64) + } case .time: let timeType = field.type(type: org_apache_arrow_flatbuf_Time.self)! - return makeTimeHolder(timeType, buffers: buffers) + if timeType.unit == .second || timeType.unit == .millisecond { + let arrowUnit: ArrowTime32Unit = timeType.unit == .second ? .seconds : .milliseconds + arrowType = ArrowTypeTime32(arrowUnit) + } else { + let arrowUnit: ArrowTime64Unit = timeType.unit == .microsecond ? .microseconds : .nanoseconds + arrowType = ArrowTypeTime64(arrowUnit) + } default: return .failure(.unknownType("Type \(type) currently not supported")) } + + return makeArrayHolder(arrowType: arrowType!, buffers: buffers) } func makeBuffer(_ buffer: org_apache_arrow_flatbuf_Buffer, fileData: Data, - length: UInt, messageOffset: Int64) -> ArrowBuffer { + length: UInt, messageOffset: Int64, nullCount: UInt? = nil) -> ArrowBuffer { let startOffset = messageOffset + buffer.offset let endOffset = startOffset + buffer.length let bufferData = [UInt8](fileData[startOffset ..< endOffset]) + if nullCount != nil { + return ArrowNullBuffer.createBuffer(bufferData, length: length, nullCount: nullCount!) 
+ } return ArrowBuffer.createBuffer(bufferData, length: length) } diff --git a/swift/Arrow/Tests/ArrowTests/IPCTests.swift b/swift/Arrow/Tests/ArrowTests/IPCTests.swift index 59cad94ef4da5..103c3b24c7b93 100644 --- a/swift/Arrow/Tests/ArrowTests/IPCTests.swift +++ b/swift/Arrow/Tests/ArrowTests/IPCTests.swift @@ -64,14 +64,16 @@ func makeSchema() -> ArrowSchema { return schemaBuilder.addField("col1", type: ArrowType(ArrowType.ArrowUInt8), isNullable: true) .addField("col2", type: ArrowType(ArrowType.ArrowString), isNullable: false) .addField("col3", type: ArrowType(ArrowType.ArrowDate32), isNullable: false) + .addField("col4", type: ArrowType(ArrowType.ArrowInt32), isNullable: false) + .addField("col5", type: ArrowType(ArrowType.ArrowFloat), isNullable: false) .finish() } func makeRecordBatch() throws -> RecordBatch { let uint8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() uint8Builder.append(10) - uint8Builder.append(22) - uint8Builder.append(33) + uint8Builder.append(nil) + uint8Builder.append(nil) uint8Builder.append(44) let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() stringBuilder.append("test10") @@ -85,13 +87,28 @@ func makeRecordBatch() throws -> RecordBatch { date32Builder.append(date2) date32Builder.append(date1) date32Builder.append(date2) - let intHolder = ArrowArrayHolder(try uint8Builder.finish()) + let int32Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + int32Builder.append(1) + int32Builder.append(2) + int32Builder.append(3) + int32Builder.append(4) + let floatBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + floatBuilder.append(211.112) + floatBuilder.append(322.223) + floatBuilder.append(433.334) + floatBuilder.append(544.445) + + let uint8Holder = ArrowArrayHolder(try uint8Builder.finish()) let stringHolder = ArrowArrayHolder(try stringBuilder.finish()) let date32Holder = ArrowArrayHolder(try date32Builder.finish()) + let int32Holder = 
ArrowArrayHolder(try int32Builder.finish()) + let floatHolder = ArrowArrayHolder(try floatBuilder.finish()) let result = RecordBatch.Builder() - .addColumn("col1", arrowArray: intHolder) + .addColumn("col1", arrowArray: uint8Holder) .addColumn("col2", arrowArray: stringHolder) .addColumn("col3", arrowArray: date32Holder) + .addColumn("col4", arrowArray: int32Holder) + .addColumn("col5", arrowArray: floatHolder) .finish() switch result { case .success(let recordBatch): @@ -182,15 +199,20 @@ final class IPCFileReaderTests: XCTestCase { XCTAssertEqual(recordBatches.count, 1) for recordBatch in recordBatches { XCTAssertEqual(recordBatch.length, 4) - XCTAssertEqual(recordBatch.columns.count, 3) - XCTAssertEqual(recordBatch.schema.fields.count, 3) + XCTAssertEqual(recordBatch.columns.count, 5) + XCTAssertEqual(recordBatch.schema.fields.count, 5) XCTAssertEqual(recordBatch.schema.fields[0].name, "col1") XCTAssertEqual(recordBatch.schema.fields[0].type.info, ArrowType.ArrowUInt8) XCTAssertEqual(recordBatch.schema.fields[1].name, "col2") XCTAssertEqual(recordBatch.schema.fields[1].type.info, ArrowType.ArrowString) XCTAssertEqual(recordBatch.schema.fields[2].name, "col3") XCTAssertEqual(recordBatch.schema.fields[2].type.info, ArrowType.ArrowDate32) + XCTAssertEqual(recordBatch.schema.fields[3].name, "col4") + XCTAssertEqual(recordBatch.schema.fields[3].type.info, ArrowType.ArrowInt32) + XCTAssertEqual(recordBatch.schema.fields[4].name, "col5") + XCTAssertEqual(recordBatch.schema.fields[4].type.info, ArrowType.ArrowFloat) let columns = recordBatch.columns + XCTAssertEqual(columns[0].nullCount, 2) let dateVal = "\((columns[2].array as! AsString).asString(0))" // swiftlint:disable:this force_cast XCTAssertEqual(dateVal, "2014-09-10 00:00:00 +0000") @@ -227,13 +249,17 @@ final class IPCFileReaderTests: XCTestCase { case .success(let result): XCTAssertNotNil(result.schema) let schema = result.schema! 
- XCTAssertEqual(schema.fields.count, 3) + XCTAssertEqual(schema.fields.count, 5) XCTAssertEqual(schema.fields[0].name, "col1") XCTAssertEqual(schema.fields[0].type.info, ArrowType.ArrowUInt8) XCTAssertEqual(schema.fields[1].name, "col2") XCTAssertEqual(schema.fields[1].type.info, ArrowType.ArrowString) XCTAssertEqual(schema.fields[2].name, "col3") XCTAssertEqual(schema.fields[2].type.info, ArrowType.ArrowDate32) + XCTAssertEqual(schema.fields[3].name, "col4") + XCTAssertEqual(schema.fields[3].type.info, ArrowType.ArrowInt32) + XCTAssertEqual(schema.fields[4].name, "col5") + XCTAssertEqual(schema.fields[4].type.info, ArrowType.ArrowFloat) case.failure(let error): throw error }