Skip to content

Commit

Permalink
apacheGH-37938: [Swift] initial impl of C Data interface
Browse files Browse the repository at this point in the history
  • Loading branch information
abandy committed May 24, 2024
1 parent fb61e9f commit 869898c
Show file tree
Hide file tree
Showing 28 changed files with 1,229 additions and 55 deletions.
2 changes: 1 addition & 1 deletion ci/docker/ubuntu-swift.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

FROM swift:5.7.3
FROM swift:5.9.0

# Go is needed for generating test data
RUN apt-get update -y -q && \
Expand Down
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,4 @@ r/tools/nixlibs-allowlist.txt
ruby/red-arrow/.yardopts
.github/pull_request_template.md
swift/data-generator/swift-datagen/go.sum
swift/CDataWGo/go.sum
4 changes: 4 additions & 0 deletions swift/.swiftlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@
# under the License.

included:
- Arrow/Package.swift
- Arrow/Sources
- Arrow/Tests
- ArrowFlight/Package.swift
- ArrowFlight/Sources
- ArrowFlight/Tests
- CDataWGo/Package.swift
- CDataWGo/Sources/go-swift
excluded:
- Arrow/Sources/Arrow/File_generated.swift
- Arrow/Sources/Arrow/Message_generated.swift
Expand Down
22 changes: 14 additions & 8 deletions swift/Arrow/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,34 @@ let package = Package(
.macOS(.v10_14)
],
products: [
// Products define the executables and libraries a package produces, and make them visible to other packages.
.library(
name: "Arrow",
targets: ["Arrow"]),
targets: ["Arrow"])
],
dependencies: [
// The latest version of flatbuffers v23.5.26 was built in May 26, 2023
// and therefore doesn't include the unaligned buffer swift changes.
// This can be changed back to using the tag once a new version of
// flatbuffers has been released.
.package(url: "https://github.com/google/flatbuffers.git", branch: "master")
.package(url: "https://github.com/google/flatbuffers.git", branch: "master"),
.package(
url: "https://github.com/apple/swift-atomics.git",
.upToNextMajor(from: "1.2.0") // or `.upToNextMinor
)
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "ArrowC",
path: "Sources/ArrowC"
),
.target(
name: "Arrow",
dependencies: [
.product(name: "FlatBuffers", package: "flatbuffers")
dependencies: ["ArrowC",
.product(name: "FlatBuffers", package: "flatbuffers"),
.product(name: "Atomics", package: "swift-atomics")
]),
.testTarget(
name: "ArrowTests",
dependencies: ["Arrow"]),
dependencies: ["Arrow", "ArrowC"])
]
)
39 changes: 25 additions & 14 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,29 @@

import Foundation

public class ArrowArrayHolder {
/// Common, non-generic view over an Arrow array.
///
/// The concrete array is exposed as `Any` through `array`, so conformers
/// can be stored and passed around without knowing the element type.
public protocol ArrowArrayHolder {
/// Arrow type of the held array.
var type: ArrowType {get}
/// Number of elements in the held array.
var length: UInt {get}
/// Number of null elements in the held array.
var nullCount: UInt {get}
/// The underlying array, type-erased to `Any`.
var array: Any {get}
/// The `ArrowData` backing the held array.
var data: ArrowData {get}
/// Closure returning a list of `Data` values.
/// NOTE(review): presumably one entry per underlying buffer - confirm against the implementation.
var getBufferData: () -> [Data] {get}
/// Closure returning a list of sizes.
/// NOTE(review): presumably the byte size of each underlying buffer - confirm against the implementation.
var getBufferDataSizes: () -> [Int] {get}
/// Closure that builds an `ArrowColumn` for the given field from a list of holders; may throw.
var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {get}
}

public class ArrowArrayHolderImpl: ArrowArrayHolder {
public let array: Any
public let data: ArrowData
public let type: ArrowType
public let length: UInt
public let nullCount: UInt
public let array: Any
public let getBufferData: () -> [Data]
public let getBufferDataSizes: () -> [Int]
private let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
public init<T>(_ arrowArray: ArrowArray<T>) {
self.array = arrowArray
self.data = arrowArray.arrowData
self.length = arrowArray.length
self.type = arrowArray.arrowData.type
self.nullCount = arrowArray.nullCount
Expand Down Expand Up @@ -60,19 +73,9 @@ public class ArrowArrayHolder {
return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray<T>(arrays)))
}
}

public static func makeArrowColumn(_ field: ArrowField,
holders: [ArrowArrayHolder]
) -> Result<ArrowColumn, ArrowError> {
do {
return .success(try holders[0].getArrowColumn(field, holders))
} catch {
return .failure(.runtimeError("\(error)"))
}
}
}

public class ArrowArray<T>: AsString {
public class ArrowArray<T>: AsString, AnyArray {
public typealias ItemType = T
public let arrowData: ArrowData
public var nullCount: UInt {return self.arrowData.nullCount}
Expand Down Expand Up @@ -101,6 +104,14 @@ public class ArrowArray<T>: AsString {

return "\(self[index]!)"
}

/// Returns the element at `index` boxed as `Any`.
///
/// - Parameter index: Position of the element to read.
/// - Returns: The element, or `nil` when the subscript yields `nil` for `index`.
public func asAny(_ index: UInt) -> Any? {
    guard let element = self[index] else {
        return nil
    }

    return element
}
}

public class FixedArray<T>: ArrowArray<T> {
Expand Down
17 changes: 14 additions & 3 deletions swift/Arrow/Sources/Arrow/ArrowBuffer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,34 @@ public class ArrowBuffer {
static let maxLength = UInt.max
fileprivate(set) var length: UInt
let capacity: UInt
let rawPointer: UnsafeMutableRawPointer
public let rawPointer: UnsafeMutableRawPointer
let isMemoryOwner: Bool

init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) {
init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true) {
self.length = length
self.capacity = capacity
self.rawPointer = rawPointer
self.isMemoryOwner = isMemoryOwner
}

deinit {
self.rawPointer.deallocate()
if isMemoryOwner {
self.rawPointer.deallocate()
}
}

/// Appends the bytes of this buffer to `data`.
///
/// Note that `capacity` bytes are copied, not `length` bytes.
///
/// - Parameter data: Destination that receives the bytes.
func append(to data: inout Data) {
    let byteCount = Int(capacity)
    let bytes = rawPointer.assumingMemoryBound(to: UInt8.self)
    data.append(UnsafePointer(bytes), count: byteCount)
}

/// Creates a buffer with zero length and zero capacity.
///
/// The buffer is created with the default `isMemoryOwner` value, so the
/// zero-byte allocation is deallocated when the buffer is deinitialized.
///
/// - Returns: A new, empty `ArrowBuffer`.
static func createEmptyBuffer() -> ArrowBuffer {
    // `allocate(byteCount:alignment:)` documents that the alignment must be
    // a whole power of two; zero does not satisfy that contract, so use the
    // natural alignment of a byte instead.
    let emptyAllocation = UnsafeMutableRawPointer.allocate(
        byteCount: 0,
        alignment: MemoryLayout<UInt8>.alignment)
    return ArrowBuffer(
        length: 0,
        capacity: 0,
        rawPointer: emptyAllocation)
}

static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer {
let byteCount = UInt(data.count)
let capacity = alignTo64(byteCount)
Expand Down
133 changes: 133 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowCExporter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation
import ArrowC
import Atomics

// Process-wide counter used to hand out a unique id to every
// exported object (see ExportData.init in ArrowCExporter).
//
// The memory used by UnsafeAtomic is not automatically
// reclaimed. Since this value is initialized once
// and used until the program/app is closed, its
// memory will be released on program/app exit.
let exportDataCounter: UnsafeAtomic<Int> = .create(0)

/// Exports Swift Arrow types, fields and array data into the C structs
/// (`ArrowC.ArrowSchema` / `ArrowC.ArrowArray`) of the Arrow C Data interface.
///
/// Every export creates a bookkeeping object that owns the memory handed to
/// the C side. The object is stored in a static registry under a unique id,
/// and that id is written into the struct's `private_data` field. The
/// `release` callback installed on the struct looks the id up again and
/// removes the object from the registry, which frees the owned memory.
public class ArrowCExporter {
    /// Base class of all bookkeeping objects. Assigns a unique id and
    /// registers the instance so it stays alive until it is released.
    private class ExportData {
        let id: Int
        init() {
            id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed)
            ArrowCExporter.exportedData[id] = self
        }
    }

    /// Owns the C strings (format and name) referenced by an exported schema.
    private class ExportSchema: ExportData {
        /// Heap-allocated, NUL-terminated copy of the C data format id.
        let arrowTypeName: UnsafePointer<CChar>
        /// Heap-allocated, NUL-terminated copy of the field name.
        let nameCstr: UnsafePointer<CChar>
        private let arrowType: ArrowType
        private let name: String

        /// - Parameters:
        ///   - arrowType: Type whose `cDataFormatId` becomes the schema format.
        ///   - name: Name to export; defaults to the empty string.
        /// - Throws: Whatever `cDataFormatId` throws, or
        ///   `ArrowError.unknownError` when a C string cannot be allocated.
        init(_ arrowType: ArrowType, name: String = "") throws {
            self.arrowType = arrowType
            self.name = name
            let formatId = try arrowType.cDataFormatId
            // Own private copies of the strings. A pointer obtained from a
            // temporary NSString bridge (`utf8String`) is not guaranteed to
            // outlive that temporary, so it must not be stored.
            guard let formatCopy = strdup(formatId) else {
                throw ArrowError.unknownError("Unable to allocate C string for schema format")
            }
            guard let nameCopy = strdup(name) else {
                // deinit does not run for a partially initialized object
                free(formatCopy)
                throw ArrowError.unknownError("Unable to allocate C string for schema name")
            }

            self.arrowTypeName = UnsafePointer(formatCopy)
            self.nameCstr = UnsafePointer(nameCopy)
            super.init()
        }

        deinit {
            free(UnsafeMutablePointer(mutating: arrowTypeName))
            free(UnsafeMutablePointer(mutating: nameCstr))
        }
    }

    /// Owns the buffer pointer table referenced by an exported array and
    /// keeps the backing `ArrowData` alive.
    private class ExportArray: ExportData {
        private let arrowData: ArrowData
        /// The buffer pointers, in the order of `arrowData.buffers`.
        let data: [UnsafeRawPointer?]
        /// Heap-allocated table holding a copy of `data`; handed to C.
        let buffers: UnsafeMutablePointer<UnsafeRawPointer?>

        init(_ arrowData: ArrowData) {
            // keep a reference to the ArrowData
            // obj so the memory doesn't get
            // deallocated
            self.arrowData = arrowData
            var pointers = [UnsafeRawPointer?]()
            for arrowBuffer in arrowData.buffers {
                pointers.append(UnsafeRawPointer(arrowBuffer.rawPointer))
            }

            // A pointer produced by implicitly converting a Swift array is
            // only valid for the duration of that call, so the table given
            // to the C consumer has to be allocated and owned explicitly.
            let table = UnsafeMutablePointer<UnsafeRawPointer?>.allocate(capacity: pointers.count)
            for (index, pointer) in pointers.enumerated() {
                (table + index).initialize(to: pointer)
            }

            self.data = pointers
            self.buffers = table
            super.init()
        }

        deinit {
            buffers.deinitialize(count: data.count)
            buffers.deallocate()
        }
    }

    // Registry of live exports keyed by ExportData.id.
    // NOTE(review): this dictionary is read and written without any
    // synchronization, so exporting or releasing from several threads at
    // once is not safe even though the id counter itself is atomic.
    private static var exportedData = [Int: ExportData]()
    public init() {}

    /// Fills `cSchema` so that it describes `arrowType`.
    ///
    /// Only `format`, `name`, `private_data` and `release` are written; all
    /// other fields of `cSchema` are left as provided by the caller.
    ///
    /// - Parameters:
    ///   - cSchema: The C schema struct to populate.
    ///   - arrowType: The type to export.
    ///   - name: Name to store in the schema; defaults to the empty string.
    /// - Returns: `.success(true)` on success, or `.failure(.unknownError)`
    ///   describing the error raised while building the export.
    public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: ArrowType, name: String = "") ->
        Result<Bool, ArrowError> {
        do {
            let exportSchema = try ExportSchema(arrowType, name: name)
            cSchema.format = exportSchema.arrowTypeName
            cSchema.name = exportSchema.nameCstr
            cSchema.private_data =
                UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportSchema.id))
            cSchema.release = {(data: UnsafeMutablePointer<ArrowC.ArrowSchema>?) in
                guard let schemaPointer = data else {
                    fatalError("Schema release callback invoked with a null pointer")
                }

                let exportId = Int(bitPattern: schemaPointer.pointee.private_data)
                guard ArrowCExporter.exportedData[exportId] != nil else {
                    fatalError("Export schema not found with id \(exportId)")
                }

                // the data associated with this exportSchema object,
                // which includes the C strings for the format and name,
                // will be deallocated upon removal
                ArrowCExporter.exportedData.removeValue(forKey: exportId)
                ArrowC.ArrowSwiftClearReleaseSchema(data)
            }
        } catch {
            return .failure(.unknownError("\(error)"))
        }
        return .success(true)
    }

    /// Fills `schema` so that it describes `field`, using the field's type
    /// and name.
    ///
    /// - Returns: The result of `exportType` for the field's type and name.
    public func exportField(_ schema: inout ArrowC.ArrowSchema, field: ArrowField) ->
        Result<Bool, ArrowError> {
        return exportType(&schema, arrowType: field.type, name: field.name)
    }

    /// Fills `cArray` so that it references the buffers of `arrowData`.
    ///
    /// `arrowData` is retained until the consumer invokes the `release`
    /// callback stored in `cArray`.
    ///
    /// - Parameters:
    ///   - cArray: The C array struct to populate.
    ///   - arrowData: The data whose buffers are exported.
    public func exportArray(_ cArray: inout ArrowC.ArrowArray, arrowData: ArrowData) {
        let exportArray = ExportArray(arrowData)
        cArray.buffers = exportArray.buffers
        cArray.length = Int64(arrowData.length)
        cArray.null_count = Int64(arrowData.nullCount)
        cArray.n_buffers = Int64(arrowData.buffers.count)
        // Swift Arrow does not currently support children or dictionaries
        // This will need to be updated once support has been added
        cArray.n_children = 0
        cArray.children = nil
        cArray.dictionary = nil
        cArray.private_data =
            UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportArray.id))
        cArray.release = {(data: UnsafeMutablePointer<ArrowC.ArrowArray>?) in
            guard let arrayPointer = data else {
                fatalError("Array release callback invoked with a null pointer")
            }

            let exportId = Int(bitPattern: arrayPointer.pointee.private_data)
            guard ArrowCExporter.exportedData[exportId] != nil else {
                fatalError("Export data not found with id \(exportId)")
            }

            // the data associated with this exportArray object,
            // which includes the entire arrowData object
            // and the buffers pointer table, will
            // be deallocated upon removal
            ArrowCExporter.exportedData.removeValue(forKey: exportId)
            ArrowC.ArrowSwiftClearReleaseArray(data)
        }
    }
}
Loading

0 comments on commit 869898c

Please sign in to comment.