Skip to content

Commit

Permalink
apacheGH-37938: [Swift] initial impl of C Data interface
Browse files Browse the repository at this point in the history
  • Loading branch information
abandy committed Feb 7, 2024
1 parent f609bb1 commit a1404c4
Show file tree
Hide file tree
Showing 19 changed files with 987 additions and 10 deletions.
2 changes: 1 addition & 1 deletion ci/docker/ubuntu-swift.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

FROM swift:5.7.3
FROM swift:5.9.0

# Go is needed for generating test data
RUN apt-get update -y -q && \
Expand Down
1 change: 1 addition & 0 deletions dev/release/rat_exclude_files.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,4 @@ r/tools/nixlibs-allowlist.txt
ruby/red-arrow/.yardopts
.github/pull_request_template.md
swift/data-generator/swift-datagen/go.sum
swift/CDataWGo/go.sum
1 change: 1 addition & 0 deletions swift/.swiftlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ included:
- Arrow/Tests
- ArrowFlight/Sources
- ArrowFlight/Tests
- CDataWGo/Sources/go-swift
excluded:
- Arrow/Sources/Arrow/File_generated.swift
- Arrow/Sources/Arrow/Message_generated.swift
Expand Down
17 changes: 13 additions & 4 deletions swift/Arrow/Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,27 @@ let package = Package(
// and therefore doesn't include the unaligned buffer swift changes.
// This can be changed back to using the tag once a new version of
// flatbuffers has been released.
.package(url: "https://github.com/google/flatbuffers.git", branch: "master")
.package(url: "https://github.com/google/flatbuffers.git", branch: "master"),
.package(
url: "https://github.com/apple/swift-atomics.git",
.upToNextMajor(from: "1.2.0") // or `.upToNextMinor
)
],
targets: [
// Targets are the basic building blocks of a package. A target can define a module or a test suite.
// Targets can depend on other targets in this package, and on products in packages this package depends on.
.target(
name: "ArrowC", //your C/C++ library's name
path: "Sources/ArrowC" //your path to the C/C++ library
),
.target(
name: "Arrow",
dependencies: [
.product(name: "FlatBuffers", package: "flatbuffers")
dependencies: ["ArrowC",
.product(name: "FlatBuffers", package: "flatbuffers"),
.product(name: "Atomics", package: "swift-atomics")
]),
.testTarget(
name: "ArrowTests",
dependencies: ["Arrow"]),
dependencies: ["Arrow", "ArrowC"]),
]
)
15 changes: 13 additions & 2 deletions swift/Arrow/Sources/Arrow/ArrowBuffer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,33 @@ public class ArrowBuffer {
fileprivate(set) var length: UInt
let capacity: UInt
let rawPointer: UnsafeMutableRawPointer
let isMemoryOwner: Bool

init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) {
init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true) {
self.length = length
self.capacity = capacity
self.rawPointer = rawPointer
self.isMemoryOwner = isMemoryOwner
}

deinit {
self.rawPointer.deallocate()
if isMemoryOwner {
self.rawPointer.deallocate()
}
}

/// Appends the contents of this buffer to `data`.
///
/// The number of bytes copied is the buffer's `capacity`, not its `length`.
///
/// - Parameter data: The destination that receives the bytes.
func append(to data: inout Data) {
    // View the raw allocation as plain bytes so it can be handed to Data.
    let bytes = rawPointer.assumingMemoryBound(to: UInt8.self)
    let byteCount = Int(capacity)
    data.append(UnsafePointer(bytes), count: byteCount)
}

/// Creates a buffer with zero length and zero capacity.
///
/// The buffer is created with the initializer's default ownership, so the
/// (zero byte) allocation made here is deallocated together with the buffer.
///
/// - Returns: A new, empty `ArrowBuffer`.
static func createEmptyBuffer() -> ArrowBuffer {
    // `allocate(byteCount:alignment:)` documents that the alignment must be a
    // whole power of two; zero is not, so use the alignment of a single byte.
    let emptyAllocation = UnsafeMutableRawPointer.allocate(
        byteCount: 0,
        alignment: MemoryLayout<UInt8>.alignment)
    return ArrowBuffer(
        length: 0,
        capacity: 0,
        rawPointer: emptyAllocation)
}

static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer {
let byteCount = UInt(data.count)
let capacity = alignTo64(byteCount)
Expand Down
108 changes: 108 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowCExporter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation
import ArrowC
import Atomics

extension String {
/// This string as a C string in UTF-8 encoding.
///
/// The pointer is whatever `NSString.cString(using:)` returns for a
/// temporary `NSString` bridged from this string; the result is
/// force-unwrapped, so a `nil` result from that call traps.
///
/// NOTE(review): nothing here keeps the bridged `NSString` alive, so the
/// returned pointer presumably must not be stored beyond the immediate
/// use - confirm against the `NSString.cString(using:)` lifetime rules.
var cstring: UnsafePointer<CChar> {
(self as NSString).cString(using: String.Encoding.utf8.rawValue)!
}
}

// Counter that hands out the ids used for exported data.
//
// The memory used by UnsafeAtomic is not automatically
// reclaimed. Since this value is initialized once
// and used until the program/app is closed, its
// memory will be released on program/app exit.
let exportDataCounter: UnsafeAtomic<Int> = .create(0)

/// Exports Swift Arrow types and arrays through the Arrow C Data Interface
/// structures (`ArrowC.ArrowSchema` and `ArrowC.ArrowArray`).
///
/// Everything an exported structure points at is owned by a bookkeeping object
/// stored in a static registry. The structure's `private_data` carries the
/// registry id, and the `release` callback removes the registry entry, which
/// frees the associated memory. Calling `release` again (for example through a
/// by-value copy of the structure) finds no entry and is a no-op.
///
/// NOTE(review): the registries are plain static dictionaries without any
/// synchronization - confirm that exports and releases are not expected to
/// happen concurrently.
public class ArrowCExporter {
    /// Owns the C strings referenced by one exported `ArrowSchema`.
    private class ExportSchema {
        let id: Int
        let format: UnsafeMutablePointer<CChar>
        let name: UnsafeMutablePointer<CChar>

        init(format: String, name: String) {
            id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed)
            self.format = ArrowCExporter.makeCString(format)
            self.name = ArrowCExporter.makeCString(name)
            ArrowCExporter.exportedSchemas[id] = self
        }

        deinit {
            format.deallocate()
            name.deallocate()
        }

        /// Drops the registry entry; the strings are freed in `deinit`.
        func release() {
            ArrowCExporter.exportedSchemas.removeValue(forKey: id)
        }
    }

    /// Owns the buffer pointer table referenced by one exported `ArrowArray`
    /// and keeps the backing `ArrowData` alive while the export is in use.
    private class ExportData {
        let id: Int
        var cArray = ArrowC.ArrowArray()
        private let arrowData: ArrowData
        private(set) var data = [UnsafeRawPointer?]()
        private(set) var buffers: UnsafeMutablePointer<UnsafeRawPointer?>

        init(_ arrowData: ArrowData) {
            id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed)

            // keep a reference to the ArrowData
            // obj so the memory doesn't get
            // deallocated
            self.arrowData = arrowData

            var pointers = [UnsafeRawPointer?]()
            for arrowBuffer in arrowData.buffers {
                pointers.append(UnsafeRawPointer(arrowBuffer.rawPointer))
            }
            data = pointers

            // A pointer obtained from a Swift array is only valid for the
            // duration of the call it is passed to, so the table handed to the
            // consumer is copied into memory owned by this object.
            let table = UnsafeMutablePointer<UnsafeRawPointer?>.allocate(
                capacity: max(pointers.count, 1))
            table.initialize(from: pointers, count: pointers.count)
            buffers = table

            ArrowCExporter.exportedData[id] = self
        }

        deinit {
            buffers.deallocate()
        }

        /// Drops the registry entry.
        ///
        /// The buffer contents themselves are not freed here: they are still
        /// associated with the ArrowBuffer objects, which deallocate them.
        func release() {
            // The entry is stored under `id`, so it must be removed with `id`;
            // `id.hashValue` is not guaranteed to equal `id`.
            ArrowCExporter.exportedData.removeValue(forKey: id)
        }
    }

    private static var exportedSchemas = [Int: ExportSchema]()
    private static var exportedData = [Int: ExportData]()

    /// Copies `value` into newly allocated, NUL-terminated UTF-8 storage.
    /// The caller owns the result and must `deallocate()` it.
    private static func makeCString(_ value: String) -> UnsafeMutablePointer<CChar> {
        let utf8 = value.utf8CString
        let copy = UnsafeMutablePointer<CChar>.allocate(capacity: utf8.count)
        for (index, byte) in utf8.enumerated() {
            (copy + index).initialize(to: byte)
        }
        return copy
    }

    public init() {}

    /// Fills `cSchema` so that it describes `arrowType`.
    ///
    /// - Parameters:
    ///   - cSchema: The C schema structure to populate.
    ///   - arrowType: The type whose C data format id is exported.
    ///   - name: The field name stored in the schema; defaults to an empty string.
    /// - Returns: `.success(true)` when the schema was populated, or
    ///   `.failure(.unknownError)` when the format id could not be produced.
    public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: ArrowType, name: String = "") ->
        Result<Bool, ArrowError> {
        let formatId: String
        do {
            formatId = try arrowType.cDataFormatId
        } catch {
            return .failure(.unknownError("\(error)"))
        }

        // The strings live in the registry entry until the consumer releases
        // the schema, so the pointers stored below stay valid.
        let exportSchema = ExportSchema(format: formatId, name: name)
        cSchema.format = UnsafePointer(exportSchema.format)
        cSchema.name = UnsafePointer(exportSchema.name)
        cSchema.private_data = UnsafeMutableRawPointer(bitPattern: exportSchema.id)
        cSchema.release = { schema in
            guard let schema = schema else {
                return
            }

            let exportId = Int(bitPattern: schema.pointee.private_data)
            ArrowCExporter.exportedSchemas[exportId]?.release()
            schema.pointee.release = nil
        }
        return .success(true)
    }

    /// Fills `schema` from the type and name of `field`.
    ///
    /// - Returns: The result of `exportType` for the field's type and name.
    public func exportField(_ schema: inout ArrowC.ArrowSchema, field: ArrowField) ->
        Result<Bool, ArrowError> {
        return exportType(&schema, arrowType: field.type, name: field.name)
    }

    /// Fills `cArray` so that it exposes the buffers of `arrowData`.
    ///
    /// The exported array has no children and no dictionary. `arrowData` is
    /// retained until the consumer invokes the `release` callback.
    ///
    /// - Parameters:
    ///   - cArray: The C array structure to populate.
    ///   - arrowData: The data whose buffers are exported.
    public func exportArray(_ cArray: inout ArrowC.ArrowArray, arrowData: ArrowData) {
        let exportData = ExportData(arrowData)
        cArray.buffers = exportData.buffers
        cArray.length = Int64(arrowData.length)
        cArray.null_count = Int64(arrowData.nullCount)
        cArray.offset = 0
        cArray.n_buffers = Int64(arrowData.buffers.count)
        cArray.n_children = 0
        cArray.children = nil
        cArray.dictionary = nil
        cArray.private_data = UnsafeMutableRawPointer(bitPattern: exportData.id)
        cArray.release = { array in
            guard let array = array else {
                return
            }

            let exportId = Int(bitPattern: array.pointee.private_data)
            ArrowCExporter.exportedData[exportId]?.release()
            // Mark the consumer's structure as released by writing through the
            // pointer; assigning to a local copy would leave it untouched.
            array.pointee.release = nil
        }
    }
}
144 changes: 144 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowCImporter.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation
import ArrowC

/// Imports Arrow C Data Interface structures (`ArrowC.ArrowSchema` and
/// `ArrowC.ArrowArray`) into Swift Arrow fields and arrays.
///
/// Imported buffers wrap the producer's memory without taking ownership of it
/// (`isMemoryOwner: false`), and `importArray` does not invoke the array's
/// release callback.
public class ArrowCImporter {
    /// Wraps one C buffer in an `ArrowBuffer` and appends it to `arrowBuffers`.
    ///
    /// A nil `cBuffer` is represented by an empty buffer.
    ///
    /// - Parameters:
    ///   - cBuffer: The producer's buffer, or nil when it is absent.
    ///   - arrowBuffers: The list the wrapped buffer is appended to.
    ///   - byteCount: The size of the buffer in bytes, used as its capacity.
    ///   - length: The length recorded on the wrapped buffer.
    ///   - nullCount: Currently unused.
    private func appendToBuffer(
        _ cBuffer: UnsafeRawPointer?,
        arrowBuffers: inout [ArrowBuffer],
        byteCount: Int,
        length: UInt,
        nullCount: UInt? = nil) {
        guard let cBuffer = cBuffer else {
            arrowBuffers.append(ArrowBuffer.createEmptyBuffer())
            return
        }

        arrowBuffers.append(
            ArrowBuffer(
                length: length,
                capacity: UInt(byteCount),
                rawPointer: UnsafeMutableRawPointer(mutating: cBuffer),
                isMemoryOwner: false))
    }

    public init() {}

    /// Builds a nullable field from a C data format string.
    ///
    /// - Parameters:
    ///   - cArrow: The C data format id of the type.
    ///   - name: The name given to the field; defaults to an empty string.
    /// - Returns: The field, or `.failure(.invalid)` when the format id could
    ///   not be converted to a type.
    public func importType(_ cArrow: String, name: String = "") ->
        Result<ArrowField, ArrowError> {
        do {
            let type = try ArrowType.fromCDataFormatId(cArrow)
            return .success(ArrowField(name, type: ArrowType(type.info), isNullable: true))
        } catch {
            return .failure(.invalid("\(error)"))
        }
    }

    /// Builds a field from a C schema and releases the schema on success.
    ///
    /// Schemas with children or a dictionary are rejected. On failure the
    /// schema is not released.
    ///
    /// - Parameter cSchema: The schema to import.
    /// - Returns: The imported field, or `.failure(.invalid)` describing why
    ///   the schema could not be imported.
    public func importField(_ cSchema: ArrowC.ArrowSchema) ->
        Result<ArrowField, ArrowError> {
        if cSchema.n_children > 0 {
            return .failure(.invalid("Children currently not supported"))
        } else if cSchema.dictionary != nil {
            return .failure(.invalid("Dictionary types currently not supported"))
        }

        guard let format = cSchema.format else {
            return .failure(.invalid("C schema format is nil"))
        }

        // The name is optional in the C Data Interface, so a NULL name is
        // imported as an empty name instead of being dereferenced.
        let fieldName: String
        if let name = cSchema.name {
            fieldName = String(cString: name)
        } else {
            fieldName = ""
        }

        switch importType(String(cString: format), name: fieldName) {
        case .success(let field):
            release(cSchema)
            return .success(field)
        case .failure(let err):
            return .failure(err)
        }
    }

    /// Imports a C array as an array of `arrowType`.
    ///
    /// - Parameters:
    ///   - cArray: The array to import.
    ///   - arrowType: The type of the array's values.
    ///   - isNullable: Whether the created field is nullable; defaults to `false`.
    /// - Returns: The result of importing with an unnamed field of that type.
    public func importArray(
        _ cArray: ArrowC.ArrowArray,
        arrowType: ArrowType,
        isNullable: Bool = false) -> Result<ArrowArrayHolder, ArrowError> {
        let arrowField = ArrowField("", type: arrowType, isNullable: isNullable)
        return importArray(cArray, arrowField: arrowField)
    }

    /// Imports a C array as an array described by `arrowField`.
    ///
    /// Arrays with an uncomputed null count, children, a dictionary, a
    /// non-zero offset or a negative length are rejected. Variable-width types
    /// must provide three buffers (validity, offsets, data); all other types
    /// must provide two (validity, values).
    ///
    /// - Parameters:
    ///   - cArray: The array to import.
    ///   - arrowField: The field describing the array's type.
    /// - Returns: The imported array holder, or `.failure(.invalid)` describing
    ///   why the array could not be imported.
    public func importArray(
        _ cArray: ArrowC.ArrowArray,
        arrowField: ArrowField) -> Result<ArrowArrayHolder, ArrowError> {
        if cArray.null_count < 0 {
            return .failure(.invalid("Uncomputed null count is not supported"))
        } else if cArray.n_children > 0 {
            return .failure(.invalid("Children currently not supported"))
        } else if cArray.dictionary != nil {
            return .failure(.invalid("Dictionary types currently not supported"))
        } else if cArray.offset != 0 {
            return .failure(.invalid("Offset of 0 is required but found offset: \(cArray.offset)"))
        } else if cArray.length < 0 {
            // Converting a negative value to UInt below would trap.
            return .failure(.invalid("Length must not be negative but found length: \(cArray.length)"))
        }

        let arrowType = arrowField.type
        let length = UInt(cArray.length)
        let nullCount = UInt(cArray.null_count)
        // One validity bit per value, rounded up to whole bytes.
        let nullBytes = (Int(length) + 7) / 8
        var arrowBuffers = [ArrowBuffer]()

        if cArray.n_buffers > 0 {
            guard let cBuffers = cArray.buffers else {
                return .failure(.invalid("C array buffers is nil"))
            }

            switch arrowType.info {
            case .variableInfo:
                if cArray.n_buffers != 3 {
                    return .failure(
                        .invalid("Variable buffer count expected 3 but found \(cArray.n_buffers)"))
                }

                appendToBuffer(cBuffers[0], arrowBuffers: &arrowBuffers, byteCount: nullBytes, length: length,
                               nullCount: nullCount)

                // The offsets buffer holds length + 1 entries; the last entry
                // is the total number of bytes in the data buffer.
                let offsetStride = MemoryLayout<Int32>.stride
                let offsetsByteCount = offsetStride * (Int(length) + 1)
                appendToBuffer(cBuffers[1], arrowBuffers: &arrowBuffers, byteCount: offsetsByteCount, length: length)

                var dataByteCount = 0
                if length > 0, let offsets = cBuffers[1] {
                    let lastOffset = offsets.load(fromByteOffset: offsetStride * Int(length), as: Int32.self)
                    if lastOffset < 0 {
                        return .failure(.invalid("Negative offset found in offsets buffer: \(lastOffset)"))
                    }

                    dataByteCount = Int(lastOffset)
                }

                appendToBuffer(cBuffers[2], arrowBuffers: &arrowBuffers, byteCount: dataByteCount, length: length)
            default:
                if cArray.n_buffers != 2 {
                    return .failure(.invalid("Expected buffer count 2 but found \(cArray.n_buffers)"))
                }

                appendToBuffer(cBuffers[0], arrowBuffers: &arrowBuffers, byteCount: nullBytes, length: length,
                               nullCount: nullCount)
                let byteCount = arrowType.getStride() * Int(length)
                appendToBuffer(cBuffers[1], arrowBuffers: &arrowBuffers, byteCount: byteCount, length: length)
            }
        }

        return makeArrayHolder(arrowField, buffers: arrowBuffers, nullCount: nullCount)
    }

    /// Invokes the release callback of `cArray`, if it has one.
    ///
    /// The callback receives a pointer to a copy of `cArray`, because the
    /// structure is passed to this method by value.
    public func release(_ cArray: ArrowC.ArrowArray) {
        guard let releaseCallback = cArray.release else {
            return
        }

        // A local copy gives the callback a valid pointer for the duration of
        // the call without a heap allocation that would have to be freed.
        var arrayCopy = cArray
        withUnsafeMutablePointer(to: &arrayCopy) { arrayPtr in
            releaseCallback(arrayPtr)
        }
    }

    /// Invokes the release callback of `cSchema`, if it has one.
    ///
    /// The callback receives a pointer to a copy of `cSchema`, because the
    /// structure is passed to this method by value.
    public func release(_ cSchema: ArrowC.ArrowSchema) {
        guard let releaseCallback = cSchema.release else {
            return
        }

        var schemaCopy = cSchema
        withUnsafeMutablePointer(to: &schemaCopy) { schemaPtr in
            releaseCallback(schemaPtr)
        }
    }
}
6 changes: 3 additions & 3 deletions swift/Arrow/Sources/Arrow/ArrowSchema.swift
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@

import Foundation
public class ArrowField {
let type: ArrowType
let name: String
let isNullable: Bool
public let type: ArrowType
public let name: String
public let isNullable: Bool

init(_ name: String, type: ArrowType, isNullable: Bool) {
self.name = name
Expand Down
Loading

0 comments on commit a1404c4

Please sign in to comment.