From 3999384c3e05ef8ef804ab651e1bebee8bf7670c Mon Sep 17 00:00:00 2001
From: Thomas A Caswell
Date: Sat, 8 Jun 2024 16:08:01 -0400
Subject: [PATCH 01/11] MINOR: [Python] spell "language" correctly in trove
classifier (#42031)
### Rationale for this change
Newer (possibly unreleased) versions of the Python build tools check that the classifiers are valid, and the build failed because of this typo.
### What changes are included in this PR?
Fix the spelling of a word
### Are these changes tested?
Build will fail without these changes, has no run-time effect.
### Are there any user-facing changes?
no
Authored-by: Thomas A Caswell
Signed-off-by: Sutou Kouhei
---
python/pyproject.toml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/pyproject.toml b/python/pyproject.toml
index f72c3a91eb436..86a90906d02f9 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -47,7 +47,7 @@ classifiers = [
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
- 'Programming Langauge :: Python :: 3.12',
+ 'Programming Language :: Python :: 3.12',
]
maintainers = [
{name = "Apache Arrow Developers", email = "dev@arrow.apache.org"}
From 601be7687ba89f711b876397746b5f49503c0871 Mon Sep 17 00:00:00 2001
From: abandy
Date: Sat, 8 Jun 2024 17:25:28 -0400
Subject: [PATCH 02/11] GH-42020: [Swift] Add Arrow decoding implementation for
Swift Codable (#42023)
### Rationale for this change
This change implements decode for the Arrow Swift Codable implementation. This allows the data in a RecordBatch to be copied to properties in a struct/class.
The PR is a bit longer than desired but all three container types are required in order to implement the Decoder protocol.
### What changes are included in this PR?
The ArrowDecoder class is included in this PR along with a class for each container type (keyed, unkeyed, and single). Most of the logic is encapsulated in the ArrowDecoder, with minimal logic in each container class (most of the methods in the container classes are a single line that calls the ArrowDecoder doDecode methods).
### Are these changes tested?
Yes, a test has been added to test the three types of containers provided by the decoder.
* GitHub Issue: #42020
Authored-by: Alva Bandy
Signed-off-by: Sutou Kouhei
---
swift/Arrow/Sources/Arrow/ArrowDecoder.swift | 347 ++++++++++++++++++
.../Arrow/Tests/ArrowTests/CodableTests.swift | 170 +++++++++
2 files changed, 517 insertions(+)
create mode 100644 swift/Arrow/Sources/Arrow/ArrowDecoder.swift
create mode 100644 swift/Arrow/Tests/ArrowTests/CodableTests.swift
diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
new file mode 100644
index 0000000000000..7e0c69b1e79e8
--- /dev/null
+++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
@@ -0,0 +1,347 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import Foundation
+
+/// `Decoder` implementation that reads values out of the columns of a `RecordBatch`.
+///
+/// Values are always read at row `rbIndex`; columns can be addressed either by
+/// name (`nameToCol`) or by position (`columns`).
+public class ArrowDecoder: Decoder {
+ // Row of the record batch that the doDecode methods read from.
+ var rbIndex: UInt = 0
+ public var codingPath: [CodingKey] = []
+ public var userInfo: [CodingUserInfoKey: Any] = [:]
+ public let rb: RecordBatch
+ public let nameToCol: [String: ArrowArrayHolder]
+ public let columns: [ArrowArrayHolder]
+ /// Creates a decoder that copies every piece of state (record batch, column
+ /// lookups, current row, coding path and user info) from `decoder`.
+ public init(_ decoder: ArrowDecoder) {
+ self.userInfo = decoder.userInfo
+ self.codingPath = decoder.codingPath
+ self.rb = decoder.rb
+ self.columns = decoder.columns
+ self.nameToCol = decoder.nameToCol
+ self.rbIndex = decoder.rbIndex
+ }
+
+ /// Creates a decoder over `rb`.
+ // NOTE(review): the next lines look truncated in this copy of the patch (the
+ // text between `<` and `>` appears to be missing, fusing this initializer with
+ // the following `decode` and `container(keyedBy:)` methods) - restore them
+ // from the original change before applying.
+ public init(_ rb: RecordBatch) {
+ self.rb = rb
+ var colMapping = [String: ArrowArrayHolder]()
+ var columns = [ArrowArrayHolder]()
+ for index in 0..(_ type: T.Type) throws -> [T] {
+ var output = [T]()
+ for index in 0..(keyedBy type: Key.Type
+ ) -> KeyedDecodingContainer where Key: CodingKey {
+ let container = ArrowKeyedDecoding(self, codingPath: codingPath)
+ return KeyedDecodingContainer(container)
+ }
+
+ /// Returns a container that addresses the columns by position.
+ public func unkeyedContainer() -> UnkeyedDecodingContainer {
+ return ArrowUnkeyedDecoding(self, codingPath: codingPath)
+ }
+
+ /// Returns a container that reads a single value (the containers in this file read column 0).
+ public func singleValueContainer() -> SingleValueDecodingContainer {
+ return ArrowSingleValueDecoding(self, codingPath: codingPath)
+ }
+
+ /// Looks up a column by name.
+ /// - Throws: `ArrowError.invalid` when no column has that name or when the
+ /// column's array cannot be cast to `AnyArray`.
+ func getCol(_ name: String) throws -> AnyArray {
+ guard let col = self.nameToCol[name] else {
+ throw ArrowError.invalid("Column for key \"\(name)\" not found")
+ }
+
+ guard let anyArray = col.array as? AnyArray else {
+ throw ArrowError.invalid("Unable to convert array to AnyArray")
+ }
+
+ return anyArray
+ }
+
+ /// Looks up a column by position.
+ /// - Throws: `ArrowError.outOfBounds` when `index` is past the last column,
+ /// `ArrowError.invalid` when the column's array cannot be cast to `AnyArray`.
+ // NOTE(review): only the upper bound is checked; a negative index is not rejected here.
+ func getCol(_ index: Int) throws -> AnyArray {
+ if index >= self.columns.count {
+ throw ArrowError.outOfBounds(index: Int64(index))
+ }
+
+ guard let anyArray = self.columns[index].array as? AnyArray else {
+ throw ArrowError.invalid("Unable to convert array to AnyArray")
+ }
+
+ return anyArray
+ }
+
+ /// Reads the value of the column named by `key` at the current row and casts it to `T`.
+ /// Returns `nil` when the conditional cast to `T` does not succeed.
+ func doDecode(_ key: CodingKey) throws -> T? {
+ let array: AnyArray = try self.getCol(key.stringValue)
+ return array.asAny(self.rbIndex) as? T
+ }
+
+ /// Reads the value of the column at position `col` at the current row and casts it to `T`.
+ /// Returns `nil` when the conditional cast to `T` does not succeed.
+ func doDecode(_ col: Int) throws -> T? {
+ let array: AnyArray = try self.getCol(col)
+ return array.asAny(self.rbIndex) as? T
+ }
+}
+
+/// Unkeyed container that exposes the columns of the current row in column order.
+///
+/// The row is selected by `decoder.rbIndex`; `currentIndex` is the column cursor.
+private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer {
+    /// Types the generic `decode(_:)` is able to read out of a column.
+    private static let supportedTypes: [Any.Type] = [
+        Int8.self, Int16.self, Int32.self, Int64.self,
+        UInt8.self, UInt16.self, UInt32.self, UInt64.self,
+        String.self, Double.self, Float.self, Date.self
+    ]
+
+    var codingPath: [CodingKey]
+    var count: Int? = 0
+    var isAtEnd: Bool = false
+    var currentIndex: Int = 0
+    let decoder: ArrowDecoder
+
+    init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
+        self.decoder = decoder
+        self.codingPath = codingPath
+        self.count = self.decoder.columns.count
+        // A batch without columns is exhausted from the start.
+        self.isAtEnd = self.decoder.columns.isEmpty
+    }
+
+    /// Moves the cursor to the next column and refreshes `isAtEnd`.
+    mutating func increment() {
+        self.currentIndex += 1
+        self.isAtEnd = self.currentIndex >= (self.count ?? 0)
+    }
+
+    /// Consumes the current column only when it holds a null at the current row.
+    ///
+    /// The null check looks at the raw column value instead of going through a
+    /// generic cast, and the cursor is left untouched when the value is present,
+    /// as `UnkeyedDecodingContainer` requires.
+    /// - Throws: whatever `ArrowDecoder.getCol` throws for an invalid column.
+    mutating func decodeNil() throws -> Bool {
+        let column = try self.decoder.getCol(self.currentIndex)
+        guard column.asAny(self.decoder.rbIndex) == nil else {
+            return false
+        }
+        increment()
+        return true
+    }
+
+    /// Decodes the current column as `type` and advances the cursor.
+    /// - Throws: `ArrowError.invalid` when `type` is not supported, or when the
+    ///   column is null / holds a value of another type (instead of crashing).
+    mutating func decode<T>(_ type: T.Type) throws -> T where T: Decodable {
+        guard ArrowUnkeyedDecoding.supportedTypes.contains(where: { $0 == type }) else {
+            throw ArrowError.invalid("Type \(type) is currently not supported")
+        }
+
+        // The cursor moves on whether or not the value could be read.
+        defer { increment() }
+        let decoded: T? = try self.decoder.doDecode(self.currentIndex)
+        guard let value = decoded else {
+            throw ArrowError.invalid(
+                "Column \(self.currentIndex) is null or does not hold a value of type \(type)")
+        }
+        return value
+    }
+
+    func nestedContainer<NestedKey>(
+        keyedBy type: NestedKey.Type
+    ) throws -> KeyedDecodingContainer<NestedKey> where NestedKey: CodingKey {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func nestedUnkeyedContainer() throws -> UnkeyedDecodingContainer {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func superDecoder() throws -> Decoder {
+        throw ArrowError.invalid("super decoding is currently not supported.")
+    }
+}
+
+/// Keyed container that maps each coding key to the column with the same name
+/// and reads its value at the decoder's current row.
+private struct ArrowKeyedDecoding<Key: CodingKey>: KeyedDecodingContainerProtocol {
+    var codingPath = [CodingKey]()
+    var allKeys = [Key]()
+    let decoder: ArrowDecoder
+
+    init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
+        self.decoder = decoder
+        self.codingPath = codingPath
+        // Every column name that can be expressed as a `Key` is a decodable key.
+        self.allKeys = decoder.nameToCol.keys.compactMap { Key(stringValue: $0) }
+    }
+
+    /// Reads the value for `key` as `T`.
+    /// - Throws: `ArrowError.invalid` when the column is missing, null at the
+    ///   current row, or holds a value of another type (instead of crashing).
+    private func requireValue<T>(_ type: T.Type, forKey key: Key) throws -> T {
+        let decoded: T? = try self.decoder.doDecode(key)
+        guard let value = decoded else {
+            throw ArrowError.invalid(
+                "Value for key \"\(key.stringValue)\" is null or is not of type \(type)")
+        }
+        return value
+    }
+
+    func contains(_ key: Key) -> Bool {
+        return self.decoder.nameToCol[key.stringValue] != nil
+    }
+
+    /// Reports whether the column for `key` is null at the current row.
+    /// The raw column value is inspected so the answer does not depend on a generic cast.
+    func decodeNil(forKey key: Key) throws -> Bool {
+        let column = try self.decoder.getCol(key.stringValue)
+        return column.asAny(self.decoder.rbIndex) == nil
+    }
+
+    func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: String.Type, forKey key: Key) throws -> String {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: Double.Type, forKey key: Key) throws -> Double {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: Float.Type, forKey key: Key) throws -> Float {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: Int.Type, forKey key: Key) throws -> Int {
+        throw ArrowError.invalid(
+            "Int type is not supported (please use Int8, Int16, Int32 or Int64)")
+    }
+
+    func decode(_ type: Int8.Type, forKey key: Key) throws -> Int8 {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: Int16.Type, forKey key: Key) throws -> Int16 {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: Int32.Type, forKey key: Key) throws -> Int32 {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: Int64.Type, forKey key: Key) throws -> Int64 {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: UInt.Type, forKey key: Key) throws -> UInt {
+        throw ArrowError.invalid(
+            "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)")
+    }
+
+    func decode(_ type: UInt8.Type, forKey key: Key) throws -> UInt8 {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: UInt16.Type, forKey key: Key) throws -> UInt16 {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: UInt32.Type, forKey key: Key) throws -> UInt32 {
+        return try requireValue(type, forKey: key)
+    }
+
+    func decode(_ type: UInt64.Type, forKey key: Key) throws -> UInt64 {
+        return try requireValue(type, forKey: key)
+    }
+
+    /// Generic fallback: `Date` is the only non-primitive type that can be read from a column.
+    func decode<T>(_ type: T.Type, forKey key: Key) throws -> T where T: Decodable {
+        guard type == Date.self else {
+            throw ArrowError.invalid("Type \(type) is currently not supported")
+        }
+        return try requireValue(type, forKey: key)
+    }
+
+    func nestedContainer<NestedKey>(
+        keyedBy type: NestedKey.Type,
+        forKey key: Key
+    ) throws -> KeyedDecodingContainer<NestedKey> where NestedKey: CodingKey {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func nestedUnkeyedContainer(forKey key: Key) throws -> UnkeyedDecodingContainer {
+        throw ArrowError.invalid("Nested decoding is currently not supported.")
+    }
+
+    func superDecoder() throws -> Decoder {
+        throw ArrowError.invalid("super decoding is currently not supported.")
+    }
+
+    func superDecoder(forKey key: Key) throws -> Decoder {
+        throw ArrowError.invalid("super decoding is currently not supported.")
+    }
+}
+
+/// Single-value container: always reads column 0 at the decoder's current row.
+private struct ArrowSingleValueDecoding: SingleValueDecodingContainer {
+    /// Types the generic `decode(_:)` accepts; kept identical to the list used
+    /// by the unkeyed container so that e.g. `Optional<Int8>` (which decodes its
+    /// wrapped value through the generic entry point) can be decoded.
+    private static let supportedTypes: [Any.Type] = [
+        Int8.self, Int16.self, Int32.self, Int64.self,
+        UInt8.self, UInt16.self, UInt32.self, UInt64.self,
+        String.self, Double.self, Float.self, Date.self
+    ]
+
+    var codingPath = [CodingKey]()
+    let decoder: ArrowDecoder
+
+    init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
+        self.decoder = decoder
+        self.codingPath = codingPath
+    }
+
+    /// Reads column 0 as `T`.
+    /// - Throws: `ArrowError.invalid` when the value is null at the current row
+    ///   or holds a value of another type (instead of crashing).
+    private func requireValue<T>(_ type: T.Type) throws -> T {
+        let decoded: T? = try self.decoder.doDecode(0)
+        guard let value = decoded else {
+            throw ArrowError.invalid("Value in column 0 is null or is not of type \(type)")
+        }
+        return value
+    }
+
+    /// Reports whether column 0 is null at the current row.
+    /// The protocol does not allow this method to throw, so a column lookup
+    /// failure is reported as "not null" and surfaces on the following `decode`.
+    func decodeNil() -> Bool {
+        do {
+            return try self.decoder.getCol(0).asAny(self.decoder.rbIndex) == nil
+        } catch {
+            return false
+        }
+    }
+
+    func decode(_ type: Bool.Type) throws -> Bool {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: String.Type) throws -> String {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: Double.Type) throws -> Double {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: Float.Type) throws -> Float {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: Int.Type) throws -> Int {
+        throw ArrowError.invalid(
+            "Int type is not supported (please use Int8, Int16, Int32 or Int64)")
+    }
+
+    func decode(_ type: Int8.Type) throws -> Int8 {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: Int16.Type) throws -> Int16 {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: Int32.Type) throws -> Int32 {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: Int64.Type) throws -> Int64 {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: UInt.Type) throws -> UInt {
+        throw ArrowError.invalid(
+            "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)")
+    }
+
+    func decode(_ type: UInt8.Type) throws -> UInt8 {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: UInt16.Type) throws -> UInt16 {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: UInt32.Type) throws -> UInt32 {
+        return try requireValue(type)
+    }
+
+    func decode(_ type: UInt64.Type) throws -> UInt64 {
+        return try requireValue(type)
+    }
+
+    /// Generic entry point, reached when the value type is only known generically.
+    func decode<T>(_ type: T.Type) throws -> T where T: Decodable {
+        guard ArrowSingleValueDecoding.supportedTypes.contains(where: { $0 == type }) else {
+            throw ArrowError.invalid("Type \(type) is currently not supported")
+        }
+        return try requireValue(type)
+    }
+}
diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift
new file mode 100644
index 0000000000000..e7359467ae1c5
--- /dev/null
+++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift
@@ -0,0 +1,170 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import XCTest
+@testable import Arrow
+
+/// Tests for `ArrowDecoder`: record batches are built in memory and decoded back
+/// into Swift values.
+final class CodableTests: XCTestCase {
+ /// Decoding target whose property names match the column names added to the
+ /// record batch in `testArrowKeyedDecoder`.
+ public class TestClass: Codable {
+ public var propBool: Bool
+ public var propInt8: Int8
+ public var propInt16: Int16
+ public var propInt32: Int32
+ public var propInt64: Int64
+ public var propUInt8: UInt8
+ public var propUInt16: UInt16
+ public var propUInt32: UInt32
+ public var propUInt64: UInt64
+ public var propFloat: Float
+ public var propDouble: Double
+ public var propString: String
+ public var propDate: Date
+
+ // Default values differ from every value placed in the test columns below.
+ public required init() {
+ self.propBool = false
+ self.propInt8 = 1
+ self.propInt16 = 2
+ self.propInt32 = 3
+ self.propInt64 = 4
+ self.propUInt8 = 5
+ self.propUInt16 = 6
+ self.propUInt32 = 7
+ self.propUInt64 = 8
+ self.propFloat = 9
+ self.propDouble = 10
+ self.propString = "11"
+ self.propDate = Date.now
+ }
+ }
+
+ /// Builds a three-row batch with one column per `TestClass` property and
+ /// decodes it with `ArrowDecoder.decode(TestClass.self)`.
+ func testArrowKeyedDecoder() throws { // swiftlint:disable:this function_body_length
+ let date1 = Date(timeIntervalSinceReferenceDate: 86400 * 5000 + 352)
+
+ let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder()
+ let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let int16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let int32Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let int64Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let uint8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let uint16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let uint32Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let uint64Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let floatBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let doubleBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+ let dateBuilder = try ArrowArrayBuilders.loadDate64ArrayBuilder()
+
+ boolBuilder.append(false, true, false)
+ int8Builder.append(10, 11, 12)
+ int16Builder.append(20, 21, 22)
+ int32Builder.append(30, 31, 32)
+ int64Builder.append(40, 41, 42)
+ uint8Builder.append(50, 51, 52)
+ uint16Builder.append(60, 61, 62)
+ uint32Builder.append(70, 71, 72)
+ uint64Builder.append(80, 81, 82)
+ floatBuilder.append(90.1, 91.1, 92.1)
+ doubleBuilder.append(100.1, 101.1, 102.1)
+ stringBuilder.append("test0", "test1", "test2")
+ dateBuilder.append(date1, date1, date1)
+ let result = RecordBatch.Builder()
+ .addColumn("propBool", arrowArray: try boolBuilder.toHolder())
+ .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+ .addColumn("propInt16", arrowArray: try int16Builder.toHolder())
+ .addColumn("propInt32", arrowArray: try int32Builder.toHolder())
+ .addColumn("propInt64", arrowArray: try int64Builder.toHolder())
+ .addColumn("propUInt8", arrowArray: try uint8Builder.toHolder())
+ .addColumn("propUInt16", arrowArray: try uint16Builder.toHolder())
+ .addColumn("propUInt32", arrowArray: try uint32Builder.toHolder())
+ .addColumn("propUInt64", arrowArray: try uint64Builder.toHolder())
+ .addColumn("propFloat", arrowArray: try floatBuilder.toHolder())
+ .addColumn("propDouble", arrowArray: try doubleBuilder.toHolder())
+ .addColumn("propString", arrowArray: try stringBuilder.toHolder())
+ .addColumn("propDate", arrowArray: try dateBuilder.toHolder())
+ .finish()
+ switch result {
+ case .success(let rb):
+ let decoder = ArrowDecoder(rb)
+ var testClasses = try decoder.decode(TestClass.self)
+ // NOTE(review): the next line looks truncated in this copy of the patch (the
+ // rest of this test and the start of the following one appear to be missing).
+ for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ int8Builder.append(10, 11, 12, nil)
+ let result = RecordBatch.Builder()
+ .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+ .finish()
+ switch result {
+ case .success(let rb):
+ let decoder = ArrowDecoder(rb)
+ // A single-column batch decoded as optional Int8 values.
+ let testData = try decoder.decode(Int8?.self)
+ // NOTE(review): the next line looks truncated as well (end of this test and
+ // the header of the following one appear to be missing).
+ for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+ int8Builder.append(10, 11, 12)
+ stringBuilder.append("test0", "test1", "test2")
+ let result = RecordBatch.Builder()
+ .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+ .addColumn("propString", arrowArray: try stringBuilder.toHolder())
+ .finish()
+ switch result {
+ case .success(let rb):
+ let decoder = ArrowDecoder(rb)
+ // Each row is decoded as a dictionary keyed by the Int8 column value.
+ let testData = try decoder.decode([Int8: String].self)
+ var index: Int8 = 0
+ for data in testData {
+ // Keys are 10, 11, 12 and values "test0", "test1", "test2", row by row.
+ let str = data[10 + index]
+ XCTAssertEqual(str, "test\(index)")
+ index += 1
+ }
+ case .failure(let err):
+ throw err
+ }
+ }
+
+}
From 399408cb273c47f490f65cdad95bc184a652826c Mon Sep 17 00:00:00 2001
From: Hyunseok Seo
Date: Sun, 9 Jun 2024 14:50:25 +0900
Subject: [PATCH 03/11] GH-42039: [Docs][Go] Fix broken link (#42040)
### Rationale for this change
Replace the broken link with the correct one; the link broke because the documentation path changed.
### What changes are included in this PR?
Update the link to drop the obsolete `go/` segment from its path.
- old link: https://arrow.apache.org/adbc/main/driver/go/flight_sql.html#client-options
- new link: https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options
### Are these changes tested?
Yes. I have checked the link.
### Are there any user-facing changes?
Yes, the updated link will be visible to users.
* GitHub Issue: #42039
Authored-by: Hyunseok Seo
Signed-off-by: Sutou Kouhei
---
go/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/go/README.md b/go/README.md
index 4f97c49e1c7e8..220b0a230a615 100644
--- a/go/README.md
+++ b/go/README.md
@@ -48,7 +48,7 @@ func main() {
DSN option keys are expressed as `k=v`, delimited with `;`.
Some options keys are defined in ADBC, others are defined in the FlightSQL ADBC driver.
-- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/go/flight_sql.html#client-options)
+- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options)
- ADBC [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/adbc.go#L149-L158)
- FlightSQL driver option keys [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/driver/flightsql/flightsql_adbc.go#L70-L81)
From 7aaea3d9bb65ad37a17a9d3a52341f0fe2478903 Mon Sep 17 00:00:00 2001
From: abandy
Date: Sun, 9 Jun 2024 19:55:16 -0400
Subject: [PATCH 04/11] GH-42041: [Swift] Fix nullable type decoder issue
(#42043)
### Rationale for this change
There is an issue when decoding nullable types. The previous method of checking for nil values always returned false for nullable types due to the ArrowArray types being non-nullable.
### What changes are included in this PR?
This PR adds an `isNull` method to the ArrowDecoder to be used for null checks. Also, a check for nullable types has been added to the unkeyed `decode` method.
### Are these changes tested?
Yes, tests have been added/modified to test this fix.
* GitHub Issue: #42041
Authored-by: Alva Bandy
Signed-off-by: Sutou Kouhei
---
swift/Arrow/Sources/Arrow/ArrowDecoder.swift | 31 ++++++--
.../Arrow/Tests/ArrowTests/CodableTests.swift | 73 ++++++++++++++++---
2 files changed, 88 insertions(+), 16 deletions(-)
diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
index 7e0c69b1e79e8..9aa8a65137d28 100644
--- a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
+++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift
@@ -104,6 +104,16 @@ public class ArrowDecoder: Decoder {
let array: AnyArray = try self.getCol(col)
return array.asAny(self.rbIndex) as? T
}
+
+    /// Reports whether the column named by `key` has no value at the current row.
+    /// - Throws: whatever `getCol` throws when the column cannot be resolved.
+    func isNull(_ key: CodingKey) throws -> Bool {
+        let column = try getCol(key.stringValue)
+        guard column.asAny(rbIndex) != nil else {
+            return true
+        }
+        return false
+    }
+
+    /// Reports whether the column at position `col` has no value at the current row.
+    /// - Throws: whatever `getCol` throws when the column cannot be resolved.
+    func isNull(_ col: Int) throws -> Bool {
+        let column = try getCol(col)
+        guard column.asAny(rbIndex) != nil else {
+            return true
+        }
+        return false
+    }
}
private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer {
@@ -126,11 +136,17 @@ private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer {
mutating func decodeNil() throws -> Bool {
defer {increment()}
- return try self.decoder.doDecode(self.currentIndex) == nil
+ return try self.decoder.isNull(self.currentIndex)
}
mutating func decode(_ type: T.Type) throws -> T where T: Decodable {
- if type == Int8.self || type == Int16.self ||
+ if type == Int8?.self || type == Int16?.self ||
+ type == Int32?.self || type == Int64?.self ||
+ type == UInt8?.self || type == UInt16?.self ||
+ type == UInt32?.self || type == UInt64?.self ||
+ type == String?.self || type == Double?.self ||
+ type == Float?.self || type == Date?.self ||
+ type == Int8.self || type == Int16.self ||
type == Int32.self || type == Int64.self ||
type == UInt8.self || type == UInt16.self ||
type == UInt32.self || type == UInt64.self ||
@@ -173,7 +189,7 @@ private struct ArrowKeyedDecoding: KeyedDecodingContainerProtoco
}
func decodeNil(forKey key: Key) throws -> Bool {
- return try self.decoder.doDecode(key) == nil
+ try self.decoder.isNull(key)
}
func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool {
@@ -273,7 +289,7 @@ private struct ArrowSingleValueDecoding: SingleValueDecodingContainer {
func decodeNil() -> Bool {
do {
- return try self.decoder.doDecode(0) == nil
+ return try self.decoder.isNull(0)
} catch {
return false
}
@@ -338,7 +354,12 @@ private struct ArrowSingleValueDecoding: SingleValueDecodingContainer {
}
func decode(_ type: T.Type) throws -> T where T: Decodable {
- if type == Date.self {
+ if type == Int8.self || type == Int16.self ||
+ type == Int32.self || type == Int64.self ||
+ type == UInt8.self || type == UInt16.self ||
+ type == UInt32.self || type == UInt64.self ||
+ type == String.self || type == Double.self ||
+ type == Float.self || type == Date.self {
return try self.decoder.doDecode(0)!
} else {
throw ArrowError.invalid("Type \(type) is currently not supported")
diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift
index e7359467ae1c5..d7d3414cf6250 100644
--- a/swift/Arrow/Tests/ArrowTests/CodableTests.swift
+++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift
@@ -30,7 +30,7 @@ final class CodableTests: XCTestCase {
public var propUInt32: UInt32
public var propUInt64: UInt64
public var propFloat: Float
- public var propDouble: Double
+ public var propDouble: Double?
public var propString: String
public var propDate: Date
@@ -53,7 +53,6 @@ final class CodableTests: XCTestCase {
func testArrowKeyedDecoder() throws { // swiftlint:disable:this function_body_length
let date1 = Date(timeIntervalSinceReferenceDate: 86400 * 5000 + 352)
-
let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder()
let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
let int16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder()
@@ -78,7 +77,7 @@ final class CodableTests: XCTestCase {
uint32Builder.append(70, 71, 72)
uint64Builder.append(80, 81, 82)
floatBuilder.append(90.1, 91.1, 92.1)
- doubleBuilder.append(100.1, 101.1, 102.1)
+ doubleBuilder.append(101.1, nil, nil)
stringBuilder.append("test0", "test1", "test2")
dateBuilder.append(date1, date1, date1)
let result = RecordBatch.Builder()
@@ -102,7 +101,6 @@ final class CodableTests: XCTestCase {
var testClasses = try decoder.decode(TestClass.self)
for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder()
- int8Builder.append(10, 11, 12, nil)
+ int8Builder.append(10, 11, 12)
let result = RecordBatch.Builder()
.addColumn("propInt8", arrowArray: try int8Builder.toHolder())
.finish()
@@ -134,7 +136,28 @@ final class CodableTests: XCTestCase {
let testData = try decoder.decode(Int8?.self)
for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder()
+ int8WNilBuilder.append(10, nil, 12, nil)
+ let resultWNil = RecordBatch.Builder()
+ .addColumn("propInt8", arrowArray: try int8WNilBuilder.toHolder())
+ .finish()
+ switch resultWNil {
+ case .success(let rb):
+ let decoder = ArrowDecoder(rb)
+ let testData = try decoder.decode(Int8?.self)
+ for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder()
let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
- int8Builder.append(10, 11, 12)
- stringBuilder.append("test0", "test1", "test2")
+ int8Builder.append(10, 11, 12, 13)
+ stringBuilder.append("test0", "test1", "test2", "test3")
let result = RecordBatch.Builder()
.addColumn("propInt8", arrowArray: try int8Builder.toHolder())
.addColumn("propString", arrowArray: try stringBuilder.toHolder())
@@ -167,4 +190,32 @@ final class CodableTests: XCTestCase {
}
}
+    /// Decodes every row as `[Int8: String?]` and checks that nulls in the string
+    /// column come back as `nil` while the other rows keep their text.
+    func testArrowUnkeyedDecoderWithNull() throws {
+        let int8Builder: NumberArrayBuilder<Int8> = try ArrowArrayBuilders.loadNumberArrayBuilder()
+        let stringWNilBuilder = try ArrowArrayBuilders.loadStringArrayBuilder()
+        int8Builder.append(10, 11, 12, 13)
+        stringWNilBuilder.append(nil, "test1", nil, "test3")
+        let resultWNil = RecordBatch.Builder()
+            .addColumn("propInt8", arrowArray: try int8Builder.toHolder())
+            .addColumn("propString", arrowArray: try stringWNilBuilder.toHolder())
+            .finish()
+        switch resultWNil {
+        case .success(let rb):
+            let decoder = ArrowDecoder(rb)
+            let testData = try decoder.decode([Int8: String?].self)
+            var index: Int8 = 0
+            for data in testData {
+                // A missing key now fails the test with a message instead of
+                // crashing the whole test run on a force-unwrap.
+                let str = try XCTUnwrap(data[10 + index], "no entry for key \(10 + index)")
+                if index % 2 == 0 {
+                    XCTAssertNil(str)
+                } else {
+                    XCTAssertEqual(str, "test\(index)")
+                }
+                index += 1
+            }
+        case .failure(let err):
+            throw err
+        }
+    }
}
From 7c15568aa71c1366af5eadb6140fa445f6ce4cd0 Mon Sep 17 00:00:00 2001
From: Hyunseok Seo
Date: Mon, 10 Jun 2024 09:48:05 +0900
Subject: [PATCH 05/11] GH-42042: [Java] Update Unit Tests for Compressions
Module (#42044)
### Rationale for this change
Update package from JUnit 4(`org.junit`) to JUnit 5(`org.junit.jupiter`).
### What changes are included in this PR?
- [x] Replacing `org.junit` with `org.junit.jupiter.api`.
- [x] Updating `Assertions.assertXXX` to `assertXXX` using static imports
- [x] Updating annotations such as `@ After`.
- `@ After` -> `@ AfterEach`
- [x] Doing self review
### Are these changes tested?
Yes, existing tests have passed.
### Are there any user-facing changes?
No.
* GitHub Issue: #42042
Authored-by: Hyunseok Seo
Signed-off-by: David Li
---
.../TestArrowReaderWriterWithCompression.java | 59 ++++++++++---------
1 file changed, 31 insertions(+), 28 deletions(-)
diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java
index af28333746290..24d6abf3cb7c3 100644
--- a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java
+++ b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java
@@ -17,6 +17,11 @@
package org.apache.arrow.compression;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.channels.Channels;
@@ -46,9 +51,7 @@
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -67,7 +70,7 @@ public void setup() {
root = null;
}
- @After
+ @AfterEach
public void tearDown() {
if (root != null) {
root.close();
@@ -134,19 +137,19 @@ public void testArrowFileZstdRoundTrip() throws Exception {
try (ArrowFileReader reader =
new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
CommonsCompressionFactory.INSTANCE)) {
- Assertions.assertEquals(1, reader.getRecordBlocks().size());
- Assertions.assertTrue(reader.loadNextBatch());
- Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot()));
- Assertions.assertFalse(reader.loadNextBatch());
+ assertEquals(1, reader.getRecordBlocks().size());
+ assertTrue(reader.loadNextBatch());
+ assertTrue(root.equals(reader.getVectorSchemaRoot()));
+ assertFalse(reader.loadNextBatch());
}
// without compression
try (ArrowFileReader reader =
new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
NoCompressionCodec.Factory.INSTANCE)) {
- Assertions.assertEquals(1, reader.getRecordBlocks().size());
- Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ assertEquals(1, reader.getRecordBlocks().size());
+ Exception exception = assertThrows(IllegalArgumentException.class,
reader::loadNextBatch);
- Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
+ assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
exception.getMessage());
}
}
@@ -158,17 +161,17 @@ public void testArrowStreamZstdRoundTrip() throws Exception {
try (ArrowStreamReader reader =
new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
CommonsCompressionFactory.INSTANCE)) {
- Assert.assertTrue(reader.loadNextBatch());
- Assert.assertTrue(root.equals(reader.getVectorSchemaRoot()));
- Assert.assertFalse(reader.loadNextBatch());
+ assertTrue(reader.loadNextBatch());
+ assertTrue(root.equals(reader.getVectorSchemaRoot()));
+ assertFalse(reader.loadNextBatch());
}
// without compression
try (ArrowStreamReader reader =
new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
NoCompressionCodec.Factory.INSTANCE)) {
- Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ Exception exception = assertThrows(IllegalArgumentException.class,
reader::loadNextBatch);
- Assert.assertEquals(
+ assertEquals(
"Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
exception.getMessage()
);
@@ -189,19 +192,19 @@ public void testArrowFileZstdRoundTripWithDictionary() throws Exception {
try (ArrowFileReader reader =
new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
CommonsCompressionFactory.INSTANCE)) {
- Assertions.assertEquals(1, reader.getRecordBlocks().size());
- Assertions.assertTrue(reader.loadNextBatch());
- Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot()));
- Assertions.assertFalse(reader.loadNextBatch());
+ assertEquals(1, reader.getRecordBlocks().size());
+ assertTrue(reader.loadNextBatch());
+ assertTrue(root.equals(reader.getVectorSchemaRoot()));
+ assertFalse(reader.loadNextBatch());
}
// without compression
try (ArrowFileReader reader =
new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
NoCompressionCodec.Factory.INSTANCE)) {
- Assertions.assertEquals(1, reader.getRecordBlocks().size());
- Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ assertEquals(1, reader.getRecordBlocks().size());
+ Exception exception = assertThrows(IllegalArgumentException.class,
reader::loadNextBatch);
- Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
+ assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
exception.getMessage());
}
dictionaryVector.close();
@@ -221,17 +224,17 @@ public void testArrowStreamZstdRoundTripWithDictionary() throws Exception {
try (ArrowStreamReader reader =
new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
CommonsCompressionFactory.INSTANCE)) {
- Assertions.assertTrue(reader.loadNextBatch());
- Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot()));
- Assertions.assertFalse(reader.loadNextBatch());
+ assertTrue(reader.loadNextBatch());
+ assertTrue(root.equals(reader.getVectorSchemaRoot()));
+ assertFalse(reader.loadNextBatch());
}
// without compression
try (ArrowStreamReader reader =
new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator,
NoCompressionCodec.Factory.INSTANCE)) {
- Exception exception = Assert.assertThrows(IllegalArgumentException.class,
+ Exception exception = assertThrows(IllegalArgumentException.class,
reader::loadNextBatch);
- Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
+ assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD",
exception.getMessage());
}
dictionaryVector.close();
From f086b76fdd6bd3693bf3b5c9ac89081772d61e26 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon
Date: Mon, 10 Jun 2024 06:26:39 +0530
Subject: [PATCH 06/11] GH-40819: [Java] Adding Spotless to Algorithm module
(#41825)
### Rationale for this change
Add code style and formatting checks for the Algorithm module.
### What changes are included in this PR?
The Spotless code-formatting plugin has been added and applied to the Algorithm module.
### Are these changes tested?
Yes, but no new test cases are involved; the Spotless plugin itself checks and corrects the formatting.
### Are there any user-facing changes?
No
* GitHub Issue: #40819
Lead-authored-by: Vibhatha Abeykoon
Co-authored-by: Vibhatha Lakmal Abeykoon
Co-authored-by: David Li
Signed-off-by: David Li
---
.gitignore | 4 +-
docs/source/developers/java/development.rst | 46 ++-
{.mvn => java/.mvn}/develocity.xml | 0
{.mvn => java/.mvn}/extensions.xml | 0
java/algorithm/pom.xml | 7 +-
.../deduplicate/DeduplicationUtils.java | 16 +-
.../deduplicate/VectorRunDeduplicator.java | 27 +-
.../dictionary/DictionaryBuilder.java | 30 +-
.../dictionary/DictionaryEncoder.java | 11 +-
.../HashTableBasedDictionaryBuilder.java | 34 +--
.../HashTableDictionaryEncoder.java | 69 ++---
.../dictionary/LinearDictionaryEncoder.java | 44 +--
.../dictionary/SearchDictionaryEncoder.java | 41 ++-
.../SearchTreeBasedDictionaryBuilder.java | 46 ++-
.../arrow/algorithm/misc/PartialSumUtils.java | 41 ++-
.../arrow/algorithm/rank/VectorRank.java | 15 +-
.../algorithm/search/ParallelSearcher.java | 187 ++++++------
.../algorithm/search/VectorRangeSearcher.java | 213 +++++++------
.../algorithm/search/VectorSearcher.java | 26 +-
.../sort/CompositeVectorComparator.java | 17 +-
.../sort/DefaultVectorComparators.java | 126 ++++----
.../sort/FixedWidthInPlaceVectorSorter.java | 25 +-
.../FixedWidthOutOfPlaceVectorSorter.java | 35 ++-
.../sort/GeneralOutOfPlaceVectorSorter.java | 20 +-
.../algorithm/sort/InPlaceVectorSorter.java | 7 +-
.../arrow/algorithm/sort/IndexSorter.java | 33 +-
.../arrow/algorithm/sort/InsertionSorter.java | 23 +-
.../arrow/algorithm/sort/OffHeapIntStack.java | 5 +-
.../sort/OutOfPlaceVectorSorter.java | 8 +-
.../sort/StableVectorComparator.java | 13 +-
.../VariableWidthOutOfPlaceVectorSorter.java | 56 ++--
.../algorithm/sort/VectorValueComparator.java | 56 ++--
.../deduplicate/TestDeduplicationUtils.java | 46 +--
.../TestVectorRunDeduplicator.java | 19 +-
.../TestHashTableBasedDictionaryBuilder.java | 62 ++--
.../TestHashTableDictionaryEncoder.java | 72 +++--
.../TestLinearDictionaryEncoder.java | 72 +++--
.../TestSearchDictionaryEncoder.java | 84 ++---
.../TestSearchTreeBasedDictionaryBuilder.java | 90 ++++--
.../algorithm/misc/TestPartialSumUtils.java | 18 +-
.../arrow/algorithm/rank/TestVectorRank.java | 20 +-
.../search/TestParallelSearcher.java | 36 ++-
.../search/TestVectorRangeSearcher.java | 30 +-
.../algorithm/search/TestVectorSearcher.java | 30 +-
.../sort/TestCompositeVectorComparator.java | 18 +-
.../sort/TestDefaultVectorComparator.java | 167 ++++++----
.../TestFixedWidthInPlaceVectorSorter.java | 48 ++-
.../TestFixedWidthOutOfPlaceVectorSorter.java | 69 +++--
.../algorithm/sort/TestFixedWidthSorting.java | 126 +++++---
.../TestGeneralOutOfPlaceVectorSorter.java | 79 ++---
.../arrow/algorithm/sort/TestIndexSorter.java | 31 +-
.../algorithm/sort/TestInsertionSorter.java | 9 +-
.../algorithm/sort/TestOffHeapIntStack.java | 5 +-
.../sort/TestOutOfPlaceVectorSorter.java | 6 +-
.../arrow/algorithm/sort/TestSortingUtil.java | 136 +++++----
.../sort/TestStableVectorComparator.java | 50 +--
...stVariableWidthOutOfPlaceVectorSorter.java | 40 +--
.../sort/TestVariableWidthSorting.java | 44 +--
java/dev/checkstyle/checkstyle-spotless.xml | 286 ++++++++++++++++++
.../asf-java.license} | 0
java/dev/license/asf-xml.license | 11 +
java/maven/pom.xml | 2 +-
java/pom.xml | 22 +-
63 files changed, 1716 insertions(+), 1293 deletions(-)
rename {.mvn => java/.mvn}/develocity.xml (100%)
rename {.mvn => java/.mvn}/extensions.xml (100%)
create mode 100644 java/dev/checkstyle/checkstyle-spotless.xml
rename java/dev/{checkstyle/checkstyle.license => license/asf-java.license} (100%)
create mode 100644 java/dev/license/asf-xml.license
diff --git a/.gitignore b/.gitignore
index 3192069d1ac7a..52ffa6c6124c2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,8 +102,8 @@ __debug_bin
.envrc
# Develocity
-.mvn/.gradle-enterprise/
-.mvn/.develocity/
+java/.mvn/.gradle-enterprise/
+java/.mvn/.develocity/
# rat
filtered_rat.txt
diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst
index 9f78eccf6c525..dd1839257a30e 100644
--- a/docs/source/developers/java/development.rst
+++ b/docs/source/developers/java/development.rst
@@ -110,7 +110,46 @@ integration tests, you would do:
Code Style
==========
-Java code style is enforced with Checkstyle. The configuration is located at `checkstyle`_.
+The current Java code follows the `Google Java Style`_ with Apache license headers.
+
+Java code style is checked by `Spotless`_ during the build, and the continuous integration build will verify
+that changes adhere to the style guide.
+
+Automatically fixing code style issues
+--------------------------------------
+
+- You can check the style without building the project with ``mvn spotless:check``.
+- You can autoformat the source with ``mvn spotless:apply``.
+
+Example:
+
+.. code-block:: bash
+
+ The following files had format violations:
+ src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
+ @@ -15,7 +15,6 @@
+ ·*·limitations·under·the·License.
+ ·*/
+
+ -
+ package·org.apache.arrow.algorithm.rank;
+
+ import·java.util.stream.IntStream;
+ Run 'mvn spotless:apply' to fix these violations.
+
+Code Formatter for IntelliJ IDEA and Eclipse
+--------------------------------------------
+
+Follow the instructions to set up google-java-format for:
+
+- `Eclipse`_
+- `IntelliJ`_
+
+
+Checkstyle
+----------
+
+Checkstyle is also used for general linting. The configuration is located at `checkstyle`_.
You can also just check the style without building the project.
This checks the code style of all source code under the current directory or from within an individual module.
@@ -137,7 +176,10 @@ This applies the style to all pom.xml files under the current directory or from
.. _conbench: https://github.com/conbench/conbench
.. _checkstyle: https://github.com/apache/arrow/blob/main/java/dev/checkstyle/checkstyle.xml
.. _Apache Maven pom.xml guidelines: https://maven.apache.org/developers/conventions/code.html#pom-code-convention
-
+.. _Spotless: https://github.com/diffplug/spotless
+.. _Google Java Style: https://google.github.io/styleguide/javaguide.html
+.. _Eclipse: https://github.com/google/google-java-format?tab=readme-ov-file#eclipse
+.. _IntelliJ: https://github.com/google/google-java-format?tab=readme-ov-file#intellij-android-studio-and-other-jetbrains-ides
Build Caching
=============
diff --git a/.mvn/develocity.xml b/java/.mvn/develocity.xml
similarity index 100%
rename from .mvn/develocity.xml
rename to java/.mvn/develocity.xml
diff --git a/.mvn/extensions.xml b/java/.mvn/extensions.xml
similarity index 100%
rename from .mvn/extensions.xml
rename to java/.mvn/extensions.xml
diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml
index 0854da48b718a..5984cce766d9e 100644
--- a/java/algorithm/pom.xml
+++ b/java/algorithm/pom.xml
@@ -20,6 +20,11 @@
Arrow Algorithms
(Experimental/Contrib) A collection of algorithms for working with ValueVectors.
+
+ dev/checkstyle/checkstyle-spotless.xml
+ none
+
+
org.apache.arrow
@@ -47,6 +52,4 @@
value-annotations
-
-
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
index 8811e43d3d08d..e9364b2a85b7b 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import org.apache.arrow.memory.ArrowBuf;
@@ -26,18 +25,18 @@
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
-/**
- * Utilities for vector deduplication.
- */
+/** Utilities for vector deduplication. */
class DeduplicationUtils {
/**
* Gets the start positions of the first distinct values in a vector.
+ *
* @param vector the target vector.
* @param runStarts the bit set to hold the start positions.
* @param vector type.
*/
- public static void populateRunStartIndicators(V vector, ArrowBuf runStarts) {
+ public static void populateRunStartIndicators(
+ V vector, ArrowBuf runStarts) {
int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount());
Preconditions.checkArgument(runStarts.capacity() >= bufSize);
runStarts.setZero(0, bufSize);
@@ -55,6 +54,7 @@ public static void populateRunStartIndicators(V vector,
/**
* Gets the run lengths, given the start positions.
+ *
* @param runStarts the bit set for start positions.
* @param runLengths the run length vector to populate.
* @param valueCount the number of values in the bit set.
@@ -76,15 +76,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths,
}
/**
- * Gets distinct values from the input vector by removing adjacent
- * duplicated values.
+ * Gets distinct values from the input vector by removing adjacent duplicated values.
+ *
* @param indicators the bit set containing the start positions of distinct values.
* @param inputVector the input vector.
* @param outputVector the output vector.
* @param vector type.
*/
public static void populateDeduplicatedValues(
- ArrowBuf indicators, V inputVector, V outputVector) {
+ ArrowBuf indicators, V inputVector, V outputVector) {
int dstIdx = 0;
for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) {
if (BitVectorHelper.get(indicators, srcIdx) != 0) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
index 5ef03cbe4a734..4e49de14f5956 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import org.apache.arrow.memory.ArrowBuf;
@@ -26,29 +25,28 @@
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
/**
- * Remove adjacent equal elements from a vector.
- * If the vector is sorted, it removes all duplicated values in the vector.
+ * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated
+ * values in the vector.
+ *
* @param vector type.
*/
public class VectorRunDeduplicator implements AutoCloseable {
/**
- * Bit set for distinct values.
- * If the value at some index is not equal to the previous value,
- * its bit is set to 1, otherwise its bit is set to 0.
+ * Bit set for distinct values. If the value at some index is not equal to the previous value, its
+ * bit is set to 1, otherwise its bit is set to 0.
*/
private ArrowBuf distinctValueBuffer;
- /**
- * The vector to deduplicate.
- */
+ /** The vector to deduplicate. */
private final V vector;
private final BufferAllocator allocator;
/**
* Constructs a vector run deduplicator for a given vector.
- * @param vector the vector to deduplicate. Ownership is NOT taken.
+ *
+ * @param vector the vector to deduplicate. Ownership is NOT taken.
* @param allocator the allocator used for allocating buffers for start indices.
*/
public VectorRunDeduplicator(V vector, BufferAllocator allocator) {
@@ -65,17 +63,20 @@ private void createDistinctValueBuffer() {
/**
* Gets the number of values which are different from their predecessor.
+ *
* @return the run count.
*/
public int getRunCount() {
if (distinctValueBuffer == null) {
createDistinctValueBuffer();
}
- return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
+ return vector.getValueCount()
+ - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount());
}
/**
* Gets the vector with deduplicated adjacent values removed.
+ *
* @param outVector the output vector.
*/
public void populateDeduplicatedValues(V outVector) {
@@ -88,6 +89,7 @@ public void populateDeduplicatedValues(V outVector) {
/**
* Gets the length of each distinct value.
+ *
* @param lengthVector the vector for holding length values.
*/
public void populateRunLengths(IntVector lengthVector) {
@@ -95,7 +97,8 @@ public void populateRunLengths(IntVector lengthVector) {
createDistinctValueBuffer();
}
- DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount());
+ DeduplicationUtils.populateRunLengths(
+ distinctValueBuffer, lengthVector, vector.getValueCount());
}
@Override
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
index 398368d1fc612..88c4e4dc65450 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java
@@ -14,33 +14,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.ValueVector;
/**
- * A dictionary builder is intended for the scenario frequently encountered in practice:
- * the dictionary is not known a priori, so it is generated dynamically.
- * In particular, when a new value arrives, it is tested to check if it is already
- * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary.
- *
- * The dictionary builder is intended to build a single dictionary.
- * So it cannot be used for different dictionaries.
- *
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the
+ * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value
+ * arrives, it is tested to check if it is already in the dictionary. If so, it is simply neglected,
+ * otherwise, it is added to the dictionary.
+ *
+ * The dictionary builder is intended to build a single dictionary. So it cannot be used for
+ * different dictionaries.
+ *
*
Below gives the sample code for using the dictionary builder
+ *
*
{@code
* DictionaryBuilder dictionaryBuilder = ...
* ...
* dictionaryBuild.addValue(newValue);
* ...
* }
- *
- *
- * With the above code, the dictionary vector will be populated,
- * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
- * After that, dictionary encoding can proceed with the populated dictionary..
- *
+ *
+ * With the above code, the dictionary vector will be populated, and it can be retrieved by the
+ * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed
+ * with the populated dictionary.
*
* @param the dictionary vector type.
*/
@@ -58,7 +56,7 @@ public interface DictionaryBuilder {
* Try to add an element from the target vector to the dictionary.
*
* @param targetVector the target vector containing new element.
- * @param targetIndex the index of the new element in the target vector.
+ * @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
*/
int addValue(V targetVector, int targetIndex);
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
index cda7b3bf9540e..16e27c3a23e72 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.BaseIntVector;
@@ -22,8 +21,9 @@
/**
* A dictionary encoder translates one vector into another one based on a dictionary vector.
- * According to Arrow specification, the encoded vector must be an integer based vector, which
- * is the index of the original vector element in the dictionary.
+ * According to Arrow specification, the encoded vector must be an integer based vector, which is
+ * the index of the original vector element in the dictionary.
+ *
* @param type of the encoded vector.
* @param type of the vector to encode. It is also the type of the dictionary vector.
*/
@@ -31,9 +31,10 @@ public interface DictionaryEncoder the dictionary vector type.
*/
-public class HashTableBasedDictionaryBuilder implements DictionaryBuilder {
+public class HashTableBasedDictionaryBuilder
+ implements DictionaryBuilder {
- /**
- * The dictionary to be built.
- */
+ /** The dictionary to be built. */
private final V dictionary;
- /**
- * If null should be encoded.
- */
+ /** If null should be encoded. */
private final boolean encodeNull;
/**
- * The hash map for distinct dictionary entries.
- * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element,
+ * whereas the value is the index in the dictionary.
*/
private HashMap hashMap = new HashMap<>();
- /**
- * The hasher used for calculating the hash code.
- */
+ /** The hasher used for calculating the hash code. */
private final ArrowBufHasher hasher;
- /**
- * Next pointer to try to add to the hash table.
- */
+ /** Next pointer to try to add to the hash table. */
private ArrowBufPointer nextPointer;
/**
@@ -83,7 +73,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) {
*
* @param dictionary the dictionary to populate.
* @param encodeNull if null values should be added to the dictionary.
- * @param hasher the hasher used to compute the hash code.
+ * @param hasher the hasher used to compute the hash code.
*/
public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) {
this.dictionary = dictionary;
@@ -125,7 +115,7 @@ public int addValues(V targetVector) {
* Try to add an element from the target vector to the dictionary.
*
* @param targetVector the target vector containing new element.
- * @param targetIndex the index of the new element in the target vector.
+ * @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
*/
@Override
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
index bea1a784c3d6a..ac7a7d32bf597 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java
@@ -14,11 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import java.util.HashMap;
-
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.hash.ArrowBufHasher;
import org.apache.arrow.memory.util.hash.SimpleHasher;
@@ -27,43 +25,35 @@
/**
* Dictionary encoder based on hash table.
+ *
* @param encoded vector type.
* @param decoded vector type, which is also the dictionary type.
*/
public class HashTableDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding/decoding.
- * It must be sorted.
- */
+ /** The dictionary for encoding/decoding. It must be sorted. */
private final D dictionary;
- /**
- * The hasher used to compute the hash code.
- */
+ /** The hasher used to compute the hash code. */
private final ArrowBufHasher hasher;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
/**
- * The hash map for distinct dictionary entries.
- * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary.
+ * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element,
+ * whereas the value is the index in the dictionary.
*/
private HashMap hashMap = new HashMap<>();
- /**
- * The pointer used to probe each element to encode.
- */
+ /** The pointer used to probe each element to encode. */
private ArrowBufPointer reusablePointer;
/**
* Constructs a dictionary encoder.
- * @param dictionary the dictionary.
*
+ * @param dictionary the dictionary.
*/
public HashTableDictionaryEncoder(D dictionary) {
this(dictionary, false);
@@ -71,20 +61,17 @@ public HashTableDictionaryEncoder(D dictionary) {
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding/decoding.
- *
- * For encoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
- *
- *
- * For decoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the decoder should never expect a null in the input.
- * 2) If set to false, the decoder simply produces a null in the output.
- *
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding/decoding.
+ * For encoding, when a null is encountered in the input, 1) If the flag is set to true,
+ * the encoder searches for the value in the dictionary, and outputs the index in the
+ * dictionary. 2) If the flag is set to false, the encoder simply produces a null in the
+ * output.
+ * For decoding, when a null is encountered in the input, 1) If the flag is set to true,
+ * the decoder should never expect a null in the input. 2) If set to false, the decoder
+ * simply produces a null in the output.
*/
public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
this(dictionary, encodeNull, SimpleHasher.INSTANCE);
@@ -92,13 +79,13 @@ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) {
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding.
- * When a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding. When a null is encountered in the
+ * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply
+ * produces a null in the output.
* @param hasher the hasher used to calculate the hash code.
*/
public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) {
@@ -120,12 +107,12 @@ private void buildHashMap() {
}
/**
- * Encodes an input vector by a hash table.
- * So the algorithm takes O(n) time, where n is the length of the input vector.
+ * Encodes an input vector by a hash table. So the algorithm takes O(n) time, where n is the
+ * length of the input vector.
*
- * @param input the input vector.
+ * @param input the input vector.
* @param output the output vector.
- **/
+ */
@Override
public void encode(D input, E output) {
for (int i = 0; i < input.getValueCount(); i++) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
index 84a3a96af8ef1..9aeff22005751 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import org.apache.arrow.vector.BaseIntVector;
@@ -24,20 +23,17 @@
/**
* Dictionary encoder based on linear search.
+ *
* @param encoded vector type.
* @param decoded vector type, which is also the dictionary type.
*/
public class LinearDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding.
- */
+ /** The dictionary for encoding. */
private final D dictionary;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
private RangeEqualsVisitor equalizer;
@@ -46,8 +42,10 @@ public class LinearDictionaryEncoder encoded vector type.
* @param decoded vector type, which is also the dictionary type.
*/
public class SearchDictionaryEncoder
implements DictionaryEncoder {
- /**
- * The dictionary for encoding/decoding.
- * It must be sorted.
- */
+ /** The dictionary for encoding/decoding. It must be sorted. */
private final D dictionary;
- /**
- * The criteria by which the dictionary is sorted.
- */
+ /** The criteria by which the dictionary is sorted. */
private final VectorValueComparator comparator;
- /**
- * A flag indicating if null should be encoded.
- */
+ /** A flag indicating if null should be encoded. */
private final boolean encodeNull;
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary. It must be in sorted order.
* @param comparator the criteria for sorting.
*/
@@ -57,28 +51,29 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator
/**
* Constructs a dictionary encoder.
+ *
* @param dictionary the dictionary. It must be in sorted order.
* @param comparator the criteria for sorting.
- * @param encodeNull a flag indicating if null should be encoded.
- * It determines the behaviors for processing null values in the input during encoding.
- * When a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ * processing null values in the input during encoding. When a null is encountered in the
+ * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary,
+ * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply
+ * produces a null in the output.
*/
- public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) {
+ public SearchDictionaryEncoder(
+ D dictionary, VectorValueComparator comparator, boolean encodeNull) {
this.dictionary = dictionary;
this.comparator = comparator;
this.encodeNull = encodeNull;
}
/**
- * Encodes an input vector by binary search.
- * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector,
- * and m is the length of the dictionary.
+ * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is
+ * the length of the input vector, and m is the length of the dictionary.
+ *
* @param input the input vector.
- * @param output the output vector. Note that it must be in a fresh state. At least,
- * all its validity bits should be clear.
+ * @param output the output vector. Note that it must be in a fresh state. At least, all its
+ * validity bits should be clear.
*/
@Override
public void encode(D input, E output) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
index f9cd77daa2e76..fca7df067dcff 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java
@@ -14,45 +14,36 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import java.util.TreeSet;
-
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
/**
- * This class builds the dictionary based on a binary search tree.
- * Each add operation can be finished in O(log(n)) time,
- * where n is the current dictionary size.
+ * This class builds the dictionary based on a binary search tree. Each add operation can be
+ * finished in O(log(n)) time, where n is the current dictionary size.
*
* @param the dictionary vector type.
*/
-public class SearchTreeBasedDictionaryBuilder implements DictionaryBuilder {
+public class SearchTreeBasedDictionaryBuilder
+ implements DictionaryBuilder {
- /**
- * The dictionary to be built.
- */
+ /** The dictionary to be built. */
private final V dictionary;
- /**
- * The criteria for sorting in the search tree.
- */
+ /** The criteria for sorting in the search tree. */
protected final VectorValueComparator comparator;
- /**
- * If null should be encoded.
- */
+ /** If null should be encoded. */
private final boolean encodeNull;
- /**
- * The search tree for storing the value index.
- */
+ /** The search tree for storing the value index. */
private TreeSet searchTree;
/**
* Construct a search tree-based dictionary builder.
+ *
* @param dictionary the dictionary vector.
* @param comparator the criteria for value equality.
*/
@@ -62,11 +53,13 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c
/**
* Construct a search tree-based dictionary builder.
+ *
* @param dictionary the dictionary vector.
* @param comparator the criteria for value equality.
* @param encodeNull if null values should be added to the dictionary.
*/
- public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator, boolean encodeNull) {
+ public SearchTreeBasedDictionaryBuilder(
+ V dictionary, VectorValueComparator comparator, boolean encodeNull) {
this.dictionary = dictionary;
this.comparator = comparator;
this.encodeNull = encodeNull;
@@ -76,11 +69,10 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c
}
/**
- * Gets the dictionary built.
- * Please note that the dictionary is not in sorted order.
- * Instead, its order is determined by the order of element insertion.
- * To get the dictionary in sorted order, please use
- * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}.
+ * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its
+ * order is determined by the order of element insertion. To get the dictionary in sorted order,
+ * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}.
+ *
* @return the dictionary.
*/
@Override
@@ -90,6 +82,7 @@ public V getDictionary() {
/**
* Try to add all values from the target vector to the dictionary.
+ *
* @param targetVector the target vector containing values to probe.
* @return the number of values actually added to the dictionary.
*/
@@ -107,6 +100,7 @@ public int addValues(V targetVector) {
/**
* Try to add an element from the target vector to the dictionary.
+ *
* @param targetVector the target vector containing new element.
* @param targetIndex the index of the new element in the target vector.
* @return the index of the new element in the dictionary.
@@ -132,8 +126,8 @@ public int addValue(V targetVector, int targetIndex) {
}
/**
- * Gets the sorted dictionary.
- * Note that given the binary search tree, the sort can finish in O(n).
+ * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in
+ * O(n).
*/
public void populateSortedDictionary(V sortedDictionary) {
int idx = 0;
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
index f5e95cf1033f5..5492676af1a2e 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java
@@ -14,26 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.misc;
import org.apache.arrow.vector.BaseIntVector;
-/**
- * Partial sum related utilities.
- */
+/** Partial sum related utilities. */
public class PartialSumUtils {
/**
- * Converts an input vector to a partial sum vector.
- * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}.
- * Suppose we have input vector a and output vector b.
- * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...).
+ * Converts an input vector to a partial sum vector. This is an inverse operation of {@link
+ * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a
+ * and output vector b. Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...).
+ *
* @param deltaVector the input vector.
* @param partialSumVector the output vector.
* @param sumBase the base of the partial sums.
*/
- public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) {
+ public static void toPartialSumVector(
+ BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) {
long sum = sumBase;
partialSumVector.setWithPossibleTruncate(0, sumBase);
@@ -45,10 +43,10 @@ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector p
}
/**
- * Converts an input vector to the delta vector.
- * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}.
- * Suppose we have input vector a and output vector b.
- * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...).
+ * Converts an input vector to the delta vector. This is an inverse operation of {@link
+ * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input
+ * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...).
+ *
* @param partialSumVector the input vector.
* @param deltaVector the output vector.
*/
@@ -61,18 +59,18 @@ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector d
}
/**
- * Given a value and a partial sum vector, finds its position in the partial sum vector.
- * In particular, given an integer value a and partial sum vector v, we try to find a
- * position i, so that v(i) <= a < v(i + 1).
- * The algorithm is based on binary search, so it takes O(log(n)) time, where n is
- * the length of the partial sum vector.
+ * Given a value and a partial sum vector, finds its position in the partial sum vector. In
+ * particular, given an integer value a and partial sum vector v, we try to find a position i, so
+ * that v(i) <= a < v(i + 1). The algorithm is based on binary search, so it takes O(log(n)) time,
+ * where n is the length of the partial sum vector.
+ *
* @param partialSumVector the input partial sum vector.
* @param value the value to search.
* @return the position in the partial sum vector, if any, or -1, if none is found.
*/
public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) {
- if (value < partialSumVector.getValueAsLong(0) ||
- value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) {
+ if (value < partialSumVector.getValueAsLong(0)
+ || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) {
return -1;
}
@@ -114,6 +112,5 @@ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector,
throw new IllegalStateException("Should never get here");
}
- private PartialSumUtils() {
- }
+ private PartialSumUtils() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
index 43c9a5b010e8c..baa2058ffc51f 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java
@@ -14,11 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.rank;
import java.util.stream.IntStream;
-
import org.apache.arrow.algorithm.sort.IndexSorter;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -28,21 +26,21 @@
/**
* Utility for calculating ranks of vector elements.
+ *
* @param the vector type
*/
public class VectorRank {
private VectorValueComparator comparator;
- /**
- * Vector indices.
- */
+ /** Vector indices. */
private IntVector indices;
private final BufferAllocator allocator;
/**
* Constructs a vector rank utility.
+ *
* @param allocator the allocator to use.
*/
public VectorRank(BufferAllocator allocator) {
@@ -50,9 +48,10 @@ public VectorRank(BufferAllocator allocator) {
}
/**
- * Given a rank r, gets the index of the element that is the rth smallest in the vector.
- * The operation is performed without changing the vector, and takes O(n) time,
- * where n is the length of the vector.
+ * Given a rank r, gets the index of the element that is the rth smallest in the vector. The
+ * operation is performed without changing the vector, and takes O(n) time, where n is the length
+ * of the vector.
+ *
* @param vector the vector from which to get the element index.
* @param comparator the criteria for vector element comparison.
* @param rank the rank to determine.
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
index 6226921b22ed6..6a48019edc3eb 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java
@@ -14,49 +14,40 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
-
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.compare.Range;
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
/**
- * Search for a value in the vector by multiple threads.
- * This is often used in scenarios where the vector is large or
- * low response time is required.
+ * Search for a value in the vector by multiple threads. This is often used in scenarios where the
+ * vector is large or low response time is required.
+ *
* @param the vector type.
*/
public class ParallelSearcher {
- /**
- * The target vector to search.
- */
+ /** The target vector to search. */
private final V vector;
- /**
- * The thread pool.
- */
+ /** The thread pool. */
private final ExecutorService threadPool;
- /**
- * The number of threads to use.
- */
+ /** The number of threads to use. */
private final int numThreads;
- /**
- * The position of the key in the target vector, if any.
- */
+ /** The position of the key in the target vector, if any. */
private volatile int keyPosition = -1;
/**
* Constructs a parallel searcher.
+ *
* @param vector the vector to search.
* @param threadPool the thread pool to use.
* @param numThreads the number of threads to use.
@@ -77,17 +68,17 @@ private CompletableFuture[] initSearch() {
}
/**
- * Search for the key in the target vector. The element-wise comparison is based on
- * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise
- * comparison: equal and un-equal.
+ * Search for the key in the target vector. The element-wise comparison is based on {@link
+ * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal
+ * and unequal.
+ *
* @param keyVector the vector containing the search key.
* @param keyIndex the index of the search key in the key vector.
- * @return the position of a matched value in the target vector,
- * or -1 if none is found. Please note that if there are multiple
- * matches of the key in the target vector, this method makes no
- * guarantees about which instance is returned.
- * For an alternative search implementation that always finds the first match of the key,
- * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @return the position of a matched value in the target vector, or -1 if none is found. Please
+ * note that if there are multiple matches of the key in the target vector, this method makes
+ * no guarantees about which instance is returned. For an alternative search implementation
+ * that always finds the first match of the key, see {@link
+ * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
* @throws ExecutionException if an exception occurs in a thread.
* @throws InterruptedException if a thread is interrupted.
*/
@@ -96,36 +87,38 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup
final int valueCount = vector.getValueCount();
for (int i = 0; i < numThreads; i++) {
final int tid = i;
- Future> unused = threadPool.submit(() -> {
- // convert to long to avoid overflow
- int start = (int) (((long) valueCount) * tid / numThreads);
- int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
-
- if (start >= end) {
- // no data assigned to this task.
- futures[tid].complete(false);
- return;
- }
-
- RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null);
- Range range = new Range(0, 0, 1);
- for (int pos = start; pos < end; pos++) {
- if (keyPosition != -1) {
- // the key has been found by another task
- futures[tid].complete(false);
- return;
- }
- range.setLeftStart(pos).setRightStart(keyIndex);
- if (visitor.rangeEquals(range)) {
- keyPosition = pos;
- futures[tid].complete(true);
- return;
- }
- }
-
- // no match value is found.
- futures[tid].complete(false);
- });
+      Future<?> unused =
+          threadPool.submit(
+              () -> {
+                // compute this task's slice [start, end) in long arithmetic to avoid int overflow
+                int start = (int) (((long) valueCount) * tid / numThreads);
+                int end = (int) (((long) valueCount) * (tid + 1) / numThreads);
+
+                if (start >= end) {
+                  // no data assigned to this task.
+                  futures[tid].complete(false);
+                  return;
+                }
+
+                RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null);
+                Range range = new Range(0, 0, 1);
+                for (int pos = start; pos < end; pos++) {
+                  if (keyPosition != -1) {
+                    // the key has been found by another task, so stop scanning this slice
+                    futures[tid].complete(false);
+                    return;
+                  }
+                  range.setLeftStart(pos).setRightStart(keyIndex);
+                  if (visitor.rangeEquals(range)) {
+                    keyPosition = pos;
+                    futures[tid].complete(true);
+                    return;
+                  }
+                }
+
+                // no match value is found in this slice.
+                futures[tid].complete(false);
+              });
}
CompletableFuture.allOf(futures).get();
@@ -133,56 +126,58 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup
}
/**
- * Search for the key in the target vector. The element-wise comparison is based on
- * {@link VectorValueComparator}, so there are three possible results for each element-wise
- * comparison: less than, equal to and greater than.
+ * Search for the key in the target vector. The element-wise comparison is based on {@link
+ * VectorValueComparator}, so there are three possible results for each element-wise comparison:
+ * less than, equal to and greater than.
+ *
* @param keyVector the vector containing the search key.
* @param keyIndex the index of the search key in the key vector.
* @param comparator the comparator for comparing the key against vector elements.
- * @return the position of a matched value in the target vector,
- * or -1 if none is found. Please note that if there are multiple
- * matches of the key in the target vector, this method makes no
- * guarantees about which instance is returned.
- * For an alternative search implementation that always finds the first match of the key,
- * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
+ * @return the position of a matched value in the target vector, or -1 if none is found. Please
+ * note that if there are multiple matches of the key in the target vector, this method makes
+ * no guarantees about which instance is returned. For an alternative search implementation
+ * that always finds the first match of the key, see {@link
+ * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}.
* @throws ExecutionException if an exception occurs in a thread.
* @throws InterruptedException if a thread is interrupted.
*/
- public int search(
- V keyVector, int keyIndex, VectorValueComparator comparator) throws ExecutionException, InterruptedException {
+ public int search(V keyVector, int keyIndex, VectorValueComparator comparator)
+ throws ExecutionException, InterruptedException {
final CompletableFuture[] futures = initSearch();
final int valueCount = vector.getValueCount();
for (int i = 0; i < numThreads; i++) {
final int tid = i;
- Future> unused = threadPool.submit(() -> {
- // convert to long to avoid overflow
- int start = (int) (((long) valueCount) * tid / numThreads);
- int end = (int) ((long) valueCount) * (tid + 1) / numThreads;
-
- if (start >= end) {
- // no data assigned to this task.
- futures[tid].complete(false);
- return;
- }
-
- VectorValueComparator localComparator = comparator.createNew();
- localComparator.attachVectors(vector, keyVector);
- for (int pos = start; pos < end; pos++) {
- if (keyPosition != -1) {
- // the key has been found by another task
- futures[tid].complete(false);
- return;
- }
- if (localComparator.compare(pos, keyIndex) == 0) {
- keyPosition = pos;
- futures[tid].complete(true);
- return;
- }
- }
-
- // no match value is found.
- futures[tid].complete(false);
- });
+      Future<?> unused =
+          threadPool.submit(
+              () -> {
+                // compute this task's slice [start, end) in long arithmetic to avoid int overflow
+                int start = (int) (((long) valueCount) * tid / numThreads);
+                int end = (int) (((long) valueCount) * (tid + 1) / numThreads);
+
+                if (start >= end) {
+                  // no data assigned to this task.
+                  futures[tid].complete(false);
+                  return;
+                }
+
+                VectorValueComparator<V> localComparator = comparator.createNew();
+                localComparator.attachVectors(vector, keyVector);
+                for (int pos = start; pos < end; pos++) {
+                  if (keyPosition != -1) {
+                    // the key has been found by another task, so stop scanning this slice
+                    futures[tid].complete(false);
+                    return;
+                  }
+                  if (localComparator.compare(pos, keyIndex) == 0) {
+                    keyPosition = pos;
+                    futures[tid].complete(true);
+                    return;
+                  }
+                }
+
+                // no match value is found in this slice.
+                futures[tid].complete(false);
+              });
}
CompletableFuture.allOf(futures).get();
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
index 249194843f101..c7905dd8956c8 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java
@@ -1,108 +1,105 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.arrow.algorithm.search;
-
-import org.apache.arrow.algorithm.sort.VectorValueComparator;
-import org.apache.arrow.vector.ValueVector;
-
-/**
- * Search for the range of a particular element in the target vector.
- */
-public class VectorRangeSearcher {
-
- /**
- * Result returned when a search fails.
- */
- public static final int SEARCH_FAIL_RESULT = -1;
-
- /**
- * Search for the first occurrence of an element.
- * The search is based on the binary search algorithm. So the target vector must be sorted.
- * @param targetVector the vector from which to perform the search.
- * @param comparator the criterion for the comparison.
- * @param keyVector the vector containing the element to search.
- * @param keyIndex the index of the search key in the key vector.
- * @param the vector type.
- * @return the index of the first matched element if any, and -1 otherwise.
- */
- public static int getFirstMatch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
- comparator.attachVectors(keyVector, targetVector);
-
- int ret = SEARCH_FAIL_RESULT;
-
- int low = 0;
- int high = targetVector.getValueCount() - 1;
-
- while (low <= high) {
- int mid = low + (high - low) / 2;
- int result = comparator.compare(keyIndex, mid);
- if (result < 0) {
- // the key is smaller
- high = mid - 1;
- } else if (result > 0) {
- // the key is larger
- low = mid + 1;
- } else {
- // an equal element is found
- // continue to go left-ward
- ret = mid;
- high = mid - 1;
- }
- }
- return ret;
- }
-
- /**
- * Search for the last occurrence of an element.
- * The search is based on the binary search algorithm. So the target vector must be sorted.
- * @param targetVector the vector from which to perform the search.
- * @param comparator the criterion for the comparison.
- * @param keyVector the vector containing the element to search.
- * @param keyIndex the index of the search key in the key vector.
- * @param the vector type.
- * @return the index of the last matched element if any, and -1 otherwise.
- */
- public static int getLastMatch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
- comparator.attachVectors(keyVector, targetVector);
-
- int ret = SEARCH_FAIL_RESULT;
-
- int low = 0;
- int high = targetVector.getValueCount() - 1;
-
- while (low <= high) {
- int mid = low + (high - low) / 2;
- int result = comparator.compare(keyIndex, mid);
- if (result < 0) {
- // the key is smaller
- high = mid - 1;
- } else if (result > 0) {
- // the key is larger
- low = mid + 1;
- } else {
- // an equal element is found,
- // continue to go right-ward
- ret = mid;
- low = mid + 1;
- }
- }
- return ret;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.algorithm.search;
+
+import org.apache.arrow.algorithm.sort.VectorValueComparator;
+import org.apache.arrow.vector.ValueVector;
+
+/** Search for the range of a particular element in the target vector. */
+public class VectorRangeSearcher {
+
+  /** Result returned when a search fails. */
+  public static final int SEARCH_FAIL_RESULT = -1;
+
+  /**
+   * Search for the first occurrence of an element. The search is based on the binary search
+   * algorithm, so the target vector must be sorted.
+   *
+   * @param targetVector the vector from which to perform the search.
+   * @param comparator the criterion for the comparison; it is attached to both vectors here.
+   * @param keyVector the vector containing the element to search.
+   * @param keyIndex the index of the search key in the key vector.
+   * @param <V> the vector type.
+   * @return the index of the first matched element if any, and -1 otherwise.
+   */
+  public static <V extends ValueVector> int getFirstMatch(
+      V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+    comparator.attachVectors(keyVector, targetVector);
+
+    int ret = SEARCH_FAIL_RESULT;
+
+    int low = 0;
+    int high = targetVector.getValueCount() - 1;
+
+    while (low <= high) {
+      int mid = low + (high - low) / 2;
+      int result = comparator.compare(keyIndex, mid);
+      if (result < 0) {
+        // the key is smaller
+        high = mid - 1;
+      } else if (result > 0) {
+        // the key is larger
+        low = mid + 1;
+      } else {
+        // an equal element is found; remember it and
+        // continue to go left-ward for an earlier match
+        ret = mid;
+        high = mid - 1;
+      }
+    }
+    return ret;
+  }
+
+  /**
+   * Search for the last occurrence of an element. The search is based on the binary search
+   * algorithm, so the target vector must be sorted.
+   *
+   * @param targetVector the vector from which to perform the search.
+   * @param comparator the criterion for the comparison; it is attached to both vectors here.
+   * @param keyVector the vector containing the element to search.
+   * @param keyIndex the index of the search key in the key vector.
+   * @param <V> the vector type.
+   * @return the index of the last matched element if any, and -1 otherwise.
+   */
+  public static <V extends ValueVector> int getLastMatch(
+      V targetVector, VectorValueComparator<V> comparator, V keyVector, int keyIndex) {
+    comparator.attachVectors(keyVector, targetVector);
+
+    int ret = SEARCH_FAIL_RESULT;
+
+    int low = 0;
+    int high = targetVector.getValueCount() - 1;
+
+    while (low <= high) {
+      int mid = low + (high - low) / 2;
+      int result = comparator.compare(keyIndex, mid);
+      if (result < 0) {
+        // the key is smaller
+        high = mid - 1;
+      } else if (result > 0) {
+        // the key is larger
+        low = mid + 1;
+      } else {
+        // an equal element is found; remember it and
+        // continue to go right-ward for a later match
+        ret = mid;
+        low = mid + 1;
+      }
+    }
+    return ret;
+  }
+}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
index 646bca01bb81d..dd0b4de5d8677 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java
@@ -14,25 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.vector.ValueVector;
-/**
- * Search for a particular element in the vector.
- */
+/** Search for a particular element in the vector. */
public final class VectorSearcher {
- /**
- * Result returned when a search fails.
- */
+ /** Result returned when a search fails. */
public static final int SEARCH_FAIL_RESULT = -1;
/**
- * Search for a particular element from the key vector in the target vector by binary search.
- * The target vector must be sorted.
+ * Search for a particular element from the key vector in the target vector by binary search. The
+ * target vector must be sorted.
+ *
* @param targetVector the vector from which to perform the sort.
* @param comparator the criterion for the sort.
* @param keyVector the vector containing the element to search.
@@ -41,7 +37,7 @@ public final class VectorSearcher {
* @return the index of a matched element if any, and -1 otherwise.
*/
public static int binarySearch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
comparator.attachVectors(keyVector, targetVector);
// perform binary search
@@ -63,7 +59,9 @@ public static int binarySearch(
}
/**
- * Search for a particular element from the key vector in the target vector by traversing the vector in sequence.
+ * Search for a particular element from the key vector in the target vector by traversing the
+ * vector in sequence.
+ *
* @param targetVector the vector from which to perform the search.
* @param comparator the criterion for element equality.
* @param keyVector the vector containing the element to search.
@@ -72,7 +70,7 @@ public static int binarySearch(
* @return the index of a matched element if any, and -1 otherwise.
*/
public static int linearSearch(
- V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
+ V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) {
comparator.attachVectors(keyVector, targetVector);
for (int i = 0; i < targetVector.getValueCount(); i++) {
if (comparator.compare(keyIndex, i) == 0) {
@@ -82,7 +80,5 @@ public static int linearSearch(
return SEARCH_FAIL_RESULT;
}
- private VectorSearcher() {
-
- }
+ private VectorSearcher() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
index ec74598e0eebf..77093d87bc489 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java
@@ -14,20 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * A composite vector comparator compares a number of vectors
- * by a number of inner comparators.
- *
- * It works by first using the first comparator, if a non-zero value
- * is returned, it simply returns it. Otherwise, it uses the second comparator,
- * and so on, until a non-zero value is produced, or all inner comparators have
- * been used.
- *
+ * A composite vector comparator compares a number of vectors by a number of inner comparators.
+ *
+ * <p>It works by first using the first comparator; if a non-zero value is returned, it simply
+ * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is
+ * produced, or all inner comparators have been used.
*/
public class CompositeVectorComparator extends VectorValueComparator {
@@ -62,7 +58,8 @@ public int compare(int index1, int index2) {
@Override
public VectorValueComparator createNew() {
- VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length];
+ VectorValueComparator[] newInnerComparators =
+ new VectorValueComparator[innerComparators.length];
for (int i = 0; i < innerComparators.length; i++) {
newInnerComparators[i] = innerComparators[i].createNew();
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
index 588876aa99059..ec650cd9dc88b 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH;
import java.math.BigDecimal;
import java.time.Duration;
-
import org.apache.arrow.memory.util.ArrowBufPointer;
import org.apache.arrow.memory.util.ByteFunctionHelpers;
import org.apache.arrow.vector.BaseFixedWidthVector;
@@ -56,13 +54,12 @@
import org.apache.arrow.vector.complex.RepeatedValueVector;
import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
-/**
- * Default comparator implementations for different types of vectors.
- */
+/** Default comparator implementations for different types of vectors. */
public class DefaultVectorComparators {
/**
* Create the default comparator for the vector.
+ *
* @param vector the vector.
* @param the vector type.
* @return the default comparator.
@@ -104,7 +101,8 @@ public static VectorValueComparator createDefaultComp
} else if (vector instanceof IntervalDayVector) {
return (VectorValueComparator) new IntervalDayComparator();
} else if (vector instanceof IntervalMonthDayNanoVector) {
- throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ throw new IllegalArgumentException(
+ "No default comparator for " + vector.getClass().getCanonicalName());
} else if (vector instanceof TimeMicroVector) {
return (VectorValueComparator) new TimeMicroComparator();
} else if (vector instanceof TimeMilliVector) {
@@ -122,7 +120,7 @@ public static VectorValueComparator createDefaultComp
return (VectorValueComparator) new VariableWidthComparator();
} else if (vector instanceof RepeatedValueVector) {
VectorValueComparator> innerComparator =
- createDefaultComparator(((RepeatedValueVector) vector).getDataVector());
+ createDefaultComparator(((RepeatedValueVector) vector).getDataVector());
return new RepeatedValueComparator(innerComparator);
} else if (vector instanceof FixedSizeListVector) {
VectorValueComparator> innerComparator =
@@ -132,13 +130,11 @@ public static VectorValueComparator createDefaultComp
return (VectorValueComparator) new NullComparator();
}
- throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName());
+ throw new IllegalArgumentException(
+ "No default comparator for " + vector.getClass().getCanonicalName());
}
- /**
- * Default comparator for bytes.
- * The comparison is based on values, with null comes first.
- */
+ /** Default comparator for bytes. The comparison is based on values, with null comes first. */
public static class ByteComparator extends VectorValueComparator {
public ByteComparator() {
@@ -159,8 +155,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for short integers.
- * The comparison is based on values, with null comes first.
+ * Default comparator for short integers. The comparison is based on values, with null comes
+ * first.
*/
public static class ShortComparator extends VectorValueComparator {
@@ -182,8 +178,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for 32-bit integers.
- * The comparison is based on int values, with null comes first.
+ * Default comparator for 32-bit integers. The comparison is based on int values, with null comes
+ * first.
*/
public static class IntComparator extends VectorValueComparator {
@@ -205,8 +201,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for long integers.
- * The comparison is based on values, with null comes first.
+ * Default comparator for long integers. The comparison is based on values, with null comes first.
*/
public static class LongComparator extends VectorValueComparator {
@@ -229,8 +224,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned bytes.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned bytes. The comparison is based on values, with null comes
+ * first.
*/
public static class UInt1Comparator extends VectorValueComparator {
@@ -253,8 +248,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned short integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned short integer. The comparison is based on values, with null
+ * comes first.
*/
public static class UInt2Comparator extends VectorValueComparator {
@@ -280,8 +275,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned integer. The comparison is based on values, with null comes
+ * first.
*/
public static class UInt4Comparator extends VectorValueComparator {
@@ -303,8 +298,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for unsigned long integer.
- * The comparison is based on values, with null comes first.
+ * Default comparator for unsigned long integer. The comparison is based on values, with null
+ * comes first.
*/
public static class UInt8Comparator extends VectorValueComparator {
@@ -326,8 +321,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for float type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for float type. The comparison is based on values, with null comes first.
*/
public static class Float4Comparator extends VectorValueComparator {
@@ -363,8 +357,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for double type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for double type. The comparison is based on values, with null comes first.
*/
public static class Float8Comparator extends VectorValueComparator {
@@ -399,10 +392,7 @@ public VectorValueComparator createNew() {
}
}
- /**
- * Default comparator for bit type.
- * The comparison is based on values, with null comes first.
- */
+ /** Default comparator for bit type. The comparison is based on values, with null comes first. */
public static class BitComparator extends VectorValueComparator {
public BitComparator() {
@@ -424,8 +414,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for DateDay type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for DateDay type. The comparison is based on values, with null comes first.
*/
public static class DateDayComparator extends VectorValueComparator {
@@ -447,8 +436,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for DateMilli type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for DateMilli type. The comparison is based on values, with null comes
+ * first.
*/
public static class DateMilliComparator extends VectorValueComparator {
@@ -471,8 +460,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Decimal256 type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Decimal256 type. The comparison is based on values, with null comes
+ * first.
*/
public static class Decimal256Comparator extends VectorValueComparator {
@@ -495,8 +484,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Decimal type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Decimal type. The comparison is based on values, with null comes first.
*/
public static class DecimalComparator extends VectorValueComparator {
@@ -519,8 +507,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for Duration type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for Duration type. The comparison is based on values, with null comes first.
*/
public static class DurationComparator extends VectorValueComparator {
@@ -543,8 +530,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for IntervalDay type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for IntervalDay type. The comparison is based on values, with null comes
+ * first.
*/
public static class IntervalDayComparator extends VectorValueComparator {
@@ -567,8 +554,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeMicro type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeMicro type. The comparison is based on values, with null comes
+ * first.
*/
public static class TimeMicroComparator extends VectorValueComparator {
@@ -591,8 +578,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeMilli type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeMilli type. The comparison is based on values, with null comes
+ * first.
*/
public static class TimeMilliComparator extends VectorValueComparator {
@@ -615,8 +602,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeNano type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeNano type. The comparison is based on values, with null comes first.
*/
public static class TimeNanoComparator extends VectorValueComparator {
@@ -639,8 +625,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeSec type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeSec type. The comparison is based on values, with null comes first.
*/
public static class TimeSecComparator extends VectorValueComparator {
@@ -663,8 +648,7 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for TimeSec type.
- * The comparison is based on values, with null comes first.
+ * Default comparator for TimeStamp type. The comparison is based on values, with nulls first.
*/
public static class TimeStampComparator extends VectorValueComparator {
@@ -687,10 +671,11 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}.
- * The comparison is in lexicographic order, with null comes first.
+ * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. The comparison is
+ * in lexicographic order, with null comes first.
*/
- public static class FixedSizeBinaryComparator extends VectorValueComparator {
+ public static class FixedSizeBinaryComparator
+ extends VectorValueComparator {
@Override
public int compare(int index1, int index2) {
@@ -720,9 +705,7 @@ public VectorValueComparator createNew() {
}
}
- /**
- * Default comparator for {@link org.apache.arrow.vector.NullVector}.
- */
+ /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */
public static class NullComparator extends VectorValueComparator {
@Override
public int compare(int index1, int index2) {
@@ -742,8 +725,8 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}.
- * The comparison is in lexicographic order, with null comes first.
+ * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is
+ * in lexicographic order, with null comes first.
*/
public static class VariableWidthComparator extends VectorValueComparator {
@@ -772,12 +755,13 @@ public VectorValueComparator createNew() {
}
/**
- * Default comparator for {@link RepeatedValueVector}.
- * It works by comparing the underlying vector in a lexicographic order.
+ * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector
+ * in a lexicographic order.
+ *
* @param inner vector type.
*/
public static class RepeatedValueComparator
- extends VectorValueComparator {
+ extends VectorValueComparator {
private final VectorValueComparator innerComparator;
@@ -823,8 +807,9 @@ public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vecto
}
/**
- * Default comparator for {@link RepeatedValueVector}.
- * It works by comparing the underlying vector in a lexicographic order.
+ * Default comparator for {@link FixedSizeListVector}. It works by comparing the underlying vector
+ * in a lexicographic order.
+ *
* @param inner vector type.
*/
public static class FixedSizeListComparator
@@ -869,6 +854,5 @@ public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vecto
}
}
- private DefaultVectorComparators() {
- }
+ private DefaultVectorComparators() {}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
index aaa7ba117c3ba..ea2b344a1eabb 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java
@@ -14,20 +14,22 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.BaseFixedWidthVector;
/**
- * Default in-place sorter for fixed-width vectors.
- * It is based on quick-sort, with average time complexity O(n*log(n)).
+ * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time
+ * complexity O(n*log(n)).
+ *
* @param vector type.
*/
-public class FixedWidthInPlaceVectorSorter implements InPlaceVectorSorter {
+public class FixedWidthInPlaceVectorSorter
+ implements InPlaceVectorSorter {
/**
- * If the number of items is smaller than this threshold, we will use another algorithm to sort the data.
+ * If the number of items is smaller than this threshold, we will use another algorithm to sort
+ * the data.
*/
public static final int CHANGE_ALGORITHM_THRESHOLD = 15;
@@ -35,15 +37,10 @@ public class FixedWidthInPlaceVectorSorter imple
VectorValueComparator comparator;
- /**
- * The vector to sort.
- */
+ /** The vector to sort. */
V vec;
- /**
- * The buffer to hold the pivot.
- * It always has length 1.
- */
+ /** The buffer to hold the pivot. It always has length 1. */
V pivotBuffer;
@Override
@@ -99,9 +96,7 @@ private void quickSort() {
}
}
- /**
- * Select the pivot as the median of 3 samples.
- */
+ /** Select the pivot as the median of 3 samples. */
void choosePivot(int low, int high) {
// we need at least 3 items
if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
index 05a4585792dc2..817e890a5abe1 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.memory.ArrowBuf;
@@ -26,18 +25,21 @@
import org.apache.arrow.vector.IntVector;
/**
- * Default out-of-place sorter for fixed-width vectors.
- * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * Default out-of-place sorter for fixed-width vectors. It is an out-of-place sort, with time
+ * complexity O(n*log(n)).
+ *
* @param vector type.
*/
-public class FixedWidthOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter {
+public class FixedWidthOutOfPlaceVectorSorter
+ implements OutOfPlaceVectorSorter {
protected IndexSorter indexSorter = new IndexSorter<>();
@Override
public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) {
if (srcVector instanceof BitVector) {
- throw new IllegalArgumentException("BitVector is not supported with FixedWidthOutOfPlaceVectorSorter.");
+ throw new IllegalArgumentException(
+ "BitVector is not supported with FixedWidthOutOfPlaceVectorSorter.");
}
comparator.attachVector(srcVector);
@@ -49,15 +51,18 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
ArrowBuf dstValueBuffer = dstVector.getDataBuffer();
// check buffer size
- Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
- "Not enough capacity for the validity buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity());
+ Preconditions.checkArgument(
+ dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
+ "Not enough capacity for the validity buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 7) / 8,
+ dstValidityBuffer.capacity());
Preconditions.checkArgument(
dstValueBuffer.capacity() >= srcVector.getValueCount() * ((long) srcVector.getTypeWidth()),
- "Not enough capacity for the data buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- srcVector.getValueCount() * srcVector.getTypeWidth(), dstValueBuffer.capacity());
+ "Not enough capacity for the data buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ srcVector.getValueCount() * srcVector.getTypeWidth(),
+ dstValueBuffer.capacity());
// sort value indices
try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
@@ -73,9 +78,9 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
} else {
BitVectorHelper.setBit(dstValidityBuffer, dstIndex);
MemoryUtil.UNSAFE.copyMemory(
- srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth),
- dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth),
- valueWidth);
+ srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth),
+ dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth),
+ valueWidth);
}
}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
index 9ea39f638aebe..18f5e94314f83 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.util.Preconditions;
@@ -22,23 +21,26 @@
import org.apache.arrow.vector.ValueVector;
/**
- * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)).
- * Since it does not make any assumptions about the memory layout of the vector, its performance
- * can be sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}),
- * it should be used in preference.
+ * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). Since it
+ * does not make any assumptions about the memory layout of the vector, its performance can be
+ * sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), it
+ * should be used in preference.
*
* @param vector type.
*/
-public class GeneralOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter {
+public class GeneralOutOfPlaceVectorSorter
+ implements OutOfPlaceVectorSorter {
@Override
public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) {
comparator.attachVector(srcVector);
// check vector capacity
- Preconditions.checkArgument(dstVector.getValueCapacity() >= srcVector.getValueCount(),
- "Not enough capacity for the target vector. " +
- "Expected capacity %s, actual capacity %s", srcVector.getValueCount(), dstVector.getValueCapacity());
+ Preconditions.checkArgument(
+ dstVector.getValueCapacity() >= srcVector.getValueCount(),
+ "Not enough capacity for the target vector. " + "Expected capacity %s, actual capacity %s",
+ srcVector.getValueCount(),
+ dstVector.getValueCapacity());
// sort value indices
try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
index 19817fe76b8ec..ba41bb9e4eac7 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java
@@ -14,15 +14,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * Basic interface for sorting a vector in-place.
- * That is, the sorting is performed by modifying the input vector,
- * without creating a new sorted vector.
+ * Basic interface for sorting a vector in-place. That is, the sorting is performed by modifying the
+ * input vector, without creating a new sorted vector.
*
* @param the vector type.
*/
@@ -30,6 +28,7 @@ public interface InPlaceVectorSorter {
/**
* Sort a vector in-place.
+ *
* @param vec the vector to sort.
* @param comparator the criteria for sort.
*/
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
index 3072717f43123..b8ce3289d2889 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java
@@ -14,39 +14,35 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import java.util.stream.IntStream;
-
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
/**
* Sorter for the indices of a vector.
+ *
* @param vector type.
*/
public class IndexSorter {
/**
- * If the number of items is smaller than this threshold, we will use another algorithm to sort the data.
+ * If the number of items is smaller than this threshold, we will use another algorithm to sort
+ * the data.
*/
public static final int CHANGE_ALGORITHM_THRESHOLD = 15;
- /**
- * Comparator for vector indices.
- */
+ /** Comparator for vector indices. */
private VectorValueComparator comparator;
- /**
- * Vector indices to sort.
- */
+ /** Vector indices to sort. */
private IntVector indices;
/**
- * Sorts indices, by quick-sort. Suppose the vector is denoted by v.
- * After calling this method, the following relations hold:
- * v(indices[0]) <= v(indices[1]) <= ...
+ * Sorts indices, by quick-sort. Suppose the vector is denoted by v. After calling this method,
+ * the following relations hold: v(indices[0]) <= v(indices[1]) <= ...
+ *
* @param vector the vector whose indices need to be sorted.
* @param indices the vector for storing the sorted indices.
* @param comparator the comparator to sort indices.
@@ -100,11 +96,9 @@ private void quickSort() {
}
}
- /**
- * Select the pivot as the median of 3 samples.
- */
+ /** Select the pivot as the median of 3 samples. */
static int choosePivot(
- int low, int high, IntVector indices, VectorValueComparator comparator) {
+ int low, int high, IntVector indices, VectorValueComparator comparator) {
// we need at least 3 items
if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) {
return indices.get(low);
@@ -149,8 +143,9 @@ static int choosePivot(
/**
* Partition a range of values in a vector into two parts, with elements in one part smaller than
- * elements from the other part. The partition is based on the element indices, so it does
- * not modify the underlying vector.
+ * elements from the other part. The partition is based on the element indices, so it does not
+ * modify the underlying vector.
+ *
* @param low the lower bound of the range.
* @param high the upper bound of the range.
* @param indices vector element indices.
@@ -159,7 +154,7 @@ static int choosePivot(
* @return the index of the split point.
*/
public static int partition(
- int low, int high, IntVector indices, VectorValueComparator comparator) {
+ int low, int high, IntVector indices, VectorValueComparator comparator) {
int pivotIndex = choosePivot(low, high, indices, comparator);
while (low < high) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
index dc12a5fefdb65..c058636d66d1e 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java
@@ -14,27 +14,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.BaseFixedWidthVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.ValueVector;
-/**
- * Insertion sorter.
- */
+/** Insertion sorter. */
class InsertionSorter {
/**
* Sorts the range of a vector by insertion sort.
*
- * @param vector the vector to be sorted.
- * @param startIdx the start index of the range (inclusive).
- * @param endIdx the end index of the range (inclusive).
- * @param buffer an extra buffer with capacity 1 to hold the current key.
+ * @param vector the vector to be sorted.
+ * @param startIdx the start index of the range (inclusive).
+ * @param endIdx the end index of the range (inclusive).
+ * @param buffer an extra buffer with capacity 1 to hold the current key.
* @param comparator the criteria for vector element comparison.
- * @param the vector type.
+ * @param the vector type.
*/
static void insertionSort(
V vector, int startIdx, int endIdx, VectorValueComparator comparator, V buffer) {
@@ -53,11 +50,11 @@ static void insertionSort(
/**
* Sorts the range of vector indices by insertion sort.
*
- * @param indices the vector indices.
- * @param startIdx the start index of the range (inclusive).
- * @param endIdx the end index of the range (inclusive).
+ * @param indices the vector indices.
+ * @param startIdx the start index of the range (inclusive).
+ * @param endIdx the end index of the range (inclusive).
* @param comparator the criteria for vector element comparison.
- * @param the vector type.
+ * @param the vector type.
*/
static void insertionSort(
IntVector indices, int startIdx, int endIdx, VectorValueComparator comparator) {
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
index df96121f1f8f7..ccb7bea4e2bd3 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java
@@ -14,15 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.IntVector;
-/**
- * An off heap implementation of stack with int elements.
- */
+/** An off heap implementation of stack with int elements. */
class OffHeapIntStack implements AutoCloseable {
private static final int INIT_SIZE = 128;
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
index 41d6dadc49147..b18e9b35d0895 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java
@@ -14,21 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * Basic interface for sorting a vector out-of-place.
- * That is, the sorting is performed on a newly-created vector,
- * and the original vector is not modified.
+ * Basic interface for sorting a vector out-of-place. That is, the sorting is performed on a
+ * newly-created vector, and the original vector is not modified.
+ *
* @param the vector type.
*/
public interface OutOfPlaceVectorSorter {
/**
* Sort a vector out-of-place.
+ *
* @param inVec the input vector.
* @param outVec the output vector, which has the same size as the input vector.
* @param comparator the criteria for sort.
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
index 0b0c3bd55b271..3fcfa5f8f215c 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java
@@ -14,17 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.ValueVector;
/**
- * Stable sorter. It compares values like ordinary comparators.
- * However, when values are equal, it breaks ties by the value indices.
- * Therefore, sort algorithms using this comparator always produce
+ * Stable sorter. It compares values like ordinary comparators. However, when values are equal, it
+ * breaks ties by the value indices. Therefore, sort algorithms using this comparator always produce
* stable sort results.
+ *
* @param type of the vector.
*/
public class StableVectorComparator extends VectorValueComparator {
@@ -33,6 +32,7 @@ public class StableVectorComparator extends VectorValueCo
/**
* Constructs a stable comparator from a given comparator.
+ *
* @param innerComparator the comparator to convert to stable comparator..
*/
public StableVectorComparator(VectorValueComparator innerComparator) {
@@ -47,8 +47,9 @@ public void attachVector(V vector) {
@Override
public void attachVectors(V vector1, V vector2) {
- Preconditions.checkArgument(vector1 == vector2,
- "Stable comparator only supports comparing values from the same vector");
+ Preconditions.checkArgument(
+ vector1 == vector2,
+ "Stable comparator only supports comparing values from the same vector");
super.attachVectors(vector1, vector2);
innerComparator.attachVectors(vector1, vector2);
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
index 863b07c348ef2..8f58dc0dcee0f 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.memory.ArrowBuf;
@@ -25,12 +24,13 @@
import org.apache.arrow.vector.IntVector;
/**
- * Default sorter for variable-width vectors.
- * It is an out-of-place sort, with time complexity O(n*log(n)).
+ * Default sorter for variable-width vectors. It is an out-of-place sort, with time complexity
+ * O(n*log(n)).
+ *
* @param vector type.
*/
public class VariableWidthOutOfPlaceVectorSorter
- implements OutOfPlaceVectorSorter {
+ implements OutOfPlaceVectorSorter {
protected IndexSorter indexSorter = new IndexSorter<>();
@@ -46,20 +46,29 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer();
// check buffer size
- Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
- "Not enough capacity for the validity buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity());
Preconditions.checkArgument(
- dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH),
- "Not enough capacity for the offset buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s",
- (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity());
- long dataSize = srcVector.getOffsetBuffer().getInt(
- srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
+ dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(),
+ "Not enough capacity for the validity buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 7) / 8,
+ dstValidityBuffer.capacity());
+ Preconditions.checkArgument(
+ dstOffsetBuffer.capacity()
+ >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH),
+ "Not enough capacity for the offset buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH,
+ dstOffsetBuffer.capacity());
+ long dataSize =
+ srcVector
+ .getOffsetBuffer()
+ .getInt(srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
Preconditions.checkArgument(
- dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " +
- "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity());
+ dstValueBuffer.capacity() >= dataSize,
+ "No enough capacity for the data buffer of the dst vector. "
+ + "Expected capacity %s, actual capacity %s",
+ dataSize,
+ dstValueBuffer.capacity());
// sort value indices
try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) {
@@ -77,16 +86,19 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co
BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex);
} else {
BitVectorHelper.setBit(dstValidityBuffer, dstIndex);
- int srcOffset = srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
+ int srcOffset =
+ srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH));
int valueLength =
- srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) - srcOffset;
+ srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH))
+ - srcOffset;
MemoryUtil.UNSAFE.copyMemory(
- srcValueBuffer.memoryAddress() + srcOffset,
- dstValueBuffer.memoryAddress() + dstOffset,
- valueLength);
+ srcValueBuffer.memoryAddress() + srcOffset,
+ dstValueBuffer.memoryAddress() + dstOffset,
+ valueLength);
dstOffset += valueLength;
}
- dstOffsetBuffer.setInt((dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset);
+ dstOffsetBuffer.setInt(
+ (dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset);
}
}
}
diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
index d2c772ca8a819..0472f04109b1c 100644
--- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
+++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java
@@ -14,54 +14,44 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.sort;
import org.apache.arrow.vector.ValueVector;
/**
- * Compare two values at the given indices in the vectors.
- * This is used for vector sorting.
+ * Compare two values at the given indices in the vectors. This is used for vector sorting.
+ *
* @param <V> type of the vector.
*/
public abstract class VectorValueComparator {
- /**
- * The first vector to compare.
- */
+ /** The first vector to compare. */
protected V vector1;
- /**
- * The second vector to compare.
- */
+ /** The second vector to compare. */
protected V vector2;
- /**
- * Width of the vector value. For variable-length vectors, this value makes no sense.
- */
+ /** Width of the vector value. For variable-length vectors, this value makes no sense. */
protected int valueWidth;
-
private boolean checkNullsOnCompare = true;
/**
- * This value is true by default and re-computed when vectors are attached to the comparator. If both vectors cannot
- * contain nulls then this value is {@code false} and calls to {@code compare(i1, i2)} are short-circuited
- * to {@code compareNotNull(i1, i2)} thereby speeding up comparisons resulting in faster sorts etc.
+ * This value is true by default and re-computed when vectors are attached to the comparator. If
+ * both vectors cannot contain nulls then this value is {@code false} and calls to {@code
+ * compare(i1, i2)} are short-circuited to {@code compareNotNull(i1, i2)} thereby speeding up
+ * comparisons resulting in faster sorts etc.
*/
public boolean checkNullsOnCompare() {
return this.checkNullsOnCompare;
}
- /**
- * Constructor for variable-width vectors.
- */
- protected VectorValueComparator() {
-
- }
+ /** Constructor for variable-width vectors. */
+ protected VectorValueComparator() {}
/**
* Constructor for fixed-width vectors.
+ *
* @param valueWidth the record width (in bytes).
*/
protected VectorValueComparator(int valueWidth) {
@@ -74,6 +64,7 @@ public int getValueWidth() {
/**
* Attach both vectors to compare to the same input vector.
+ *
* @param vector the vector to attach.
*/
public void attachVector(V vector) {
@@ -82,6 +73,7 @@ public void attachVector(V vector) {
/**
* Attach vectors to compare.
+ *
* @param vector1 the first vector to compare.
* @param vector2 the second vector to compare.
*/
@@ -99,7 +91,7 @@ private boolean mayHaveNulls(V v) {
if (v.getValueCount() == 0) {
return true;
}
- if (! v.getField().isNullable()) {
+ if (!v.getField().isNullable()) {
return false;
}
return v.getNullCount() > 0;
@@ -107,11 +99,11 @@ private boolean mayHaveNulls(V v) {
/**
* Compare two values, given their indices.
+ *
* @param index1 index of the first value to compare.
* @param index2 index of the second value to compare.
- * @return an integer greater than 0, if the first value is greater;
- * an integer smaller than 0, if the first value is smaller; or 0, if both
- * values are equal.
+ * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if
+ * the first value is smaller; or 0, if both values are equal.
*/
public int compare(int index1, int index2) {
if (checkNullsOnCompare) {
@@ -133,19 +125,19 @@ public int compare(int index1, int index2) {
}
/**
- * Compare two values, given their indices.
- * This is a fast path for comparing non-null values, so the caller
- * must make sure that values at both indices are not null.
+ * Compare two values, given their indices. This is a fast path for comparing non-null values, so
+ * the caller must make sure that values at both indices are not null.
+ *
* @param index1 index of the first value to compare.
* @param index2 index of the second value to compare.
- * @return an integer greater than 0, if the first value is greater;
- * an integer smaller than 0, if the first value is smaller; or 0, if both
- * values are equal.
+ * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if
+ * the first value is smaller; or 0, if both values are equal.
*/
public abstract int compareNotNull(int index1, int index2);
/**
* Creates a comparator of the same type.
+ *
* @return the newly created comparator.
*/
public abstract VectorValueComparator createNew();
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
index ac083b84f1611..537189013a731 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import java.nio.charset.StandardCharsets;
-
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
@@ -33,9 +31,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link DeduplicationUtils}.
- */
+/** Test cases for {@link DeduplicationUtils}. */
public class TestDeduplicationUtils {
private static final int VECTOR_LENGTH = 100;
@@ -57,10 +53,11 @@ public void shutdown() {
@Test
public void testDeduplicateFixedWidth() {
try (IntVector origVec = new IntVector("original vec", allocator);
- IntVector dedupVec = new IntVector("deduplicated vec", allocator);
- IntVector lengthVec = new IntVector("length vec", allocator);
- ArrowBuf distinctBuf = allocator.buffer(
- DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
+ IntVector dedupVec = new IntVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ ArrowBuf distinctBuf =
+ allocator.buffer(
+ DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
@@ -73,9 +70,10 @@ public void testDeduplicateFixedWidth() {
}
DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf);
- assertEquals( VECTOR_LENGTH,
- VECTOR_LENGTH * REPETITION_COUNT -
- BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
+ assertEquals(
+ VECTOR_LENGTH,
+ VECTOR_LENGTH * REPETITION_COUNT
+ - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec);
assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
@@ -84,7 +82,8 @@ public void testDeduplicateFixedWidth() {
assertEquals(i, dedupVec.get(i));
}
- DeduplicationUtils.populateRunLengths(distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
+ DeduplicationUtils.populateRunLengths(
+ distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
@@ -96,12 +95,12 @@ public void testDeduplicateFixedWidth() {
@Test
public void testDeduplicateVariableWidth() {
try (VarCharVector origVec = new VarCharVector("original vec", allocator);
- VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
- IntVector lengthVec = new IntVector("length vec", allocator);
- ArrowBuf distinctBuf = allocator.buffer(
- DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
- origVec.allocateNew(
- VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
+ VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ ArrowBuf distinctBuf =
+ allocator.buffer(
+ DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) {
+ origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
@@ -114,9 +113,10 @@ public void testDeduplicateVariableWidth() {
}
DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf);
- assertEquals(VECTOR_LENGTH,
- VECTOR_LENGTH * REPETITION_COUNT -
- BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
+ assertEquals(
+ VECTOR_LENGTH,
+ VECTOR_LENGTH * REPETITION_COUNT
+ - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT));
DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec);
assertEquals(VECTOR_LENGTH, dedupVec.getValueCount());
@@ -126,7 +126,7 @@ public void testDeduplicateVariableWidth() {
}
DeduplicationUtils.populateRunLengths(
- distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
+ distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT);
assertEquals(VECTOR_LENGTH, lengthVec.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
index 788213b162870..820cadccae537 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.deduplicate;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import java.nio.charset.StandardCharsets;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -30,9 +28,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link VectorRunDeduplicator}.
- */
+/** Test cases for {@link VectorRunDeduplicator}. */
public class TestVectorRunDeduplicator {
private static final int VECTOR_LENGTH = 100;
@@ -57,7 +53,7 @@ public void testDeduplicateFixedWidth() {
IntVector dedupVec = new IntVector("deduplicated vec", allocator);
IntVector lengthVec = new IntVector("length vec", allocator);
VectorRunDeduplicator deduplicator =
- new VectorRunDeduplicator<>(origVec, allocator)) {
+ new VectorRunDeduplicator<>(origVec, allocator)) {
origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
@@ -93,12 +89,11 @@ public void testDeduplicateFixedWidth() {
@Test
public void testDeduplicateVariableWidth() {
try (VarCharVector origVec = new VarCharVector("original vec", allocator);
- VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
- IntVector lengthVec = new IntVector("length vec", allocator);
- VectorRunDeduplicator deduplicator =
- new VectorRunDeduplicator<>(origVec, allocator)) {
- origVec.allocateNew(
- VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
+ VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator);
+ IntVector lengthVec = new IntVector("length vec", allocator);
+ VectorRunDeduplicator deduplicator =
+ new VectorRunDeduplicator<>(origVec, allocator)) {
+ origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT);
origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT);
lengthVec.allocateNew();
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
index 45c47626b720e..bfda86f26883d 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -23,7 +22,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Objects;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -32,9 +30,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link HashTableBasedDictionaryBuilder}.
- */
+/** Test cases for {@link HashTableBasedDictionaryBuilder}. */
public class TestHashTableBasedDictionaryBuilder {
private BufferAllocator allocator;
@@ -52,7 +48,7 @@ public void shutdown() {
@Test
public void testBuildVariableWidthDictionaryWithNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -72,27 +68,34 @@ public void testBuildVariableWidthDictionaryWithNull() {
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, true);
+ new HashTableBasedDictionaryBuilder<>(dictionary, true);
int result = dictionaryBuilder.addValues(vec);
assertEquals(7, result);
assertEquals(7, dictionary.getValueCount());
- assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
assertNull(dictionary.get(2));
- assertEquals("world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildVariableWidthDictionaryWithoutNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -112,27 +115,33 @@ public void testBuildVariableWidthDictionaryWithoutNull() {
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, false);
+ new HashTableBasedDictionaryBuilder<>(dictionary, false);
int result = dictionaryBuilder.addValues(vec);
assertEquals(6, result);
assertEquals(6, dictionary.getValueCount());
- assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
- assertEquals("world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8));
- assertEquals("12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
-
+ assertEquals(
+ "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildFixedWidthDictionaryWithNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -151,7 +160,7 @@ public void testBuildFixedWidthDictionaryWithNull() {
vec.setNull(9);
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, true);
+ new HashTableBasedDictionaryBuilder<>(dictionary, true);
int result = dictionaryBuilder.addValues(vec);
@@ -169,7 +178,7 @@ public void testBuildFixedWidthDictionaryWithNull() {
@Test
public void testBuildFixedWidthDictionaryWithoutNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -188,7 +197,7 @@ public void testBuildFixedWidthDictionaryWithoutNull() {
vec.setNull(9);
HashTableBasedDictionaryBuilder dictionaryBuilder =
- new HashTableBasedDictionaryBuilder<>(dictionary, false);
+ new HashTableBasedDictionaryBuilder<>(dictionary, false);
int result = dictionaryBuilder.addValues(vec);
@@ -199,7 +208,6 @@ public void testBuildFixedWidthDictionaryWithoutNull() {
assertEquals(8, dictionary.get(1));
assertEquals(32, dictionary.get(2));
assertEquals(16, dictionary.get(3));
-
}
}
}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
index 60efbf58bebda..b9646284a015b 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -25,7 +24,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -38,9 +36,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link HashTableDictionaryEncoder}.
- */
+/** Test cases for {@link HashTableDictionaryEncoder}. */
public class TestHashTableDictionaryEncoder {
private final int VECTOR_LENGTH = 50;
@@ -53,7 +49,7 @@ public class TestHashTableDictionaryEncoder {
byte[] one = "111".getBytes(StandardCharsets.UTF_8);
byte[] two = "222".getBytes(StandardCharsets.UTF_8);
- byte[][] data = new byte[][]{zero, one, two};
+ byte[][] data = new byte[][] {zero, one, two};
@Before
public void prepare() {
@@ -69,8 +65,8 @@ public void shutdown() {
public void testEncodeAndDecode() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -89,7 +85,7 @@ public void testEncodeAndDecode() {
rawVector.setValueCount(VECTOR_LENGTH);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionary, false);
+ new HashTableDictionaryEncoder<>(dictionary, false);
// perform encoding
encodedVector.allocateNew();
@@ -98,17 +94,21 @@ public void testEncodeAndDecode() {
// verify encoding results
assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -119,8 +119,8 @@ public void testEncodeAndDecode() {
public void testEncodeAndDecodeWithNull() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -144,7 +144,7 @@ public void testEncodeAndDecodeWithNull() {
rawVector.setValueCount(VECTOR_LENGTH);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionary, true);
+ new HashTableDictionaryEncoder<>(dictionary, true);
// perform encoding
encodedVector.allocateNew();
@@ -156,20 +156,24 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertEquals(0, encodedVector.get(i));
} else {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
if (i % 10 == 0) {
assertTrue(decodedVector.isNull(i));
} else {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -180,8 +184,8 @@ public void testEncodeAndDecodeWithNull() {
@Test
public void testEncodeNullWithoutNullInDictionary() {
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary, with no null in it.
dictionary.allocateNew();
@@ -199,13 +203,15 @@ public void testEncodeNullWithoutNullInDictionary() {
encodedVector.allocateNew();
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionary, true);
+ new HashTableDictionaryEncoder<>(dictionary, true);
// the encoder should encode null, but no null in the dictionary,
// so an exception should be thrown.
- assertThrows(IllegalArgumentException.class, () -> {
- encoder.encode(rawVector, encodedVector);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
}
}
@@ -213,8 +219,8 @@ public void testEncodeNullWithoutNullInDictionary() {
public void testEncodeStrings() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew(512, 5);
encoded.allocateNew();
@@ -235,7 +241,7 @@ public void testEncodeStrings() {
dictionaryVector.setValueCount(3);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionaryVector);
+ new HashTableDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
// verify indices
@@ -262,8 +268,8 @@ public void testEncodeStrings() {
public void testEncodeLargeVector() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew();
encoded.allocateNew();
@@ -281,7 +287,7 @@ public void testEncodeLargeVector() {
dictionaryVector.setValueCount(3);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionaryVector);
+ new HashTableDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(count, encoded.getValueCount());
@@ -305,8 +311,8 @@ public void testEncodeLargeVector() {
public void testEncodeBinaryVector() {
// Create a new value vector
try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
- final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
- final IntVector encoded = new IntVector("encoded", allocator)) {
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
vector.allocateNew(512, 5);
vector.allocateNew();
encoded.allocateNew();
@@ -327,7 +333,7 @@ public void testEncodeBinaryVector() {
dictionaryVector.setValueCount(3);
HashTableDictionaryEncoder encoder =
- new HashTableDictionaryEncoder<>(dictionaryVector);
+ new HashTableDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(5, encoded.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
index a76aedffa308d..a4641704198cb 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -25,7 +24,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
-
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
@@ -39,9 +37,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link LinearDictionaryEncoder}.
- */
+/** Test cases for {@link LinearDictionaryEncoder}. */
public class TestLinearDictionaryEncoder {
private final int VECTOR_LENGTH = 50;
@@ -54,7 +50,7 @@ public class TestLinearDictionaryEncoder {
byte[] one = "111".getBytes(StandardCharsets.UTF_8);
byte[] two = "222".getBytes(StandardCharsets.UTF_8);
- byte[][] data = new byte[][]{zero, one, two};
+ byte[][] data = new byte[][] {zero, one, two};
@Before
public void prepare() {
@@ -70,8 +66,8 @@ public void shutdown() {
public void testEncodeAndDecode() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -90,7 +86,7 @@ public void testEncodeAndDecode() {
rawVector.setValueCount(VECTOR_LENGTH);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionary, false);
+ new LinearDictionaryEncoder<>(dictionary, false);
// perform encoding
encodedVector.allocateNew();
@@ -99,17 +95,21 @@ public void testEncodeAndDecode() {
// verify encoding results
assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -120,8 +120,8 @@ public void testEncodeAndDecode() {
public void testEncodeAndDecodeWithNull() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -145,7 +145,7 @@ public void testEncodeAndDecodeWithNull() {
rawVector.setValueCount(VECTOR_LENGTH);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionary, true);
+ new LinearDictionaryEncoder<>(dictionary, true);
// perform encoding
encodedVector.allocateNew();
@@ -157,13 +157,16 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertEquals(0, encodedVector.get(i));
} else {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
@@ -171,7 +174,8 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertTrue(decodedVector.isNull(i));
} else {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -182,8 +186,8 @@ public void testEncodeAndDecodeWithNull() {
@Test
public void testEncodeNullWithoutNullInDictionary() {
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary, with no null in it.
dictionary.allocateNew();
@@ -201,13 +205,15 @@ public void testEncodeNullWithoutNullInDictionary() {
encodedVector.allocateNew();
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionary, true);
+ new LinearDictionaryEncoder<>(dictionary, true);
// the encoder should encode null, but no null in the dictionary,
// so an exception should be thrown.
- assertThrows(IllegalArgumentException.class, () -> {
- encoder.encode(rawVector, encodedVector);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
}
}
@@ -215,8 +221,8 @@ public void testEncodeNullWithoutNullInDictionary() {
public void testEncodeStrings() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew(512, 5);
encoded.allocateNew();
@@ -237,7 +243,7 @@ public void testEncodeStrings() {
dictionaryVector.setValueCount(3);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionaryVector);
+ new LinearDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
// verify indices
@@ -263,8 +269,8 @@ public void testEncodeStrings() {
public void testEncodeLargeVector() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew();
encoded.allocateNew();
@@ -282,7 +288,7 @@ public void testEncodeLargeVector() {
dictionaryVector.setValueCount(3);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionaryVector);
+ new LinearDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(count, encoded.getValueCount());
@@ -306,8 +312,8 @@ public void testEncodeLargeVector() {
public void testEncodeBinaryVector() {
// Create a new value vector
try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
- final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
- final IntVector encoded = new IntVector("encoded", allocator)) {
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
vector.allocateNew(512, 5);
vector.allocateNew();
encoded.allocateNew();
@@ -328,7 +334,7 @@ public void testEncodeBinaryVector() {
dictionaryVector.setValueCount(3);
LinearDictionaryEncoder encoder =
- new LinearDictionaryEncoder<>(dictionaryVector);
+ new LinearDictionaryEncoder<>(dictionaryVector);
encoder.encode(vector, encoded);
assertEquals(5, encoded.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
index e01c2e7905b46..e783e1f76818c 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static junit.framework.TestCase.assertTrue;
@@ -25,7 +24,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
@@ -40,9 +38,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link SearchDictionaryEncoder}.
- */
+/** Test cases for {@link SearchDictionaryEncoder}. */
public class TestSearchDictionaryEncoder {
private final int VECTOR_LENGTH = 50;
@@ -55,7 +51,7 @@ public class TestSearchDictionaryEncoder {
byte[] one = "111".getBytes(StandardCharsets.UTF_8);
byte[] two = "222".getBytes(StandardCharsets.UTF_8);
- byte[][] data = new byte[][]{zero, one, two};
+ byte[][] data = new byte[][] {zero, one, two};
@Before
public void prepare() {
@@ -71,8 +67,8 @@ public void shutdown() {
public void testEncodeAndDecode() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -91,8 +87,8 @@ public void testEncodeAndDecode() {
rawVector.setValueCount(VECTOR_LENGTH);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false);
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false);
// perform encoding
encodedVector.allocateNew();
@@ -101,17 +97,21 @@ public void testEncodeAndDecode() {
// verify encoding results
assertEquals(rawVector.getValueCount(), encodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
for (int i = 0; i < VECTOR_LENGTH; i++) {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -122,8 +122,8 @@ public void testEncodeAndDecode() {
public void testEncodeAndDecodeWithNull() {
Random random = new Random();
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary
dictionary.allocateNew();
@@ -147,8 +147,8 @@ public void testEncodeAndDecodeWithNull() {
rawVector.setValueCount(VECTOR_LENGTH);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
// perform encoding
encodedVector.allocateNew();
@@ -160,13 +160,16 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertEquals(0, encodedVector.get(i));
} else {
- assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
+ assertArrayEquals(
+ rawVector.get(i),
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8));
}
}
// perform decoding
Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null));
- try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
+ try (VarCharVector decodedVector =
+ (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) {
// verify decoding results
assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount());
@@ -174,7 +177,8 @@ public void testEncodeAndDecodeWithNull() {
if (i % 10 == 0) {
assertTrue(decodedVector.isNull(i));
} else {
- assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
+ assertArrayEquals(
+ String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8),
decodedVector.get(i));
}
}
@@ -185,8 +189,8 @@ public void testEncodeAndDecodeWithNull() {
@Test
public void testEncodeNullWithoutNullInDictionary() {
try (VarCharVector rawVector = new VarCharVector("original vector", allocator);
- IntVector encodedVector = new IntVector("encoded vector", allocator);
- VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
+ IntVector encodedVector = new IntVector("encoded vector", allocator);
+ VarCharVector dictionary = new VarCharVector("dictionary", allocator)) {
// set up dictionary, with no null in it.
dictionary.allocateNew();
@@ -204,14 +208,16 @@ public void testEncodeNullWithoutNullInDictionary() {
encodedVector.allocateNew();
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
+ new SearchDictionaryEncoder<>(
+ dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true);
// the encoder should encode null, but no null in the dictionary,
// so an exception should be thrown.
- assertThrows(IllegalArgumentException.class, () -> {
- encoder.encode(rawVector, encodedVector);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ encoder.encode(rawVector, encodedVector);
+ });
}
}
@@ -219,8 +225,8 @@ public void testEncodeNullWithoutNullInDictionary() {
public void testEncodeStrings() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew(512, 5);
encoded.allocateNew();
@@ -241,8 +247,8 @@ public void testEncodeStrings() {
dictionaryVector.setValueCount(3);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
encoder.encode(vector, encoded);
// verify indices
@@ -268,8 +274,8 @@ public void testEncodeStrings() {
public void testEncodeLargeVector() {
// Create a new value vector
try (final VarCharVector vector = new VarCharVector("foo", allocator);
- final IntVector encoded = new IntVector("encoded", allocator);
- final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
+ final IntVector encoded = new IntVector("encoded", allocator);
+ final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) {
vector.allocateNew();
encoded.allocateNew();
@@ -287,8 +293,8 @@ public void testEncodeLargeVector() {
dictionaryVector.setValueCount(3);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
encoder.encode(vector, encoded);
assertEquals(count, encoded.getValueCount());
@@ -312,8 +318,8 @@ public void testEncodeLargeVector() {
public void testEncodeBinaryVector() {
// Create a new value vector
try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator);
- final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
- final IntVector encoded = new IntVector("encoded", allocator)) {
+ final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator);
+ final IntVector encoded = new IntVector("encoded", allocator)) {
vector.allocateNew(512, 5);
vector.allocateNew();
encoded.allocateNew();
@@ -334,8 +340,8 @@ public void testEncodeBinaryVector() {
dictionaryVector.setValueCount(3);
SearchDictionaryEncoder encoder =
- new SearchDictionaryEncoder<>(
- dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
+ new SearchDictionaryEncoder<>(
+ dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector));
encoder.encode(vector, encoded);
assertEquals(5, encoded.getValueCount());
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
index 340b7e67e861f..6c8a57c1a4648 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.dictionary;
import static org.junit.Assert.assertEquals;
@@ -22,7 +21,6 @@
import java.nio.charset.StandardCharsets;
import java.util.Objects;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -33,9 +31,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link SearchTreeBasedDictionaryBuilder}.
- */
+/** Test cases for {@link SearchTreeBasedDictionaryBuilder}. */
public class TestSearchTreeBasedDictionaryBuilder {
private BufferAllocator allocator;
@@ -53,8 +49,8 @@ public void shutdown() {
@Test
public void testBuildVariableWidthDictionaryWithNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator);
- VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator);
+ VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -74,9 +70,10 @@ public void testBuildVariableWidthDictionaryWithNull() {
vec.set(8, "good".getBytes(StandardCharsets.UTF_8));
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
int result = dictionaryBuilder.addValues(vec);
@@ -86,20 +83,32 @@ public void testBuildVariableWidthDictionaryWithNull() {
dictionaryBuilder.populateSortedDictionary(sortedDictionary);
assertTrue(sortedDictionary.isNull(0));
- assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
- assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12",
+ new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc",
+ new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good",
+ new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "hello",
+ new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world",
+ new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildVariableWidthDictionaryWithoutNull() {
try (VarCharVector vec = new VarCharVector("", allocator);
- VarCharVector dictionary = new VarCharVector("", allocator);
- VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
+ VarCharVector dictionary = new VarCharVector("", allocator);
+ VarCharVector sortedDictionary = new VarCharVector("", allocator)) {
vec.allocateNew(100, 10);
vec.setValueCount(10);
@@ -119,9 +128,10 @@ public void testBuildVariableWidthDictionaryWithoutNull() {
vec.set(8, "good".getBytes(StandardCharsets.UTF_8));
vec.set(9, "abc".getBytes(StandardCharsets.UTF_8));
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
int result = dictionaryBuilder.addValues(vec);
@@ -130,20 +140,32 @@ public void testBuildVariableWidthDictionaryWithoutNull() {
dictionaryBuilder.populateSortedDictionary(sortedDictionary);
- assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8));
- assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
- assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
- assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
- assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
- assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
+ assertEquals(
+ "12",
+ new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8));
+ assertEquals(
+ "abc",
+ new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8));
+ assertEquals(
+ "dictionary",
+ new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8));
+ assertEquals(
+ "good",
+ new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8));
+ assertEquals(
+ "hello",
+ new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8));
+ assertEquals(
+ "world",
+ new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8));
}
}
@Test
public void testBuildFixedWidthDictionaryWithNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator);
- IntVector sortedDictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator);
+ IntVector sortedDictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -162,9 +184,10 @@ public void testBuildFixedWidthDictionaryWithNull() {
vec.set(8, 4);
vec.setNull(9);
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true);
int result = dictionaryBuilder.addValues(vec);
@@ -184,8 +207,8 @@ public void testBuildFixedWidthDictionaryWithNull() {
@Test
public void testBuildFixedWidthDictionaryWithoutNull() {
try (IntVector vec = new IntVector("", allocator);
- IntVector dictionary = new IntVector("", allocator);
- IntVector sortedDictionary = new IntVector("", allocator)) {
+ IntVector dictionary = new IntVector("", allocator);
+ IntVector sortedDictionary = new IntVector("", allocator)) {
vec.allocateNew(10);
vec.setValueCount(10);
@@ -204,9 +227,10 @@ public void testBuildFixedWidthDictionaryWithoutNull() {
vec.set(8, 4);
vec.setNull(9);
- VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec);
+ VectorValueComparator comparator =
+ DefaultVectorComparators.createDefaultComparator(vec);
SearchTreeBasedDictionaryBuilder dictionaryBuilder =
- new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
+ new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false);
int result = dictionaryBuilder.addValues(vec);
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
index 630dd80b44084..e3ab981670e9e 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.misc;
import static org.junit.Assert.assertEquals;
@@ -26,9 +25,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link PartialSumUtils}.
- */
+/** Test cases for {@link PartialSumUtils}. */
public class TestPartialSumUtils {
private static final int PARTIAL_SUM_VECTOR_LENGTH = 101;
@@ -50,7 +47,7 @@ public void shutdown() {
@Test
public void testToPartialSumVector() {
try (IntVector delta = new IntVector("delta", allocator);
- IntVector partialSum = new IntVector("partial sum", allocator)) {
+ IntVector partialSum = new IntVector("partial sum", allocator)) {
delta.allocateNew(DELTA_VECTOR_LENGTH);
delta.setValueCount(DELTA_VECTOR_LENGTH);
@@ -75,7 +72,7 @@ public void testToPartialSumVector() {
@Test
public void testToDeltaVector() {
try (IntVector partialSum = new IntVector("partial sum", allocator);
- IntVector delta = new IntVector("delta", allocator)) {
+ IntVector delta = new IntVector("delta", allocator)) {
partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH);
partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH);
@@ -111,7 +108,8 @@ public void testFindPositionInPartialSumVector() {
// search and verify results
for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) {
- assertEquals(i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1));
+ assertEquals(
+ i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1));
}
}
}
@@ -131,8 +129,10 @@ public void testFindPositionInPartialSumVectorNegative() {
// search and verify results
assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase));
assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1));
- assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum,
- sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1)));
+ assertEquals(
+ -1,
+ PartialSumUtils.findPositionInPartialSumVector(
+ partialSum, sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1)));
}
}
}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
index 0e6627eb4822a..4b7c6a9756780 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.rank;
import static org.junit.Assert.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.nio.charset.StandardCharsets;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -32,9 +30,7 @@
import org.junit.Before;
import org.junit.Test;
-/**
- * Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}.
- */
+/** Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. */
public class TestVectorRank {
private BufferAllocator allocator;
@@ -70,7 +66,7 @@ public void testFixedWidthRank() {
vector.set(9, 6);
VectorValueComparator comparator =
- DefaultVectorComparators.createDefaultComparator(vector);
+ DefaultVectorComparators.createDefaultComparator(vector);
assertEquals(7, rank.indexAtRank(vector, comparator, 0));
assertEquals(0, rank.indexAtRank(vector, comparator, 1));
assertEquals(6, rank.indexAtRank(vector, comparator, 2));
@@ -103,7 +99,7 @@ public void testVariableWidthRank() {
vector.set(9, String.valueOf(6).getBytes(StandardCharsets.UTF_8));
VectorValueComparator comparator =
- DefaultVectorComparators.createDefaultComparator(vector);
+ DefaultVectorComparators.createDefaultComparator(vector);
assertEquals(7, rank.indexAtRank(vector, comparator, 0));
assertEquals(0, rank.indexAtRank(vector, comparator, 1));
@@ -137,11 +133,13 @@ public void testRankNegative() {
vector.set(9, 6);
VectorValueComparator comparator =
- DefaultVectorComparators.createDefaultComparator(vector);
+ DefaultVectorComparators.createDefaultComparator(vector);
- assertThrows(IllegalArgumentException.class, () -> {
- rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1);
- });
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1);
+ });
}
}
}
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
index 9ccecfa84a73a..7ff86a743effd 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -26,7 +25,6 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -39,9 +37,7 @@
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
-/**
- * Test cases for {@link ParallelSearcher}.
- */
+/** Test cases for {@link ParallelSearcher}. */
@RunWith(Parameterized.class)
public class TestParallelSearcher {
@@ -97,8 +93,10 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
keyVector.allocateNew(VECTOR_LENGTH);
// if we are comparing elements using equality semantics, we do not need a comparator here.
- VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null
- : DefaultVectorComparators.createDefaultComparator(targetVector);
+ VectorValueComparator comparator =
+ comparatorType == ComparatorType.EqualityComparator
+ ? null
+ : DefaultVectorComparators.createDefaultComparator(targetVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
targetVector.set(i, i);
@@ -107,9 +105,13 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
targetVector.setValueCount(VECTOR_LENGTH);
keyVector.setValueCount(VECTOR_LENGTH);
- ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
+ ParallelSearcher searcher =
+ new ParallelSearcher<>(targetVector, threadPool, threadCount);
for (int i = 0; i < VECTOR_LENGTH; i++) {
- int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
+ int pos =
+ comparator == null
+ ? searcher.search(keyVector, i)
+ : searcher.search(keyVector, i, comparator);
if (i * 2 < VECTOR_LENGTH) {
assertEquals(i * 2, pos);
} else {
@@ -122,13 +124,15 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept
@Test
public void testParallelStringSearch() throws ExecutionException, InterruptedException {
try (VarCharVector targetVector = new VarCharVector("targetVector", allocator);
- VarCharVector keyVector = new VarCharVector("keyVector", allocator)) {
+ VarCharVector keyVector = new VarCharVector("keyVector", allocator)) {
targetVector.allocateNew(VECTOR_LENGTH);
keyVector.allocateNew(VECTOR_LENGTH);
// if we are comparing elements using equality semantics, we do not need a comparator here.
- VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null
- : DefaultVectorComparators.createDefaultComparator(targetVector);
+ VectorValueComparator comparator =
+ comparatorType == ComparatorType.EqualityComparator
+ ? null
+ : DefaultVectorComparators.createDefaultComparator(targetVector);
for (int i = 0; i < VECTOR_LENGTH; i++) {
targetVector.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8));
@@ -137,9 +141,13 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc
targetVector.setValueCount(VECTOR_LENGTH);
keyVector.setValueCount(VECTOR_LENGTH);
- ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount);
+ ParallelSearcher searcher =
+ new ParallelSearcher<>(targetVector, threadPool, threadCount);
for (int i = 0; i < VECTOR_LENGTH; i++) {
- int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator);
+ int pos =
+ comparator == null
+ ? searcher.search(keyVector, i)
+ : searcher.search(keyVector, i, comparator);
if (i * 2 < VECTOR_LENGTH) {
assertEquals(i * 2, pos);
} else {
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
index 18f4fa0355f4f..39f2f609f7df4 100644
--- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
+++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java
@@ -14,14 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.arrow.algorithm.search;
import static org.junit.Assert.assertEquals;
import java.util.Arrays;
import java.util.Collection;
-
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
@@ -33,9 +31,7 @@
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
-/**
- * Test cases for {@link VectorRangeSearcher}.
- */
+/** Test cases for {@link VectorRangeSearcher}. */
@RunWith(Parameterized.class)
public class TestVectorRangeSearcher {
@@ -78,9 +74,11 @@ public void testGetLowerBounds() {
}
// do search
- VectorValueComparator