test: Introduce integration test framework. #581

Draft: wants to merge 10 commits into base: main
Changes from 1 commit
Make clippy happy
liurenjie1024 committed Sep 13, 2024

Verified: this commit was created on GitHub.com and signed with GitHub's verified signature (the key has expired).
commit 0c92b0c66b7d7eaaf585ba1e80157b22e4c635e2
3 changes: 2 additions & 1 deletion Cargo.toml
@@ -23,7 +23,7 @@ members = [
"crates/iceberg",
"crates/integrations/*",
"crates/test_utils",
"crates/sqllogictest",
"crates/sqllogictests",
]
exclude = ["bindings/python"]

@@ -40,6 +40,7 @@ rust-version = "1.77.1"
anyhow = "1.0.72"
apache-avro = "0.17"
array-init = "2"
arrow = { version = "52" }
arrow-arith = { version = "52" }
arrow-array = { version = "52" }
arrow-ord = { version = "52" }
3 changes: 1 addition & 2 deletions crates/iceberg/src/writer/file_writer/track_writer.rs
@@ -42,10 +42,9 @@ impl TrackWriter {
impl FileWrite for TrackWriter {
async fn write(&mut self, bs: Bytes) -> Result<()> {
let size = bs.len();
self.inner.write(bs).await.map(|v| {
self.inner.write(bs).await.inspect(|_| {
self.written_size
.fetch_add(size as i64, std::sync::atomic::Ordering::Relaxed);
v
})
}

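For context on the clippy fix above (likely the `manual_inspect` lint): `Result::inspect`, stable since Rust 1.76 and therefore available under the workspace's `rust-version = "1.77.1"`, runs a side effect on the `Ok` value and passes the `Result` through unchanged, which is what the old `map` closure did by hand by returning `v`. A standalone sketch of the equivalence (not part of the PR):

```rust
use std::sync::atomic::{AtomicI64, Ordering};

fn main() {
    let written_size = AtomicI64::new(0);

    // Old shape: side effect inside `map`, manually returning the value.
    let a: Result<usize, ()> = Ok(4).map(|v| {
        written_size.fetch_add(4, Ordering::Relaxed);
        v
    });

    // New shape: `inspect` borrows the Ok value and forwards the Result.
    let b: Result<usize, ()> = Ok(4).inspect(|_| {
        written_size.fetch_add(4, Ordering::Relaxed);
    });

    assert_eq!(a, b);
    assert_eq!(written_size.load(Ordering::Relaxed), 8);
}
```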
4 changes: 0 additions & 4 deletions crates/sqllogictest/src/error.rs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "sqllogictest"
name = "sqllogictests"
version.workspace = true
edition.workspace = true
homepage.workspace = true
@@ -8,10 +8,11 @@ license.workspace = true
rust-version.workspace = true

[dependencies]
arrow-schema = { workspace = true }
arrow-array= { workspace = true }
arrow = { workspace = true }
# For spark-connect-rs
arrow_51 = { version = "51", package = "arrow"}
async-trait = { workspace = true }
sqllogictest = "0.21.0"
sqllogictest = "0.22"
datafusion = { workspace = true, default-features = true}
datafusion-common = { workspace = true, default-features = true}
thiserror = "1.0.63"
82 changes: 82 additions & 0 deletions crates/sqllogictests/src/display/conversion.rs
@@ -0,0 +1,82 @@
use arrow::array::types::{Decimal128Type, Decimal256Type, DecimalType};
use arrow::datatypes::i256;
use bigdecimal::BigDecimal;
use half::f16;
use rust_decimal::prelude::*;

/// Represents a constant for NULL string in your database.
pub const NULL_STR: &str = "NULL";

pub(crate) fn bool_to_str(value: bool) -> String {
if value {
"true".to_string()
} else {
"false".to_string()
}
}

pub(crate) fn varchar_to_str(value: &str) -> String {
if value.is_empty() {
"(empty)".to_string()
} else {
value.trim_end_matches('\n').to_string()
}
}

pub(crate) fn f16_to_str(value: f16) -> String {
if value.is_nan() {
// The sign of NaN can be different depending on platform.
// So the string representation of NaN ignores the sign.
"NaN".to_string()
} else if value == f16::INFINITY {
"Infinity".to_string()
} else if value == f16::NEG_INFINITY {
"-Infinity".to_string()
} else {
big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap())
}
}

pub(crate) fn f32_to_str(value: f32) -> String {
if value.is_nan() {
// The sign of NaN can be different depending on platform.
// So the string representation of NaN ignores the sign.
"NaN".to_string()
} else if value == f32::INFINITY {
"Infinity".to_string()
} else if value == f32::NEG_INFINITY {
"-Infinity".to_string()
} else {
big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap())
}
}

pub(crate) fn f64_to_str(value: f64) -> String {
if value.is_nan() {
// The sign of NaN can be different depending on platform.
// So the string representation of NaN ignores the sign.
"NaN".to_string()
} else if value == f64::INFINITY {
"Infinity".to_string()
} else if value == f64::NEG_INFINITY {
"-Infinity".to_string()
} else {
big_decimal_to_str(BigDecimal::from_str(&value.to_string()).unwrap())
}
}

pub(crate) fn i128_to_str(value: i128, precision: &u8, scale: &i8) -> String {
big_decimal_to_str(
BigDecimal::from_str(&Decimal128Type::format_decimal(value, *precision, *scale)).unwrap(),
)
}

pub(crate) fn i256_to_str(value: i256, precision: &u8, scale: &i8) -> String {
big_decimal_to_str(
BigDecimal::from_str(&Decimal256Type::format_decimal(value, *precision, *scale)).unwrap(),
)
}

pub(crate) fn big_decimal_to_str(value: BigDecimal) -> String {
value.round(12).normalized().to_string()
}
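The function above is the stability anchor for float output: every float and decimal is funneled through `BigDecimal`, rounded to 12 digits, and normalized, so engines that differ only in low-order float noise still compare equal. A small sketch of that behavior, assuming only the `bigdecimal` crate already imported above:

```rust
use std::str::FromStr;

use bigdecimal::BigDecimal;

// Mirrors `big_decimal_to_str` above.
fn to_str(value: BigDecimal) -> String {
    value.round(12).normalized().to_string()
}

fn main() {
    // `normalized()` strips trailing zeros...
    assert_eq!(to_str(BigDecimal::from_str("1.2300").unwrap()), "1.23");
    // ...and `round(12)` discards anything past 12 fractional digits.
    assert_eq!(
        to_str(BigDecimal::from_str("0.1234567890123999").unwrap()),
        "0.123456789012"
    );
}
```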
Original file line number Diff line number Diff line change
@@ -1,26 +1,10 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use arrow_array::types::{Decimal128Type, Decimal256Type, DecimalType};
use arrow_51::array::types::{Decimal128Type, Decimal256Type, DecimalType};
use arrow_51::datatypes::i256;
use bigdecimal::BigDecimal;
use datafusion_common::arrow::datatypes::i256;
use half::f16;
use rust_decimal::prelude::*;


/// Represents a constant for NULL string in your database.
pub const NULL_STR: &str = "NULL";

@@ -84,18 +68,16 @@ pub(crate) fn f64_to_str(value: f64) -> String {

pub(crate) fn i128_to_str(value: i128, precision: &u8, scale: &i8) -> String {
big_decimal_to_str(
BigDecimal::from_str(&Decimal128Type::format_decimal(value, *precision, *scale))
.unwrap(),
BigDecimal::from_str(&Decimal128Type::format_decimal(value, *precision, *scale)).unwrap(),
)
}

pub(crate) fn i256_to_str(value: i256, precision: &u8, scale: &i8) -> String {
big_decimal_to_str(
BigDecimal::from_str(&Decimal256Type::format_decimal(value, *precision, *scale))
.unwrap(),
BigDecimal::from_str(&Decimal256Type::format_decimal(value, *precision, *scale)).unwrap(),
)
}

pub(crate) fn big_decimal_to_str(value: BigDecimal) -> String {
value.round(12).normalized().to_string()
}
}
4 changes: 4 additions & 0 deletions crates/sqllogictests/src/display/mod.rs
@@ -0,0 +1,4 @@
pub mod conversion;
pub mod conversion_51;
pub mod normalize;
pub mod normalize_51;
Original file line number Diff line number Diff line change
@@ -15,14 +15,16 @@
// specific language governing permissions and limitations
// under the License.

use crate::engine::output::DFColumnType;
use anyhow::anyhow;
use arrow_array::{ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Float16Array, Float32Array, Float64Array, LargeStringArray, RecordBatch, StringArray, StringViewArray};
use arrow_schema::{DataType, Fields};
use datafusion::arrow::util::display::ArrayFormatter;
use arrow::array::{
ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Float16Array, Float32Array,
Float64Array, LargeStringArray, RecordBatch, StringArray, StringViewArray,
};
use arrow::datatypes::{DataType, Fields};
use arrow::util::display::ArrayFormatter;
use datafusion_common::format::DEFAULT_FORMAT_OPTIONS;

use crate::engine::conversion::*;
use crate::display::conversion::*;
use crate::engine::output::DFColumnType;

/// Converts `batches` to a result as expected by sqllogictest.
pub(crate) fn convert_batches(batches: Vec<RecordBatch>) -> anyhow::Result<Vec<Vec<String>>> {
@@ -35,16 +37,13 @@ pub(crate) fn convert_batches(batches: Vec<RecordBatch>) -> anyhow::Result<Vec<V
// Verify schema
if !schema.contains(&batch.schema()) {
return Err(anyhow!(
"Schema mismatch. Previously had\n{:#?}\n\nGot:\n{:#?}",
&schema,
batch.schema()
),
);
"Schema mismatch. Previously had\n{:#?}\n\nGot:\n{:#?}",
&schema,
batch.schema()
));
}

let new_rows = convert_batch(batch)?
.into_iter()
.flat_map(expand_row);
let new_rows = convert_batch(batch)?.into_iter().flat_map(expand_row);
rows.extend(new_rows);
}
Ok(rows)
@@ -73,10 +72,11 @@ pub(crate) fn convert_batches(batches: Vec<RecordBatch>) -> anyhow::Result<Vec<V
/// "|-- Projection: d.b, MAX(d.a) AS max_a",
/// ]
/// ```
fn expand_row(mut row: Vec<String>) -> impl Iterator<Item=Vec<String>> {
use itertools::Either;
fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
use std::iter::once;

use itertools::Either;

// check last cell
if let Some(cell) = row.pop() {
let lines: Vec<_> = cell.split('\n').collect();
@@ -93,7 +93,7 @@ fn expand_row(mut row: Vec<String>) -> impl Iterator<Item=Vec<String>> {
.enumerate()
.map(|(idx, l)| {
// replace any leading spaces with '-' as
// `sqllogictest` ignores whitespace differences
// `sqllogictests` ignores whitespace differences
//
// See https://github.com/apache/datafusion/issues/6328
let content = l.trim_start();
@@ -141,26 +141,17 @@ macro_rules! get_row_value {
/// [NULL Values and empty strings]: https://duckdb.org/dev/sqllogictest/result_verification#null-values-and-empty-strings
///
/// Floating numbers are rounded to have a consistent representation with the Postgres runner.
///
pub fn cell_to_string(col: &ArrayRef, row: usize) -> anyhow::Result<String> {
if !col.is_valid(row) {
// represent any null value with the string "NULL"
Ok(NULL_STR.to_string())
} else {
match col.data_type() {
DataType::Null => Ok(NULL_STR.to_string()),
DataType::Boolean => {
Ok(bool_to_str(get_row_value!(BooleanArray, col, row)))
}
DataType::Float16 => {
Ok(f16_to_str(get_row_value!(Float16Array, col, row)))
}
DataType::Float32 => {
Ok(f32_to_str(get_row_value!(Float32Array, col, row)))
}
DataType::Float64 => {
Ok(f64_to_str(get_row_value!(Float64Array, col, row)))
}
DataType::Boolean => Ok(bool_to_str(get_row_value!(BooleanArray, col, row))),
DataType::Float16 => Ok(f16_to_str(get_row_value!(Float16Array, col, row))),
DataType::Float32 => Ok(f32_to_str(get_row_value!(Float32Array, col, row))),
DataType::Float64 => Ok(f64_to_str(get_row_value!(Float64Array, col, row))),
DataType::Decimal128(precision, scale) => {
let value = get_row_value!(Decimal128Array, col, row);
Ok(i128_to_str(value, precision, scale))
@@ -169,19 +160,9 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> anyhow::Result<String> {
let value = get_row_value!(Decimal256Array, col, row);
Ok(i256_to_str(value, precision, scale))
}
DataType::LargeUtf8 => Ok(varchar_to_str(get_row_value!(
LargeStringArray,
col,
row
))),
DataType::Utf8 => {
Ok(varchar_to_str(get_row_value!(StringArray, col, row)))
}
DataType::Utf8View => Ok(varchar_to_str(get_row_value!(
StringViewArray,
col,
row
))),
DataType::LargeUtf8 => Ok(varchar_to_str(get_row_value!(LargeStringArray, col, row))),
DataType::Utf8 => Ok(varchar_to_str(get_row_value!(StringArray, col, row))),
DataType::Utf8View => Ok(varchar_to_str(get_row_value!(StringViewArray, col, row))),
_ => {
let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS);
Ok(f.unwrap().value(row).to_string())
@@ -210,15 +191,12 @@ pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec<DFColumnType> {
| DataType::Float64
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _) => DFColumnType::Float,
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
DFColumnType::Text
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => DFColumnType::Text,
DataType::Date32 | DataType::Date64 | DataType::Time32(_) | DataType::Time64(_) => {
DFColumnType::DateTime
}
DataType::Date32
| DataType::Date64
| DataType::Time32(_)
| DataType::Time64(_) => DFColumnType::DateTime,
DataType::Timestamp(_, _) => DFColumnType::Timestamp,
_ => DFColumnType::Another,
})
.collect()
}
}
205 changes: 205 additions & 0 deletions crates/sqllogictests/src/display/normalize_51.rs
@@ -0,0 +1,205 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use anyhow::anyhow;
use arrow_51::util::display::{DurationFormat, FormatOptions};
use arrow_51::array::{
ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Float16Array, Float32Array,
Float64Array, LargeStringArray, RecordBatch, StringArray, StringViewArray,
};
use arrow_51::datatypes::{DataType, Fields};
use arrow_51::util::display::ArrayFormatter;
use crate::display::conversion_51::*;
use crate::engine::output::DFColumnType;

const DEFAULT_FORMAT_OPTIONS: FormatOptions<'static> =
FormatOptions::new().with_duration_format(DurationFormat::Pretty);

/// Converts `batches` to a result as expected by sqllogictest.
pub(crate) fn convert_batches(batches: Vec<RecordBatch>) -> anyhow::Result<Vec<Vec<String>>> {
if batches.is_empty() {
Ok(vec![])
} else {
let schema = batches[0].schema();
let mut rows = vec![];
for batch in batches {
// Verify schema
if !schema.contains(&batch.schema()) {
return Err(anyhow!(
"Schema mismatch. Previously had\n{:#?}\n\nGot:\n{:#?}",
&schema,
batch.schema()
));
}

let new_rows = convert_batch(batch)?.into_iter().flat_map(expand_row);
rows.extend(new_rows);
}
Ok(rows)
}
}

/// special case rows that have newlines in them (like explain plans)
///
/// Transform inputs like:
/// ```text
/// [
/// "logical_plan",
/// "Sort: d.b ASC NULLS LAST\n Projection: d.b, MAX(d.a) AS max_a",
/// ]
/// ```
///
/// Into one cell per line, adding lines if necessary
/// ```text
/// [
/// "logical_plan",
/// ]
/// [
/// "Sort: d.b ASC NULLS LAST",
/// ]
/// [ <--- newly added row
/// "|-- Projection: d.b, MAX(d.a) AS max_a",
/// ]
/// ```
fn expand_row(mut row: Vec<String>) -> impl Iterator<Item = Vec<String>> {
use std::iter::once;

use itertools::Either;

// check last cell
if let Some(cell) = row.pop() {
let lines: Vec<_> = cell.split('\n').collect();

// no newlines in last cell
if lines.len() < 2 {
row.push(cell);
return Either::Left(once(row));
}

// form new rows with each additional line
let new_lines: Vec<_> = lines
.into_iter()
.enumerate()
.map(|(idx, l)| {
// replace any leading spaces with '-' as
// `sqllogictests` ignores whitespace differences
//
// See https://github.com/apache/datafusion/issues/6328
let content = l.trim_start();
let new_prefix = "-".repeat(l.len() - content.len());
// maintain for each line a number, so
// reviewing explain result changes is easier
let line_num = idx + 1;
vec![format!("{line_num:02}){new_prefix}{content}")]
})
.collect();

Either::Right(once(row).chain(new_lines))
} else {
Either::Left(once(row))
}
}

/// Convert a single batch to a `Vec<Vec<String>>` for comparison
fn convert_batch(batch: RecordBatch) -> anyhow::Result<Vec<Vec<String>>> {
(0..batch.num_rows())
.map(|row| {
batch
.columns()
.iter()
.map(|col| cell_to_string(col, row))
.collect::<anyhow::Result<Vec<String>>>()
})
.collect()
}

macro_rules! get_row_value {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();

array.value($row)
}};
}

/// Normalizes the content of a single cell in RecordBatch prior to printing.
///
/// This is to make the output comparable to the semi-standard .slt format
///
/// Normalizations applied to [NULL Values and empty strings]
///
/// [NULL Values and empty strings]: https://duckdb.org/dev/sqllogictest/result_verification#null-values-and-empty-strings
///
/// Floating numbers are rounded to have a consistent representation with the Postgres runner.
pub fn cell_to_string(col: &ArrayRef, row: usize) -> anyhow::Result<String> {
if !col.is_valid(row) {
// represent any null value with the string "NULL"
Ok(NULL_STR.to_string())
} else {
match col.data_type() {
DataType::Null => Ok(NULL_STR.to_string()),
DataType::Boolean => Ok(bool_to_str(get_row_value!(BooleanArray, col, row))),
DataType::Float16 => Ok(f16_to_str(get_row_value!(Float16Array, col, row))),
DataType::Float32 => Ok(f32_to_str(get_row_value!(Float32Array, col, row))),
DataType::Float64 => Ok(f64_to_str(get_row_value!(Float64Array, col, row))),
DataType::Decimal128(precision, scale) => {
let value = get_row_value!(Decimal128Array, col, row);
Ok(i128_to_str(value, precision, scale))
}
DataType::Decimal256(precision, scale) => {
let value = get_row_value!(Decimal256Array, col, row);
Ok(i256_to_str(value, precision, scale))
}
DataType::LargeUtf8 => Ok(varchar_to_str(get_row_value!(LargeStringArray, col, row))),
DataType::Utf8 => Ok(varchar_to_str(get_row_value!(StringArray, col, row))),
DataType::Utf8View => Ok(varchar_to_str(get_row_value!(StringViewArray, col, row))),
_ => {
let f = ArrayFormatter::try_new(col.as_ref(), &DEFAULT_FORMAT_OPTIONS);
Ok(f.unwrap().value(row).to_string())
}
}
}
}

/// Converts columns to a result as expected by sqllogictest.
pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec<DFColumnType> {
columns
.iter()
.map(|f| f.data_type())
.map(|data_type| match data_type {
DataType::Boolean => DFColumnType::Boolean,
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64 => DFColumnType::Integer,
DataType::Float16
| DataType::Float32
| DataType::Float64
| DataType::Decimal128(_, _)
| DataType::Decimal256(_, _) => DFColumnType::Float,
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => DFColumnType::Text,
DataType::Date32 | DataType::Date64 | DataType::Time32(_) | DataType::Time64(_) => {
DFColumnType::DateTime
}
DataType::Timestamp(_, _) => DFColumnType::Timestamp,
_ => DFColumnType::Another,
})
.collect()
}
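To make the explain-plan normalization concrete, here is a self-contained sketch (plan text invented) of the rewrite `expand_row` applies to a multi-line cell: each line becomes its own numbered row, and leading spaces become `-` since the runner ignores whitespace differences:

```rust
fn main() {
    let cell = "Sort: d.b ASC NULLS LAST\n  Projection: d.b, MAX(d.a) AS max_a";

    let rows: Vec<String> = cell
        .split('\n')
        .enumerate()
        .map(|(idx, line)| {
            // Same rewrite as `expand_row`: leading spaces -> '-',
            // plus a two-digit line number for easier diff review.
            let content = line.trim_start();
            let prefix = "-".repeat(line.len() - content.len());
            format!("{:02}){}{}", idx + 1, prefix, content)
        })
        .collect();

    assert_eq!(rows, [
        "01)Sort: d.b ASC NULLS LAST",
        "02)--Projection: d.b, MAX(d.a) AS max_a",
    ]);
}
```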
Original file line number Diff line number Diff line change
@@ -15,37 +15,36 @@
// specific language governing permissions and limitations
// under the License.

use arrow_array::RecordBatch;
use async_trait::async_trait;
use datafusion::physical_plan::common::collect;
use datafusion::physical_plan::execute_stream;
use datafusion::prelude::{SessionConfig, SessionContext};
use sqllogictest::{AsyncDB, DBOutput};
use std::sync::Arc;
use std::time::Duration;

use anyhow::anyhow;
use arrow::array::RecordBatch;
use async_trait::async_trait;
use datafusion::catalog::CatalogProvider;
use toml::Table;
use datafusion::physical_plan::common::collect;
use datafusion::physical_plan::execute_stream;
use datafusion::prelude::{SessionConfig, SessionContext};
use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
use iceberg_datafusion::IcebergCatalogProvider;
use crate::engine::normalize;
use sqllogictest::{AsyncDB, DBOutput};
use toml::Table;

use crate::display::normalize;
use crate::engine::output::{DFColumnType, DFOutput};
use crate::error::{Result, Error};
use crate::error::{Error, Result};

pub struct DataFusionEngine {
ctx: SessionContext,
}

impl Default for DataFusionEngine {
fn default() -> Self {
let config = SessionConfig::new()
.with_target_partitions(4);
let config = SessionConfig::new().with_target_partitions(4);

let ctx = SessionContext::new_with_config(config);

Self {
ctx
}
Self { ctx }
}
}

@@ -55,7 +54,7 @@ impl AsyncDB for DataFusionEngine {
type ColumnType = DFColumnType;

async fn run(&mut self, sql: &str) -> Result<DFOutput> {
run_query(&self.ctx, sql).await.map_err(Box::new)
Ok(run_query(&self.ctx, sql).await?)
}

/// Engine name of current database.
@@ -73,7 +72,7 @@ impl AsyncDB for DataFusionEngine {
}
}

async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DFOutput> {
async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> anyhow::Result<DFOutput> {
let df = ctx.sql(sql.into().as_str()).await?;
let task_ctx = Arc::new(df.task_ctx());
let plan = df.create_physical_plan().await?;
@@ -92,19 +91,17 @@ async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DFOut

impl DataFusionEngine {
pub async fn new(configs: &Table) -> Result<Self> {
let config = SessionConfig::new()
.with_target_partitions(4);
let config = SessionConfig::new().with_target_partitions(4);

let ctx = SessionContext::new_with_config(config);
ctx.register_catalog("demo", Self::create_catalog(configs).await?);

Ok(Self {
ctx
})
Ok(Self { ctx })
}

async fn create_catalog(configs: &Table) -> anyhow::Result<Arc<dyn CatalogProvider>> {
let rest_catalog_url = configs.get("url")
let rest_catalog_url = configs
.get("url")
.ok_or_else(|| anyhow!("url not found datafusion engine!"))?
.as_str()
.ok_or_else(|| anyhow!("url is not str"))?;
@@ -115,6 +112,8 @@ impl DataFusionEngine {

let rest_catalog = RestCatalog::new(rest_catalog_config);

Ok(Arc::new(IcebergCatalogProvider::try_new(Arc::new(rest_catalog)).await?))
Ok(Arc::new(
IcebergCatalogProvider::try_new(Arc::new(rest_catalog)).await?,
))
}
}
Original file line number Diff line number Diff line change
@@ -15,23 +15,21 @@
// specific language governing permissions and limitations
// under the License.

use anyhow::{anyhow, bail};
use std::sync::Arc;

use anyhow::anyhow;
pub use datafusion::*;
use sqllogictest::{strict_column_validator, AsyncDB, MakeConnection, Runner};
use std::sync::Arc;
use toml::Table;

mod conversion;
mod output;
mod normalize;
pub mod output;

mod spark;
pub use spark::*;

mod datafusion;
pub use datafusion::*;
use crate::error::Result;

use crate::error::Result;

#[derive(Clone)]
pub enum Engine {
@@ -43,42 +41,38 @@ impl Engine {
pub async fn new(typ: &str, configs: &Table) -> Result<Self> {
let configs = Arc::new(configs.clone());
match typ {
"spark" => {
Ok(Engine::SparkSQL(configs))
}
"datafusion" => {
Ok(Engine::DataFusion(configs))
}
other => Err(anyhow!("Unknown engine type: {other}").into())
"spark" => Ok(Engine::SparkSQL(configs)),
"datafusion" => Ok(Engine::DataFusion(configs)),
other => Err(anyhow!("Unknown engine type: {other}").into()),
}
}

pub async fn run_slt_file(self, slt_file: impl Into<String>) -> anyhow::Result<()> {
let absolute_file = format!("{}/testdata/slts/{}", env!("CARGO_MANIFEST_DIR"), slt_file.into());
let absolute_file = format!(
"{}/testdata/slts/{}",
env!("CARGO_MANIFEST_DIR"),
slt_file.into()
);

match self {
Engine::DataFusion(configs) => {
let configs = configs.clone();
let runner = Runner::new(|| async {
DataFusionEngine::new(&*configs).await
});
let runner = Runner::new(|| async { DataFusionEngine::new(&configs).await });
Self::run_with_runner(runner, absolute_file).await
}
Engine::SparkSQL(configs) => {
let configs = configs.clone();
let runner = Runner::new(|| async {
SparkSqlEngine::new(&*configs).await
});
let runner = Runner::new(|| async { SparkSqlEngine::new(&configs).await });
Self::run_with_runner(runner, absolute_file).await
}
}
}

async fn run_with_runner<D: AsyncDB, M: MakeConnection<Conn = D>>(mut runner: Runner<D, M>,
slt_file: String) -> anyhow::Result<()> {
async fn run_with_runner<D: AsyncDB, M: MakeConnection<Conn = D>>(
mut runner: Runner<D, M>,
slt_file: String,
) -> anyhow::Result<()> {
runner.with_column_validator(strict_column_validator);
Ok(runner
.run_file_async(slt_file)
.await?)
Ok(runner.run_file_async(slt_file).await?)
}
}
}
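A hedged usage sketch of the `Engine` entry point (crate-internal; the `url` key matches what the engines above read, but the endpoint and file name are invented): build an engine from a TOML table and point it at a test file, which `run_slt_file` resolves under `testdata/slts/`:

```rust
use toml::Table;

// Inside the sqllogictests crate; `Engine` is the enum defined above.
async fn demo() -> anyhow::Result<()> {
    let configs: Table = r#"
        url = "http://localhost:8181"
    "#
    .parse()?;

    let engine = Engine::new("datafusion", &configs).await?;
    // Resolves to $CARGO_MANIFEST_DIR/testdata/slts/basic.slt.
    engine.run_slt_file("basic.slt").await?;
    Ok(())
}
```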
Original file line number Diff line number Diff line change
@@ -54,4 +54,4 @@ impl ColumnType for DFColumnType {
}
}

pub(crate) type DFOutput = DBOutput<DFColumnType>;
pub(crate) type DFOutput = DBOutput<DFColumnType>;
Original file line number Diff line number Diff line change
@@ -15,18 +15,19 @@
// specific language governing permissions and limitations
// under the License.

use crate::engine::output::DFColumnType;
use crate::engine::{normalize, DataFusionEngine};
use std::time::Duration;

use anyhow::anyhow;
use itertools::Itertools;
use async_trait::async_trait;
use spark_connect_rs::{SparkSession, SparkSessionBuilder};
use sqllogictest::{AsyncDB, DBOutput};
use std::time::Duration;
use async_trait::async_trait;
use toml::Table;
use crate::error::*;

/// SparkSql engine implementation for sqllogictest.
use crate::engine::output::DFColumnType;
use crate::display::normalize_51;
use crate::error::{Error, Result};

/// SparkSql engine implementation for sqllogictests.
pub struct SparkSqlEngine {
session: SparkSession,
}
@@ -37,15 +38,16 @@ impl AsyncDB for SparkSqlEngine {
type ColumnType = DFColumnType;

async fn run(&mut self, sql: &str) -> Result<DBOutput<DFColumnType>> {
let results = self.session
let results = self
.session
.sql(sql)
.await
.map_err(Box::new)?
.map_err(|e| anyhow!(e))?
.collect()
.await
.map_err(Box::new)?;
let types = normalize::convert_schema_to_types(results.schema().fields());
let rows = normalize::convert_batches(results)?;
.map_err(|e| anyhow!(e))?;
let types = normalize_51::convert_schema_to_types(results.schema().fields());
let rows = normalize_51::convert_batches(vec![results])?;

if rows.is_empty() && types.is_empty() {
Ok(DBOutput::StatementComplete(0))
@@ -71,15 +73,17 @@ impl AsyncDB for SparkSqlEngine {

impl SparkSqlEngine {
pub async fn new(configs: &Table) -> Result<Self> {
let url = configs.get("url")
let url = configs
.get("url")
.ok_or_else(|| anyhow!("url property doesn't exist for spark engine"))?
.as_str()
.ok_or_else(|| anyhow!("url property is not a string for spark engine"))?;

let session = SparkSessionBuilder::remote(url)
.app_name("SparkConnect")
.build()
.await?;
.await
.map_err(|e| anyhow!(e))?;

Ok(Self { session })
}
28 changes: 28 additions & 0 deletions crates/sqllogictests/src/error.rs
@@ -0,0 +1,28 @@
use std::fmt::{Debug, Display, Formatter};

pub struct Error(pub anyhow::Error);
pub type Result<T> = std::result::Result<T, Error>;

impl Debug for Error {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self.0)
}
}

impl Display for Error {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}

impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
self.0.source()
}
}

impl From<anyhow::Error> for Error {
fn from(value: anyhow::Error) -> Self {
Self(value)
}
}
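This newtype exists, as the engine code above suggests, because `sqllogictest::AsyncDB` needs an associated `Error` type that implements `std::error::Error`, which `anyhow::Error` deliberately does not; the wrapper bridges the two, and the `From` impl keeps `?` ergonomic. A small in-crate sketch (`parse_port` is hypothetical):

```rust
// ParseIntError -> anyhow::Error -> crate Error, the last hop via the
// `From<anyhow::Error>` impl above, so `?` works on anyhow results.
fn parse_port(input: &str) -> Result<u16> {
    let port: u16 = input.parse().map_err(anyhow::Error::from)?;
    Ok(port)
}

fn main() {
    assert_eq!(parse_port("8181").unwrap(), 8181);
    assert!(parse_port("not-a-port").is_err());
}
```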
Original file line number Diff line number Diff line change
@@ -18,6 +18,8 @@
// This lib contains codes copied from
// [Apache Datafusion](https://github.com/apache/datafusion/tree/main/datafusion/sqllogictest)
mod engine;
pub mod schedule;
mod error;
pub mod schedule;
mod display;

pub use error::*;
Original file line number Diff line number Diff line change
@@ -15,14 +15,16 @@
// specific language governing permissions and limitations
// under the License.

use crate::engine::Engine;
use anyhow::anyhow;
use itertools::Itertools;
use std::collections::HashMap;
use std::fs::read_to_string;
use std::path::Path;

use anyhow::anyhow;
use itertools::Itertools;
use toml::{Table, Value};

use crate::engine::Engine;

/// Schedule of engines to run tests.
pub struct Schedule {
/// Map of engine names to engine instances.
@@ -41,35 +43,37 @@ pub struct Step {
impl Schedule {
pub async fn parse<P: AsRef<Path>>(schedule_def_file: P) -> anyhow::Result<Self> {
let content = read_to_string(schedule_def_file)?;
let toml_value = content.parse::<Value>()?.as_table()
let toml_value = content.parse::<Value>()?;
let toml_table = toml_value
.as_table()
.ok_or_else(|| anyhow::anyhow!("Schedule file must be a TOML table"))?;

let engines = Schedule::parse_engines(toml_value).await?;
let steps = Schedule::parse_steps(toml_value).await?;
let engines = Schedule::parse_engines(toml_table).await?;
let steps = Schedule::parse_steps(toml_table).await?;

Ok(Self {
engines,
steps
})
Ok(Self { engines, steps })
}

async fn parse_engines(table: &Table) -> anyhow::Result<HashMap<String, Engine>> {
let engines = table.get("engines")
let engines = table
.get("engines")
.ok_or_else(|| anyhow::anyhow!("Schedule file must have an 'engines' table"))?
.as_table()
.ok_or_else(|| anyhow::anyhow!("'engines' must be a table"))?;

let mut result = HashMap::new();
for (name, engine_config) in engines {
let engine_configs = engine_config.as_table()
let engine_configs = engine_config
.as_table()
.ok_or_else(|| anyhow::anyhow!("Config of engine {name} is not a table"))?;

let typ = engine_configs.get("type")
let typ = engine_configs
.get("type")
.ok_or_else(|| anyhow::anyhow!("Engine {name} doesn't have a 'type' field"))?
.as_str()
.ok_or_else(|| anyhow::anyhow!("Engine {name} type must be a string"))?;

let engine = Engine::build(typ, engine_configs).await?;
let engine = Engine::new(typ, engine_configs).await?;

result.insert(name.clone(), engine);
}
@@ -78,39 +82,38 @@ impl Schedule {
}

async fn parse_steps(table: &Table) -> anyhow::Result<Vec<Step>> {
let steps = table.get("steps")
let steps = table
.get("steps")
.ok_or_else(|| anyhow!("steps not found"))?
.as_array()
.ok_or_else(|| anyhow!("steps is not array"))?;

steps.iter().map(Schedule::parse_step)
.try_collect()
steps.iter().map(Schedule::parse_step).try_collect()
}

fn parse_step(value: &Value) -> anyhow::Result<Step> {
let t = value
.as_table()
.ok_or_else(|| anyhow!("Step must be a table!"))?;

let engine_name = t.get("engine")
let engine_name = t
.get("engine")
.ok_or_else(|| anyhow!("Property engine is missing in step"))?
.as_str()
.ok_or_else(|| anyhow!("Property engine is not a string in step"))?
.to_string();

let sql = t.get("sql")
let sql = t
.get("sql")
.ok_or_else(|| anyhow!("Property sql is missing in step"))?
.as_str()
.ok_or_else(|| anyhow!("Property sql is not a string in step"))?
.to_string();

Ok(Step {
engine_name,
sql,
})
Ok(Step { engine_name, sql })
}

pub async fn run(mut self) -> anyhow::Result<()> {
pub async fn run(self) -> anyhow::Result<()> {
for step_idx in 0..self.steps.len() {
self.run_step(step_idx).await?;
}
@@ -121,7 +124,9 @@ impl Schedule {
async fn run_step(&self, step_index: usize) -> anyhow::Result<()> {
let step = &self.steps[step_index];

let engine = self.engines.get(&step.engine_name)
let engine = self
.engines
.get(&step.engine_name)
.ok_or_else(|| anyhow!("Engine {} not found!", step.engine_name))?
.clone();

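For orientation, a hedged example of a schedule document this parser accepts; the `engines`/`steps` tables and the `type`, `engine`, and `sql` keys come from the code above, while the engine name, URL, and step value are invented:

```rust
async fn parse_example() -> anyhow::Result<()> {
    let doc = r#"
        [engines.df]
        type = "datafusion"
        url = "http://localhost:8181"

        [[steps]]
        engine = "df"
        sql = "create_table.slt"
    "#;

    // `Schedule::parse` takes a path, so stage the document in a temp file.
    let path = std::env::temp_dir().join("example-schedule.toml");
    std::fs::write(&path, doc)?;

    let _schedule = Schedule::parse(&path).await?;
    // `_schedule.run().await?` would execute the steps, but it needs the
    // docker-compose services from the test harness to be running.
    Ok(())
}
```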
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use std::fs;
use std::path::PathBuf;

use iceberg_test_utils::docker::DockerCompose;
use libtest_mimic::{Arguments, Trial};
use sqllogictests::schedule::Schedule;
use tokio::runtime::Handle;
use iceberg_test_utils::docker::DockerCompose;
use sqllogictest::schedule::Schedule;

fn main() {
env_logger::init();
@@ -24,7 +25,7 @@
let tests = collect_trials(rt.handle().clone()).unwrap();

log::info!("Starting tests...");
// Run all tests and exit the application appropriately.
let result = libtest_mimic::run(&args, tests);

log::info!("Shutting down tokio runtime...");
@@ -36,23 +37,34 @@
}

fn start_docker() -> anyhow::Result<DockerCompose> {
let docker = DockerCompose::new("sqllogictests",
format!("{}/testdata/docker", env!("CARGO_MANIFEST_DIR")));
let docker = DockerCompose::new(
"sqllogictests",
format!("{}/testdata/docker", env!("CARGO_MANIFEST_DIR")),
);
docker.run();
Ok(docker)
}

fn collect_trials(handle: Handle) -> anyhow::Result<Vec<Trial>> {
let schedule_files = collect_schedule_files()?;
log::debug!("Found {} schedule files: {}", schedule_files.len(), &schedule_files);
log::debug!(
"Found {} schedule files: {:?}",
schedule_files.len(),
&schedule_files
);
let mut trials = Vec::with_capacity(schedule_files.len());
for schedule_file in schedule_files {
let h = handle.clone();
let trial_name = format!("Test schedule {}",
schedule_file.file_name()
.expect("Schedule file should have a name")
.to_string_lossy());
let trial = Trial::new(trial_name, move || h.block_on(run_schedule(schedule_file.clone())));
let trial_name = format!(
"Test schedule {}",
schedule_file
.file_name()
.expect("Schedule file should have a name")
.to_string_lossy()
);
let trial = Trial::test(trial_name, move || {
Ok(h.block_on(run_schedule(schedule_file.clone()))?)
});
trials.push(trial);
}
Ok(trials)
@@ -75,4 +87,4 @@
let schedule = Schedule::parse(schedule_file).await?;
schedule.run().await?;
Ok(())
}
}