From e516f04f049cc1e247d686c5f2cedc8055a08ff3 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 17 Jun 2024 18:21:00 +0200 Subject: [PATCH] [SPARK-48177][BUILD] Upgrade Apache Parquet to 1.14.1 --- dev/deps/spark-deps-hadoop-3-hive-2.3 | 13 +- pom.xml | 2 +- ...DataSourceWriteBenchmark-jdk21-results.txt | 60 +- ...uiltInDataSourceWriteBenchmark-results.txt | 60 +- .../DataSourceReadBenchmark-jdk21-results.txt | 574 ++++++++--------- .../DataSourceReadBenchmark-results.txt | 576 +++++++++--------- .../spark/sql/InjectRuntimeFilterSuite.scala | 4 +- .../parquet/ParquetVectorizedSuite.scala | 2 +- .../spark/sql/hive/StatisticsSuite.scala | 2 +- 9 files changed, 647 insertions(+), 646 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index be8b21206c76c..5478fbde929db 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -108,6 +108,7 @@ jackson-core/2.17.1//jackson-core-2.17.1.jar jackson-databind/2.17.1//jackson-databind-2.17.1.jar jackson-dataformat-cbor/2.17.1//jackson-dataformat-cbor-2.17.1.jar jackson-dataformat-yaml/2.17.1//jackson-dataformat-yaml-2.17.1.jar +jackson-datatype-jdk8/2.17.0//jackson-datatype-jdk8-2.17.0.jar jackson-datatype-jsr310/2.17.1//jackson-datatype-jsr310-2.17.1.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-scala_2.13/2.17.1//jackson-module-scala_2.13-2.17.1.jar @@ -235,12 +236,12 @@ orc-shims/2.0.1//orc-shims-2.0.1.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.13.1//parquet-column-1.13.1.jar -parquet-common/1.13.1//parquet-common-1.13.1.jar -parquet-encoding/1.13.1//parquet-encoding-1.13.1.jar -parquet-format-structures/1.13.1//parquet-format-structures-1.13.1.jar -parquet-hadoop/1.13.1//parquet-hadoop-1.13.1.jar -parquet-jackson/1.13.1//parquet-jackson-1.13.1.jar +parquet-column/1.14.1//parquet-column-1.14.1.jar +parquet-common/1.14.1//parquet-common-1.14.1.jar +parquet-encoding/1.14.1//parquet-encoding-1.14.1.jar +parquet-format-structures/1.14.1//parquet-format-structures-1.14.1.jar +parquet-hadoop/1.14.1//parquet-hadoop-1.14.1.jar +parquet-jackson/1.14.1//parquet-jackson-1.14.1.jar pickle/1.5//pickle-1.5.jar py4j/0.10.9.7//py4j-0.10.9.7.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar diff --git a/pom.xml b/pom.xml index 5e181cc38d31b..67ff14070b8bb 100644 --- a/pom.xml +++ b/pom.xml @@ -137,7 +137,7 @@ 3.7.0 10.16.1.1 - 1.13.1 + 1.14.1 2.0.1 shaded-protobuf 11.0.21 diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt index 9605ee82b6e29..a260bc0396455 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt @@ -2,69 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1839 1907 96 8.6 116.9 1.0X -Output Single Double Column 1832 1841 13 8.6 116.5 1.0X -Output Int and String Column 4356 4494 195 3.6 277.0 0.4X -Output Partitions 3233 3303 99 4.9 205.5 0.6X -Output Buckets 4393 4506 160 3.6 279.3 0.4X +Output Single Int Column 1732 1745 19 9.1 110.1 1.0X +Output Single Double Column 1754 1758 7 9.0 111.5 1.0X +Output Int and String Column 4309 4363 76 3.7 273.9 0.4X +Output Partitions 3252 3350 139 4.8 206.8 0.5X +Output Buckets 4487 4575 124 3.5 285.3 0.4X -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 2057 2066 13 7.6 130.8 1.0X -Output Single Double Column 1805 1813 11 8.7 114.8 1.1X -Output Int and String Column 4771 4775 6 3.3 303.3 0.4X -Output Partitions 3337 3339 3 4.7 212.2 0.6X -Output Buckets 4441 4463 31 3.5 282.3 0.5X +Output Single Int Column 1938 1978 55 8.1 123.2 1.0X +Output Single Double Column 1762 1769 10 8.9 112.0 1.1X +Output Int and String Column 4920 4932 17 3.2 312.8 0.4X +Output Partitions 3385 3389 7 4.6 215.2 0.6X +Output Buckets 4528 4538 14 3.5 287.9 0.4X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1144 1168 35 13.8 72.7 1.0X -Output Single Double Column 1612 1628 22 9.8 102.5 0.7X -Output Int and String Column 3911 3915 7 4.0 248.6 0.3X -Output Partitions 2600 2648 67 6.0 165.3 0.4X -Output Buckets 3449 3477 40 4.6 219.3 0.3X +Output Single Int Column 1137 1142 7 13.8 72.3 1.0X +Output Single Double Column 1700 1705 6 9.3 108.1 0.7X +Output Int and String Column 4028 4096 97 3.9 256.1 0.3X +Output Partitions 2562 2582 28 6.1 162.9 0.4X +Output Buckets 3524 3530 9 4.5 224.1 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1627 1636 13 9.7 103.4 1.0X -Output Single Double Column 2389 2390 1 6.6 151.9 0.7X -Output Int and String Column 4283 4299 22 3.7 272.3 0.4X -Output Partitions 3171 3192 29 5.0 201.6 0.5X -Output Buckets 4120 4124 6 3.8 261.9 0.4X +Output Single Int Column 1618 1645 37 9.7 102.9 1.0X +Output Single Double Column 2398 2399 1 6.6 152.5 0.7X +Output Int and String Column 3766 3778 17 4.2 239.5 0.4X +Output Partitions 3162 3164 3 5.0 201.0 0.5X +Output Buckets 4015 4028 18 3.9 255.3 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 3536 3557 31 4.4 224.8 1.0X -Output Single Double Column 3863 3894 44 4.1 245.6 0.9X -Output Int and String Column 6363 6377 19 2.5 404.5 0.6X -Output Partitions 5128 5148 29 3.1 326.0 0.7X -Output Buckets 6613 6626 18 2.4 420.5 0.5X +Output Single Int Column 3985 3993 11 3.9 253.4 1.0X +Output Single Double Column 4148 4210 88 3.8 263.7 1.0X +Output Int and String Column 6728 6741 18 2.3 427.8 0.6X +Output Partitions 5431 5447 23 2.9 345.3 0.7X +Output Buckets 6927 6942 22 2.3 440.4 0.6X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt index 6b9bf40c67b29..e43b3b53dfb25 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt @@ -2,69 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1778 1861 116 8.8 113.1 1.0X -Output Single Double Column 1750 1757 10 9.0 111.2 1.0X -Output Int and String Column 4290 4408 167 3.7 272.8 0.4X -Output Partitions 3089 3259 240 5.1 196.4 0.6X -Output Buckets 4269 4289 29 3.7 271.4 0.4X +Output Single Int Column 1813 1881 96 8.7 115.3 1.0X +Output Single Double Column 1976 1977 1 8.0 125.6 0.9X +Output Int and String Column 4403 4438 50 3.6 279.9 0.4X +Output Partitions 3388 3421 46 4.6 215.4 0.5X +Output Buckets 4670 4680 15 3.4 296.9 0.4X -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1731 1744 19 9.1 110.0 1.0X -Output Single Double Column 1803 1804 2 8.7 114.6 1.0X -Output Int and String Column 4665 4672 10 3.4 296.6 0.4X -Output Partitions 3290 3308 26 4.8 209.2 0.5X -Output Buckets 4261 4327 93 3.7 270.9 0.4X +Output Single Int Column 1903 1926 33 8.3 121.0 1.0X +Output Single Double Column 1998 1998 0 7.9 127.0 1.0X +Output Int and String Column 4916 4936 29 3.2 312.6 0.4X +Output Partitions 3366 3375 13 4.7 214.0 0.6X +Output Buckets 4560 4583 33 3.4 289.9 0.4X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1072 1075 4 14.7 68.1 1.0X -Output Single Double Column 1579 1580 0 10.0 100.4 0.7X -Output Int and String Column 3815 3875 85 4.1 242.5 0.3X -Output Partitions 2510 2511 1 6.3 159.6 0.4X -Output Buckets 3441 3471 43 4.6 218.7 0.3X +Output Single Int Column 1034 1039 7 15.2 65.8 1.0X +Output Single Double Column 1687 1691 7 9.3 107.2 0.6X +Output Int and String Column 3941 3955 20 4.0 250.6 0.3X +Output Partitions 2553 2674 172 6.2 162.3 0.4X +Output Buckets 3544 3548 6 4.4 225.3 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1635 1639 5 9.6 104.0 1.0X -Output Single Double Column 2218 2230 17 7.1 141.0 0.7X -Output Int and String Column 3948 3997 68 4.0 251.0 0.4X -Output Partitions 3165 3240 105 5.0 201.2 0.5X -Output Buckets 4132 4142 15 3.8 262.7 0.4X +Output Single Int Column 1669 1686 24 9.4 106.1 1.0X +Output Single Double Column 2342 2369 37 6.7 148.9 0.7X +Output Int and String Column 3776 3805 42 4.2 240.0 0.4X +Output Partitions 3060 3064 7 5.1 194.5 0.5X +Output Buckets 4009 4052 60 3.9 254.9 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 3680 3696 22 4.3 234.0 1.0X -Output Single Double Column 3554 3559 7 4.4 225.9 1.0X -Output Int and String Column 6396 6402 9 2.5 406.6 0.6X -Output Partitions 4937 4942 7 3.2 313.9 0.7X -Output Buckets 6288 6300 17 2.5 399.8 0.6X +Output Single Int Column 3877 3889 18 4.1 246.5 1.0X +Output Single Double Column 4079 4086 10 3.9 259.3 1.0X +Output Int and String Column 6266 6269 4 2.5 398.4 0.6X +Output Partitions 5432 5438 8 2.9 345.4 0.7X +Output Buckets 6528 6530 4 2.4 415.0 0.6X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt index 83e8059aab77d..43d7eb15b0ea5 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt @@ -2,430 +2,430 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9759 9826 94 1.6 620.5 1.0X -SQL Json 8157 8194 53 1.9 518.6 1.2X -SQL Parquet Vectorized: DataPageV1 86 99 11 183.5 5.4 113.9X -SQL Parquet Vectorized: DataPageV2 112 120 6 140.8 7.1 87.4X -SQL Parquet MR: DataPageV1 1775 1776 1 8.9 112.9 5.5X -SQL Parquet MR: DataPageV2 1745 1749 5 9.0 110.9 5.6X -SQL ORC Vectorized 119 133 8 132.4 7.6 82.1X -SQL ORC MR 1464 1464 0 10.7 93.1 6.7X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 9893 9962 97 1.6 629.0 1.0X +SQL Json 7942 8051 155 2.0 504.9 1.2X +SQL Parquet Vectorized: DataPageV1 84 96 8 187.9 5.3 118.2X +SQL Parquet Vectorized: DataPageV2 95 107 9 166.3 6.0 104.6X +SQL Parquet MR: DataPageV1 1727 1730 3 9.1 109.8 5.7X +SQL Parquet MR: DataPageV2 1615 1615 1 9.7 102.6 6.1X +SQL ORC Vectorized 135 146 8 116.4 8.6 73.2X +SQL ORC MR 1495 1511 22 10.5 95.0 6.6X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 94 96 3 167.7 6.0 1.0X -ParquetReader Vectorized: DataPageV2 113 115 2 139.1 7.2 0.8X -ParquetReader Vectorized -> Row: DataPageV1 75 75 1 210.9 4.7 1.3X -ParquetReader Vectorized -> Row: DataPageV2 95 96 1 166.2 6.0 1.0X +ParquetReader Vectorized: DataPageV1 92 93 1 170.7 5.9 1.0X +ParquetReader Vectorized: DataPageV2 112 113 1 140.8 7.1 0.8X +ParquetReader Vectorized -> Row: DataPageV1 72 73 1 218.6 4.6 1.3X +ParquetReader Vectorized -> Row: DataPageV2 94 96 2 167.5 6.0 1.0X -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9826 9827 2 1.6 624.7 1.0X -SQL Json 9154 9168 20 1.7 582.0 1.1X -SQL Parquet Vectorized: DataPageV1 98 107 8 161.1 6.2 100.7X -SQL Parquet Vectorized: DataPageV2 95 107 11 164.7 6.1 102.9X -SQL Parquet MR: DataPageV1 1876 1883 9 8.4 119.3 5.2X -SQL Parquet MR: DataPageV2 1841 1849 11 8.5 117.1 5.3X -SQL ORC Vectorized 109 120 9 144.5 6.9 90.3X -SQL ORC MR 1600 1601 2 9.8 101.7 6.1X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 9431 9439 11 1.7 599.6 1.0X +SQL Json 8552 8570 26 1.8 543.7 1.1X +SQL Parquet Vectorized: DataPageV1 96 105 9 164.4 6.1 98.6X +SQL Parquet Vectorized: DataPageV2 93 104 9 168.4 5.9 101.0X +SQL Parquet MR: DataPageV1 1816 1821 6 8.7 115.5 5.2X +SQL Parquet MR: DataPageV2 1742 1746 5 9.0 110.8 5.4X +SQL ORC Vectorized 107 113 6 146.6 6.8 87.9X +SQL ORC MR 1582 1598 22 9.9 100.6 6.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 76 78 2 207.9 4.8 1.0X -ParquetReader Vectorized: DataPageV2 76 78 2 208.0 4.8 1.0X -ParquetReader Vectorized -> Row: DataPageV1 45 46 2 351.2 2.8 1.7X -ParquetReader Vectorized -> Row: DataPageV2 44 45 1 353.5 2.8 1.7X +ParquetReader Vectorized: DataPageV1 66 68 2 238.1 4.2 1.0X +ParquetReader Vectorized: DataPageV2 66 67 1 239.4 4.2 1.0X +ParquetReader Vectorized -> Row: DataPageV1 44 46 3 357.8 2.8 1.5X +ParquetReader Vectorized -> Row: DataPageV2 44 45 1 357.9 2.8 1.5X -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9858 9859 1 1.6 626.8 1.0X -SQL Json 9321 9334 18 1.7 592.6 1.1X -SQL Parquet Vectorized: DataPageV1 115 130 17 137.0 7.3 85.9X -SQL Parquet Vectorized: DataPageV2 135 149 17 116.9 8.6 73.2X -SQL Parquet MR: DataPageV1 2192 2199 10 7.2 139.4 4.5X -SQL Parquet MR: DataPageV2 2003 2026 32 7.9 127.4 4.9X -SQL ORC Vectorized 143 153 17 109.9 9.1 68.9X -SQL ORC MR 1944 1951 11 8.1 123.6 5.1X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 9996 10013 25 1.6 635.5 1.0X +SQL Json 8898 8902 5 1.8 565.7 1.1X +SQL Parquet Vectorized: DataPageV1 121 137 14 129.7 7.7 82.4X +SQL Parquet Vectorized: DataPageV2 139 153 14 113.1 8.8 71.9X +SQL Parquet MR: DataPageV1 2015 2035 28 7.8 128.1 5.0X +SQL Parquet MR: DataPageV2 2000 2012 17 7.9 127.2 5.0X +SQL ORC Vectorized 143 174 27 109.8 9.1 69.8X +SQL ORC MR 1959 1990 44 8.0 124.6 5.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 140 147 8 112.7 8.9 1.0X -ParquetReader Vectorized: DataPageV2 173 177 3 91.0 11.0 0.8X -ParquetReader Vectorized -> Row: DataPageV1 134 141 8 117.2 8.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 165 176 12 95.2 10.5 0.8X +ParquetReader Vectorized: DataPageV1 151 160 8 104.3 9.6 1.0X +ParquetReader Vectorized: DataPageV2 168 180 14 93.5 10.7 0.9X +ParquetReader Vectorized -> Row: DataPageV1 160 166 6 98.3 10.2 0.9X +ParquetReader Vectorized -> Row: DataPageV2 164 175 12 96.1 10.4 0.9X -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11219 11235 22 1.4 713.3 1.0X -SQL Json 9660 9667 9 1.6 614.2 1.2X -SQL Parquet Vectorized: DataPageV1 122 126 4 129.1 7.7 92.1X -SQL Parquet Vectorized: DataPageV2 178 195 17 88.5 11.3 63.1X -SQL Parquet MR: DataPageV1 2007 2031 33 7.8 127.6 5.6X -SQL Parquet MR: DataPageV2 2060 2084 34 7.6 131.0 5.4X -SQL ORC Vectorized 175 184 13 89.8 11.1 64.0X -SQL ORC MR 1804 1844 56 8.7 114.7 6.2X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 11250 11336 121 1.4 715.3 1.0X +SQL Json 9272 9279 10 1.7 589.5 1.2X +SQL Parquet Vectorized: DataPageV1 109 126 14 144.4 6.9 103.3X +SQL Parquet Vectorized: DataPageV2 190 195 5 82.8 12.1 59.2X +SQL Parquet MR: DataPageV1 2338 2342 6 6.7 148.6 4.8X +SQL Parquet MR: DataPageV2 2332 2343 17 6.7 148.2 4.8X +SQL ORC Vectorized 179 193 12 87.9 11.4 62.9X +SQL ORC MR 2094 2095 1 7.5 133.2 5.4X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 150 157 6 104.7 9.6 1.0X -ParquetReader Vectorized: DataPageV2 212 226 9 74.3 13.5 0.7X -ParquetReader Vectorized -> Row: DataPageV1 164 170 6 95.8 10.4 0.9X -ParquetReader Vectorized -> Row: DataPageV2 242 246 4 64.9 15.4 0.6X +ParquetReader Vectorized: DataPageV1 134 138 2 117.7 8.5 1.0X +ParquetReader Vectorized: DataPageV2 210 215 7 74.8 13.4 0.6X +ParquetReader Vectorized -> Row: DataPageV1 128 133 8 123.3 8.1 1.0X +ParquetReader Vectorized -> Row: DataPageV2 225 232 6 70.0 14.3 0.6X -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11095 11134 54 1.4 705.4 1.0X -SQL Json 9688 9701 18 1.6 616.0 1.1X -SQL Parquet Vectorized: DataPageV1 293 297 4 53.7 18.6 37.9X -SQL Parquet Vectorized: DataPageV2 225 253 23 69.9 14.3 49.3X -SQL Parquet MR: DataPageV1 2423 2437 20 6.5 154.0 4.6X -SQL Parquet MR: DataPageV2 2041 2055 19 7.7 129.8 5.4X -SQL ORC Vectorized 165 192 24 95.3 10.5 67.2X -SQL ORC MR 1742 1753 15 9.0 110.8 6.4X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 11683 11683 1 1.3 742.8 1.0X +SQL Json 9457 9460 4 1.7 601.3 1.2X +SQL Parquet Vectorized: DataPageV1 277 312 21 56.9 17.6 42.2X +SQL Parquet Vectorized: DataPageV2 281 291 10 56.0 17.9 41.6X +SQL Parquet MR: DataPageV1 2506 2517 15 6.3 159.4 4.7X +SQL Parquet MR: DataPageV2 2053 2058 7 7.7 130.5 5.7X +SQL ORC Vectorized 166 172 12 95.0 10.5 70.5X +SQL ORC MR 1709 1738 40 9.2 108.7 6.8X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 308 317 8 51.0 19.6 1.0X -ParquetReader Vectorized: DataPageV2 276 283 5 56.9 17.6 1.1X -ParquetReader Vectorized -> Row: DataPageV1 317 321 4 49.6 20.2 1.0X -ParquetReader Vectorized -> Row: DataPageV2 271 278 7 58.1 17.2 1.1X +ParquetReader Vectorized: DataPageV1 311 331 16 50.6 19.8 1.0X +ParquetReader Vectorized: DataPageV2 265 280 21 59.4 16.8 1.2X +ParquetReader Vectorized -> Row: DataPageV1 317 321 3 49.6 20.2 1.0X +ParquetReader Vectorized -> Row: DataPageV2 254 262 13 62.0 16.1 1.2X -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11177 11185 13 1.4 710.6 1.0X -SQL Json 11229 11252 32 1.4 713.9 1.0X -SQL Parquet Vectorized: DataPageV1 83 97 15 189.6 5.3 134.7X -SQL Parquet Vectorized: DataPageV2 82 96 13 191.1 5.2 135.8X -SQL Parquet MR: DataPageV1 2029 2055 36 7.8 129.0 5.5X -SQL Parquet MR: DataPageV2 1986 2014 39 7.9 126.3 5.6X -SQL ORC Vectorized 229 241 17 68.7 14.6 48.8X -SQL ORC MR 1751 1763 18 9.0 111.3 6.4X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 11446 11452 8 1.4 727.7 1.0X +SQL Json 10952 10955 4 1.4 696.3 1.0X +SQL Parquet Vectorized: DataPageV1 83 97 16 189.5 5.3 137.9X +SQL Parquet Vectorized: DataPageV2 82 94 12 192.7 5.2 140.2X +SQL Parquet MR: DataPageV1 2107 2120 18 7.5 134.0 5.4X +SQL Parquet MR: DataPageV2 1975 2003 40 8.0 125.5 5.8X +SQL ORC Vectorized 235 245 14 66.9 14.9 48.7X +SQL ORC MR 1779 1801 30 8.8 113.1 6.4X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 134 141 7 117.5 8.5 1.0X -ParquetReader Vectorized: DataPageV2 150 159 8 105.0 9.5 0.9X -ParquetReader Vectorized -> Row: DataPageV1 143 150 7 109.9 9.1 0.9X -ParquetReader Vectorized -> Row: DataPageV2 143 152 15 109.9 9.1 0.9X +ParquetReader Vectorized: DataPageV1 134 141 8 117.1 8.5 1.0X +ParquetReader Vectorized: DataPageV2 147 151 4 107.3 9.3 0.9X +ParquetReader Vectorized -> Row: DataPageV1 144 151 7 109.2 9.2 0.9X +ParquetReader Vectorized -> Row: DataPageV2 128 139 7 123.3 8.1 1.1X -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11485 11545 86 1.4 730.2 1.0X -SQL Json 11591 11597 8 1.4 737.0 1.0X -SQL Parquet Vectorized: DataPageV1 269 288 18 58.5 17.1 42.7X -SQL Parquet Vectorized: DataPageV2 273 287 16 57.6 17.4 42.0X -SQL Parquet MR: DataPageV1 2456 2477 29 6.4 156.2 4.7X -SQL Parquet MR: DataPageV2 2373 2386 17 6.6 150.9 4.8X -SQL ORC Vectorized 585 588 2 26.9 37.2 19.6X -SQL ORC MR 2132 2137 7 7.4 135.5 5.4X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 11723 11745 31 1.3 745.3 1.0X +SQL Json 11373 11395 31 1.4 723.1 1.0X +SQL Parquet Vectorized: DataPageV1 304 316 11 51.7 19.3 38.6X +SQL Parquet Vectorized: DataPageV2 276 301 16 56.9 17.6 42.4X +SQL Parquet MR: DataPageV1 2427 2438 16 6.5 154.3 4.8X +SQL Parquet MR: DataPageV2 2365 2381 22 6.7 150.4 5.0X +SQL ORC Vectorized 577 580 2 27.3 36.7 20.3X +SQL ORC MR 2149 2174 35 7.3 136.6 5.5X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 314 328 14 50.1 19.9 1.0X -ParquetReader Vectorized: DataPageV2 326 332 8 48.2 20.7 1.0X -ParquetReader Vectorized -> Row: DataPageV1 322 325 5 48.9 20.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 322 328 7 48.8 20.5 1.0X +ParquetReader Vectorized: DataPageV1 325 333 5 48.4 20.6 1.0X +ParquetReader Vectorized: DataPageV2 324 333 8 48.5 20.6 1.0X +ParquetReader Vectorized -> Row: DataPageV1 312 326 14 50.4 19.8 1.0X +ParquetReader Vectorized -> Row: DataPageV2 323 329 6 48.6 20.6 1.0X ================================================================================================ SQL Single Numeric Column Scan in Struct ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2186 2228 60 7.2 139.0 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2220 2236 22 7.1 141.1 1.0X -SQL ORC Vectorized (Nested Column Enabled) 147 156 14 107.0 9.3 14.9X -SQL Parquet MR: DataPageV1 2403 2434 44 6.5 152.8 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3030 3053 33 5.2 192.6 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 105 135 23 149.9 6.7 20.8X -SQL Parquet MR: DataPageV2 2343 2350 10 6.7 149.0 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2929 2964 50 5.4 186.2 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 104 117 15 151.1 6.6 21.0X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2116 2119 4 7.4 134.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2127 2157 42 7.4 135.3 1.0X +SQL ORC Vectorized (Nested Column Enabled) 146 153 9 107.5 9.3 14.5X +SQL Parquet MR: DataPageV1 2589 2609 28 6.1 164.6 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2883 2886 4 5.5 183.3 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 104 121 18 151.6 6.6 20.4X +SQL Parquet MR: DataPageV2 2472 2505 46 6.4 157.2 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2834 2851 25 5.6 180.2 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 106 121 13 148.8 6.7 20.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2180 2203 32 7.2 138.6 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2156 2188 45 7.3 137.1 1.0X -SQL ORC Vectorized (Nested Column Enabled) 261 273 12 60.3 16.6 8.4X -SQL Parquet MR: DataPageV1 2345 2347 4 6.7 149.1 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2652 2672 28 5.9 168.6 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 120 124 3 131.4 7.6 18.2X -SQL Parquet MR: DataPageV2 2326 2357 43 6.8 147.9 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2635 2664 42 6.0 167.5 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 142 159 14 110.6 9.0 15.3X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2332 2378 65 6.7 148.3 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2331 2360 41 6.7 148.2 1.0X +SQL ORC Vectorized (Nested Column Enabled) 257 270 10 61.2 16.3 9.1X +SQL Parquet MR: DataPageV1 2383 2385 2 6.6 151.5 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2944 2945 1 5.3 187.2 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 120 134 16 130.9 7.6 19.4X +SQL Parquet MR: DataPageV2 2323 2334 17 6.8 147.7 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2983 2992 12 5.3 189.7 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 231 263 16 68.0 14.7 10.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2364 2366 4 6.7 150.3 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2347 2358 14 6.7 149.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 281 293 10 55.9 17.9 8.4X -SQL Parquet MR: DataPageV1 2409 2426 24 6.5 153.1 1.0X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2909 2918 14 5.4 184.9 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 123 128 4 127.4 7.8 19.2X -SQL Parquet MR: DataPageV2 2387 2398 16 6.6 151.8 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2865 2869 6 5.5 182.1 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 276 282 4 57.0 17.5 8.6X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2196 2201 7 7.2 139.6 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2243 2312 97 7.0 142.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 278 292 18 56.6 17.7 7.9X +SQL Parquet MR: DataPageV1 2539 2540 1 6.2 161.4 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3499 3514 20 4.5 222.5 0.6X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 112 117 4 139.9 7.2 19.5X +SQL Parquet MR: DataPageV2 2555 2563 12 6.2 162.4 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3424 3441 25 4.6 217.7 0.6X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 242 250 5 64.9 15.4 9.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2207 2213 9 7.1 140.3 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2245 2267 32 7.0 142.7 1.0X -SQL ORC Vectorized (Nested Column Enabled) 285 305 38 55.1 18.1 7.7X -SQL Parquet MR: DataPageV1 2807 2812 8 5.6 178.4 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3405 3407 3 4.6 216.5 0.6X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 291 321 20 54.0 18.5 7.6X -SQL Parquet MR: DataPageV2 2380 2384 7 6.6 151.3 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2874 2875 1 5.5 182.7 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 261 300 19 60.2 16.6 8.4X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2219 2229 15 7.1 141.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2234 2248 21 7.0 142.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 290 309 18 54.2 18.5 7.6X +SQL Parquet MR: DataPageV1 2806 2812 8 5.6 178.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3281 3296 20 4.8 208.6 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 326 335 10 48.3 20.7 6.8X +SQL Parquet MR: DataPageV2 2430 2454 34 6.5 154.5 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2898 2912 20 5.4 184.3 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 251 288 23 62.6 16.0 8.8X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2259 2275 23 7.0 143.6 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2159 2167 10 7.3 137.3 1.0X -SQL ORC Vectorized (Nested Column Enabled) 334 356 19 47.1 21.2 6.8X -SQL Parquet MR: DataPageV1 2424 2431 10 6.5 154.1 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3094 3102 11 5.1 196.7 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 100 106 5 157.5 6.3 22.6X -SQL Parquet MR: DataPageV2 2283 2335 74 6.9 145.1 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2917 2927 15 5.4 185.4 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 91 99 6 173.6 5.8 24.9X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2313 2372 83 6.8 147.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2405 2419 19 6.5 152.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 337 355 19 46.6 21.5 6.9X +SQL Parquet MR: DataPageV1 2604 2617 17 6.0 165.6 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3103 3112 12 5.1 197.3 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 95 100 4 165.2 6.1 24.3X +SQL Parquet MR: DataPageV2 2674 2698 34 5.9 170.0 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3215 3237 32 4.9 204.4 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 87 101 9 180.4 5.5 26.5X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2680 2694 20 5.9 170.4 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2627 2643 23 6.0 167.0 1.0X -SQL ORC Vectorized (Nested Column Enabled) 708 710 2 22.2 45.0 3.8X -SQL Parquet MR: DataPageV1 2773 2802 41 5.7 176.3 1.0X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3285 3303 26 4.8 208.8 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 287 318 20 54.8 18.2 9.3X -SQL Parquet MR: DataPageV2 2733 2745 17 5.8 173.8 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3253 3268 22 4.8 206.8 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 281 307 17 56.0 17.9 9.5X +SQL ORC MR 2676 2684 12 5.9 170.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2595 2596 2 6.1 165.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 697 708 16 22.6 44.3 3.8X +SQL Parquet MR: DataPageV1 2836 2854 25 5.5 180.3 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3428 3435 10 4.6 218.0 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 307 319 11 51.2 19.5 8.7X +SQL Parquet MR: DataPageV2 2903 2904 2 5.4 184.6 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3511 3518 9 4.5 223.2 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 317 322 4 49.7 20.1 8.5X ================================================================================================ SQL Nested Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 13038 13249 150 0.1 12434.1 1.0X -SQL ORC Vectorized (Nested Column Disabled) 12833 12893 64 0.1 12238.8 1.0X -SQL ORC Vectorized (Nested Column Enabled) 7120 7144 16 0.1 6789.8 1.8X -SQL Parquet MR: DataPageV1 9138 9238 95 0.1 8714.4 1.4X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9034 9073 23 0.1 8615.2 1.4X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5906 5980 75 0.2 5632.7 2.2X -SQL Parquet MR: DataPageV2 9953 10002 39 0.1 9492.1 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 10262 10298 34 0.1 9786.3 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5682 5711 32 0.2 5419.1 2.3X +SQL ORC MR 12857 12956 97 0.1 12261.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 12868 12963 93 0.1 12272.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 7063 7109 31 0.1 6735.6 1.8X +SQL Parquet MR: DataPageV1 9067 9173 81 0.1 8646.8 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9287 9373 59 0.1 8856.4 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5899 5931 25 0.2 5625.7 2.2X +SQL Parquet MR: DataPageV2 9529 9579 54 0.1 9087.2 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9864 10035 165 0.1 9406.6 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5650 5702 49 0.2 5388.4 2.3X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10201 10325 175 1.0 972.8 1.0X -SQL Json 10262 10264 2 1.0 978.7 1.0X -SQL Parquet Vectorized: DataPageV1 1714 1739 36 6.1 163.5 6.0X -SQL Parquet Vectorized: DataPageV2 1938 1949 15 5.4 184.9 5.3X -SQL Parquet MR: DataPageV1 3841 3842 2 2.7 366.3 2.7X -SQL Parquet MR: DataPageV2 3858 3877 28 2.7 367.9 2.6X -SQL ORC Vectorized 1842 1847 8 5.7 175.6 5.5X -SQL ORC MR 3767 3780 19 2.8 359.2 2.7X +SQL CSV 10098 10209 156 1.0 963.0 1.0X +SQL Json 9940 9993 75 1.1 947.9 1.0X +SQL Parquet Vectorized: DataPageV1 1682 1707 36 6.2 160.4 6.0X +SQL Parquet Vectorized: DataPageV2 1912 1930 25 5.5 182.4 5.3X +SQL Parquet MR: DataPageV1 3861 3870 13 2.7 368.2 2.6X +SQL Parquet MR: DataPageV2 3961 3969 10 2.6 377.8 2.5X +SQL ORC Vectorized 1768 1780 18 5.9 168.6 5.7X +SQL ORC MR 3478 3493 21 3.0 331.7 2.9X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5859 5873 20 1.8 558.8 1.0X -SQL Json 6340 6344 6 1.7 604.7 0.9X -SQL Parquet Vectorized: DataPageV1 425 440 15 24.6 40.6 13.8X -SQL Parquet Vectorized: DataPageV2 420 423 3 25.0 40.1 13.9X -SQL Parquet MR: DataPageV1 1900 1906 9 5.5 181.2 3.1X -SQL Parquet MR: DataPageV2 1767 1769 3 5.9 168.5 3.3X -SQL ORC Vectorized 369 375 5 28.4 35.2 15.9X -SQL ORC MR 1893 1908 21 5.5 180.5 3.1X +SQL CSV 5870 5882 17 1.8 559.8 1.0X +SQL Json 6337 6345 10 1.7 604.4 0.9X +SQL Parquet Vectorized: DataPageV1 457 473 22 23.0 43.5 12.9X +SQL Parquet Vectorized: DataPageV2 491 501 8 21.3 46.9 11.9X +SQL Parquet MR: DataPageV1 1631 1648 24 6.4 155.6 3.6X +SQL Parquet MR: DataPageV2 1580 1606 36 6.6 150.7 3.7X +SQL ORC Vectorized 372 378 8 28.2 35.5 15.8X +SQL ORC MR 1732 1735 5 6.1 165.1 3.4X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 10973 11041 96 1.4 697.7 1.0X -Data column - Json 9301 9314 18 1.7 591.4 1.2X -Data column - Parquet Vectorized: DataPageV1 118 137 16 132.9 7.5 92.7X -Data column - Parquet Vectorized: DataPageV2 228 268 42 68.9 14.5 48.1X -Data column - Parquet MR: DataPageV1 2224 2228 5 7.1 141.4 4.9X -Data column - Parquet MR: DataPageV2 2216 2218 3 7.1 140.9 5.0X -Data column - ORC Vectorized 182 191 11 86.6 11.5 60.4X -Data column - ORC MR 2559 2560 1 6.1 162.7 4.3X -Partition column - CSV 3893 3981 125 4.0 247.5 2.8X -Partition column - Json 8839 8879 57 1.8 561.9 1.2X -Partition column - Parquet Vectorized: DataPageV1 30 40 9 518.7 1.9 361.9X -Partition column - Parquet Vectorized: DataPageV2 29 35 7 545.4 1.8 380.5X -Partition column - Parquet MR: DataPageV1 1274 1293 27 12.3 81.0 8.6X -Partition column - Parquet MR: DataPageV2 1295 1315 28 12.1 82.3 8.5X -Partition column - ORC Vectorized 30 38 9 516.1 1.9 360.1X -Partition column - ORC MR 1488 1489 2 10.6 94.6 7.4X -Both columns - CSV 10633 10710 109 1.5 676.0 1.0X -Both columns - Json 9583 9602 26 1.6 609.3 1.1X -Both columns - Parquet Vectorized: DataPageV1 156 185 24 100.6 9.9 70.2X -Both columns - Parquet Vectorized: DataPageV2 271 316 34 58.1 17.2 40.5X -Both columns - Parquet MR: DataPageV1 2325 2342 25 6.8 147.8 4.7X -Both columns - Parquet MR: DataPageV2 2353 2366 18 6.7 149.6 4.7X -Both columns - ORC Vectorized 182 212 29 86.5 11.6 60.4X -Both columns - ORC MR 2121 2146 35 7.4 134.9 5.2X +Data column - CSV 10956 10967 15 1.4 696.5 1.0X +Data column - Json 9169 9189 29 1.7 583.0 1.2X +Data column - Parquet Vectorized: DataPageV1 108 126 16 145.8 6.9 101.6X +Data column - Parquet Vectorized: DataPageV2 217 233 20 72.5 13.8 50.5X +Data column - Parquet MR: DataPageV1 2229 2346 166 7.1 141.7 4.9X +Data column - Parquet MR: DataPageV2 2224 2240 23 7.1 141.4 4.9X +Data column - ORC Vectorized 178 184 4 88.3 11.3 61.5X +Data column - ORC MR 2040 2069 41 7.7 129.7 5.4X +Partition column - CSV 3493 3514 30 4.5 222.1 3.1X +Partition column - Json 8200 8367 236 1.9 521.3 1.3X +Partition column - Parquet Vectorized: DataPageV1 29 36 7 543.6 1.8 378.6X +Partition column - Parquet Vectorized: DataPageV2 28 35 7 560.2 1.8 390.2X +Partition column - Parquet MR: DataPageV1 1233 1255 31 12.8 78.4 8.9X +Partition column - Parquet MR: DataPageV2 1239 1248 13 12.7 78.8 8.8X +Partition column - ORC Vectorized 29 34 6 547.3 1.8 381.2X +Partition column - ORC MR 1300 1304 5 12.1 82.6 8.4X +Both columns - CSV 10899 10923 34 1.4 693.0 1.0X +Both columns - Json 9755 9777 31 1.6 620.2 1.1X +Both columns - Parquet Vectorized: DataPageV1 187 215 18 83.9 11.9 58.5X +Both columns - Parquet Vectorized: DataPageV2 266 290 24 59.0 16.9 41.1X +Both columns - Parquet MR: DataPageV1 2368 2379 15 6.6 150.6 4.6X +Both columns - Parquet MR: DataPageV2 2315 2323 11 6.8 147.2 4.7X +Both columns - ORC Vectorized 181 210 27 86.8 11.5 60.4X +Both columns - ORC MR 2214 2274 86 7.1 140.7 4.9X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7025 7097 101 1.5 670.0 1.0X -SQL Json 8902 8910 11 1.2 849.0 0.8X -SQL Parquet Vectorized: DataPageV1 1046 1068 30 10.0 99.8 6.7X -SQL Parquet Vectorized: DataPageV2 1414 1464 71 7.4 134.9 5.0X -SQL Parquet MR: DataPageV1 3522 3524 3 3.0 335.9 2.0X -SQL Parquet MR: DataPageV2 3610 3631 29 2.9 344.3 1.9X -ParquetReader Vectorized: DataPageV1 740 745 6 14.2 70.6 9.5X -ParquetReader Vectorized: DataPageV2 1038 1053 21 10.1 99.0 6.8X -SQL ORC Vectorized 889 894 6 11.8 84.8 7.9X -SQL ORC MR 2852 2883 43 3.7 272.0 2.5X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 6979 7004 35 1.5 665.6 1.0X +SQL Json 8795 8811 23 1.2 838.8 0.8X +SQL Parquet Vectorized: DataPageV1 1153 1174 30 9.1 110.0 6.1X +SQL Parquet Vectorized: DataPageV2 1419 1454 51 7.4 135.3 4.9X +SQL Parquet MR: DataPageV1 3349 3358 14 3.1 319.3 2.1X +SQL Parquet MR: DataPageV2 3710 3720 13 2.8 353.8 1.9X +ParquetReader Vectorized: DataPageV1 788 795 10 13.3 75.2 8.9X +ParquetReader Vectorized: DataPageV2 1033 1057 35 10.2 98.5 6.8X +SQL ORC Vectorized 815 820 4 12.9 77.7 8.6X +SQL ORC MR 2914 2955 58 3.6 277.9 2.4X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5260 5266 8 2.0 501.6 1.0X -SQL Json 7406 7414 12 1.4 706.3 0.7X -SQL Parquet Vectorized: DataPageV1 730 737 9 14.4 69.6 7.2X -SQL Parquet Vectorized: DataPageV2 985 996 10 10.7 93.9 5.3X -SQL Parquet MR: DataPageV1 2785 2807 31 3.8 265.6 1.9X -SQL Parquet MR: DataPageV2 2931 2952 29 3.6 279.5 1.8X -ParquetReader Vectorized: DataPageV1 678 695 23 15.5 64.6 7.8X -ParquetReader Vectorized: DataPageV2 867 874 8 12.1 82.7 6.1X -SQL ORC Vectorized 964 974 15 10.9 92.0 5.5X -SQL ORC MR 2879 2889 13 3.6 274.6 1.8X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 5379 5379 1 1.9 513.0 1.0X +SQL Json 7512 7522 14 1.4 716.4 0.7X +SQL Parquet Vectorized: DataPageV1 766 773 10 13.7 73.1 7.0X +SQL Parquet Vectorized: DataPageV2 953 973 29 11.0 90.9 5.6X +SQL Parquet MR: DataPageV1 2627 2634 11 4.0 250.5 2.0X +SQL Parquet MR: DataPageV2 2857 2863 8 3.7 272.4 1.9X +ParquetReader Vectorized: DataPageV1 686 701 22 15.3 65.4 7.8X +ParquetReader Vectorized: DataPageV2 868 882 16 12.1 82.8 6.2X +SQL ORC Vectorized 952 980 34 11.0 90.8 5.6X +SQL ORC MR 2794 2796 3 3.8 266.4 1.9X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4057 4066 12 2.6 386.9 1.0X -SQL Json 5369 5375 9 2.0 512.0 0.8X -SQL Parquet Vectorized: DataPageV1 166 171 8 63.3 15.8 24.5X -SQL Parquet Vectorized: DataPageV2 194 199 4 54.1 18.5 20.9X -SQL Parquet MR: DataPageV1 1761 1763 3 6.0 167.9 2.3X -SQL Parquet MR: DataPageV2 1665 1674 12 6.3 158.8 2.4X -ParquetReader Vectorized: DataPageV1 144 146 3 73.0 13.7 28.2X -ParquetReader Vectorized: DataPageV2 170 171 1 61.7 16.2 23.9X -SQL ORC Vectorized 316 328 8 33.2 30.1 12.9X -SQL ORC MR 1669 1673 6 6.3 159.2 2.4X +SQL CSV 4196 4197 2 2.5 400.2 1.0X +SQL Json 5466 5479 19 1.9 521.3 0.8X +SQL Parquet Vectorized: DataPageV1 156 159 4 67.2 14.9 26.9X +SQL Parquet Vectorized: DataPageV2 184 190 6 57.0 17.5 22.8X +SQL Parquet MR: DataPageV1 1656 1659 4 6.3 157.9 2.5X +SQL Parquet MR: DataPageV2 1604 1604 0 6.5 153.0 2.6X +ParquetReader Vectorized: DataPageV1 163 164 1 64.5 15.5 25.8X +ParquetReader Vectorized: DataPageV2 190 193 2 55.3 18.1 22.1X +SQL ORC Vectorized 315 322 6 33.3 30.0 13.3X +SQL ORC MR 1610 1615 6 6.5 153.6 2.6X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 1234 1236 4 0.8 1176.5 1.0X -SQL Json 1527 1537 14 0.7 1456.3 0.8X -SQL Parquet Vectorized: DataPageV1 25 28 5 42.6 23.5 50.1X -SQL Parquet Vectorized: DataPageV2 32 39 6 32.6 30.7 38.3X -SQL Parquet MR: DataPageV1 157 163 6 6.7 149.6 7.9X -SQL Parquet MR: DataPageV2 155 160 6 6.8 147.5 8.0X -SQL ORC Vectorized 28 34 6 36.8 27.2 43.3X -SQL ORC MR 131 137 6 8.0 124.9 9.4X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 1157 1159 3 0.9 1103.2 1.0X +SQL Json 1698 1702 5 0.6 1619.7 0.7X +SQL Parquet Vectorized: DataPageV1 24 29 6 43.3 23.1 47.8X +SQL Parquet Vectorized: DataPageV2 32 38 6 32.5 30.8 35.8X +SQL Parquet MR: DataPageV1 163 170 8 6.4 155.3 7.1X +SQL Parquet MR: DataPageV2 159 167 6 6.6 151.8 7.3X +SQL ORC Vectorized 28 34 7 37.5 26.7 41.4X +SQL ORC MR 130 136 6 8.1 123.8 8.9X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2636 2706 99 0.4 2514.2 1.0X -SQL Json 5037 5127 127 0.2 4803.9 0.5X -SQL Parquet Vectorized: DataPageV1 27 32 6 38.6 25.9 97.0X -SQL Parquet Vectorized: DataPageV2 35 40 6 30.3 33.0 76.3X -SQL Parquet MR: DataPageV1 160 171 8 6.6 152.6 16.5X -SQL Parquet MR: DataPageV2 160 169 7 6.6 152.6 16.5X -SQL ORC Vectorized 32 35 5 32.8 30.4 82.6X -SQL ORC MR 136 141 5 7.7 129.9 19.4X - -OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 2485 2523 55 0.4 2369.7 1.0X +SQL Json 5915 5940 35 0.2 5641.2 0.4X +SQL Parquet Vectorized: DataPageV1 29 36 7 36.2 27.6 85.8X +SQL Parquet Vectorized: DataPageV2 35 39 6 30.2 33.1 71.6X +SQL Parquet MR: DataPageV1 168 173 4 6.2 160.0 14.8X +SQL Parquet MR: DataPageV2 164 175 8 6.4 156.2 15.2X +SQL ORC Vectorized 32 35 5 33.1 30.2 78.5X +SQL ORC MR 142 148 5 7.4 135.1 17.5X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4290 4341 71 0.2 4091.7 1.0X -SQL Json 10204 10308 148 0.1 9731.3 0.4X -SQL Parquet Vectorized: DataPageV1 34 40 7 31.2 32.0 127.9X -SQL Parquet Vectorized: DataPageV2 41 46 7 25.5 39.3 104.2X -SQL Parquet MR: DataPageV1 174 182 8 6.0 165.5 24.7X -SQL Parquet MR: DataPageV2 170 178 8 6.2 161.7 25.3X -SQL ORC Vectorized 39 44 5 27.1 36.9 110.8X -SQL ORC MR 155 161 5 6.8 148.0 27.6X +SQL CSV 4100 4175 105 0.3 3910.5 1.0X +SQL Json 9817 9951 190 0.1 9362.4 0.4X +SQL Parquet Vectorized: DataPageV1 34 45 10 31.0 32.2 121.4X +SQL Parquet Vectorized: DataPageV2 41 47 7 25.5 39.2 99.7X +SQL Parquet MR: DataPageV1 179 187 8 5.9 170.8 22.9X +SQL Parquet MR: DataPageV2 169 183 14 6.2 161.0 24.3X +SQL ORC Vectorized 38 45 9 27.4 36.5 107.1X +SQL ORC MR 143 146 3 7.3 136.1 28.7X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt index 796a774324207..76bbbfa26ae96 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt @@ -1,431 +1,431 @@ -================================================================================================ +DataSourceReadBenchmark-jdk21-results.txt================================================================================================ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12156 12215 83 1.3 772.8 1.0X -SQL Json 8033 8276 343 2.0 510.7 1.5X -SQL Parquet Vectorized: DataPageV1 104 119 8 151.4 6.6 117.0X -SQL Parquet Vectorized: DataPageV2 104 117 25 151.2 6.6 116.8X -SQL Parquet MR: DataPageV1 1795 1811 22 8.8 114.2 6.8X -SQL Parquet MR: DataPageV2 1728 1739 15 9.1 109.9 7.0X -SQL ORC Vectorized 140 147 5 112.1 8.9 86.6X -SQL ORC MR 1617 1617 0 9.7 102.8 7.5X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 10363 10364 2 1.5 658.9 1.0X +SQL Json 8667 8699 46 1.8 551.0 1.2X +SQL Parquet Vectorized: DataPageV1 103 114 8 153.3 6.5 101.0X +SQL Parquet Vectorized: DataPageV2 101 111 6 155.4 6.4 102.4X +SQL Parquet MR: DataPageV1 1809 1813 6 8.7 115.0 5.7X +SQL Parquet MR: DataPageV2 1715 1720 8 9.2 109.0 6.0X +SQL ORC Vectorized 139 146 5 113.1 8.8 74.5X +SQL ORC MR 1508 1511 5 10.4 95.8 6.9X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 94 95 1 166.6 6.0 1.0X -ParquetReader Vectorized: DataPageV2 101 103 2 155.3 6.4 0.9X -ParquetReader Vectorized -> Row: DataPageV1 74 75 2 213.8 4.7 1.3X -ParquetReader Vectorized -> Row: DataPageV2 81 83 1 193.2 5.2 1.2X +ParquetReader Vectorized: DataPageV1 88 90 2 178.9 5.6 1.0X +ParquetReader Vectorized: DataPageV2 95 96 1 166.2 6.0 0.9X +ParquetReader Vectorized -> Row: DataPageV1 73 74 1 215.3 4.6 1.2X +ParquetReader Vectorized -> Row: DataPageV2 81 83 1 193.1 5.2 1.1X -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11407 11426 26 1.4 725.3 1.0X -SQL Json 9654 9660 7 1.6 613.8 1.2X -SQL Parquet Vectorized: DataPageV1 91 100 7 172.0 5.8 124.7X -SQL Parquet Vectorized: DataPageV2 88 97 8 178.6 5.6 129.5X -SQL Parquet MR: DataPageV1 1913 1916 5 8.2 121.6 6.0X -SQL Parquet MR: DataPageV2 1868 1871 5 8.4 118.7 6.1X -SQL ORC Vectorized 115 124 6 136.7 7.3 99.1X -SQL ORC MR 1519 1523 6 10.4 96.6 7.5X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 11538 11589 73 1.4 733.5 1.0X +SQL Json 9586 9596 14 1.6 609.5 1.2X +SQL Parquet Vectorized: DataPageV1 109 116 6 144.8 6.9 106.2X +SQL Parquet Vectorized: DataPageV2 110 118 8 142.6 7.0 104.6X +SQL Parquet MR: DataPageV1 1901 1953 74 8.3 120.9 6.1X +SQL Parquet MR: DataPageV2 1817 1832 22 8.7 115.5 6.4X +SQL ORC Vectorized 118 126 7 133.6 7.5 98.0X +SQL ORC MR 1505 1535 43 10.5 95.7 7.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 69 71 1 227.3 4.4 1.0X -ParquetReader Vectorized: DataPageV2 69 71 2 228.4 4.4 1.0X -ParquetReader Vectorized -> Row: DataPageV1 47 48 1 332.4 3.0 1.5X -ParquetReader Vectorized -> Row: DataPageV2 47 48 1 334.0 3.0 1.5X +ParquetReader Vectorized: DataPageV1 93 94 1 169.9 5.9 1.0X +ParquetReader Vectorized: DataPageV2 93 94 1 169.1 5.9 1.0X +ParquetReader Vectorized -> Row: DataPageV1 61 62 1 258.0 3.9 1.5X +ParquetReader Vectorized -> Row: DataPageV2 61 62 1 258.4 3.9 1.5X -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11899 11906 9 1.3 756.5 1.0X -SQL Json 9956 9977 30 1.6 633.0 1.2X -SQL Parquet Vectorized: DataPageV1 128 133 5 122.7 8.1 92.9X -SQL Parquet Vectorized: DataPageV2 146 153 6 108.0 9.3 81.7X -SQL Parquet MR: DataPageV1 2064 2096 44 7.6 131.3 5.8X -SQL Parquet MR: DataPageV2 2016 2017 2 7.8 128.2 5.9X -SQL ORC Vectorized 124 130 4 127.0 7.9 96.1X -SQL ORC MR 1629 1630 1 9.7 103.6 7.3X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 12200 12203 5 1.3 775.7 1.0X +SQL Json 9813 9854 57 1.6 623.9 1.2X +SQL Parquet Vectorized: DataPageV1 101 107 6 156.1 6.4 121.0X +SQL Parquet Vectorized: DataPageV2 129 135 6 122.3 8.2 94.9X +SQL Parquet MR: DataPageV1 1968 1989 29 8.0 125.1 6.2X +SQL Parquet MR: DataPageV2 1913 1916 3 8.2 121.6 6.4X +SQL ORC Vectorized 130 135 6 120.8 8.3 93.7X +SQL ORC MR 1593 1600 10 9.9 101.3 7.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 156 158 2 100.7 9.9 1.0X -ParquetReader Vectorized: DataPageV2 173 176 3 91.0 11.0 0.9X -ParquetReader Vectorized -> Row: DataPageV1 150 153 2 104.5 9.6 1.0X -ParquetReader Vectorized -> Row: DataPageV2 165 167 2 95.3 10.5 0.9X +ParquetReader Vectorized: DataPageV1 138 140 2 113.9 8.8 1.0X +ParquetReader Vectorized: DataPageV2 166 168 3 94.8 10.6 0.8X +ParquetReader Vectorized -> Row: DataPageV1 136 138 6 115.6 8.6 1.0X +ParquetReader Vectorized -> Row: DataPageV2 164 166 2 96.1 10.4 0.8X -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12927 12937 13 1.2 821.9 1.0X -SQL Json 10144 10149 8 1.6 644.9 1.3X -SQL Parquet Vectorized: DataPageV1 108 114 5 145.3 6.9 119.4X -SQL Parquet Vectorized: DataPageV2 188 194 8 83.8 11.9 68.9X -SQL Parquet MR: DataPageV1 2141 2142 0 7.3 136.1 6.0X -SQL Parquet MR: DataPageV2 2148 2153 7 7.3 136.5 6.0X -SQL ORC Vectorized 157 162 3 100.2 10.0 82.4X -SQL ORC MR 1650 1650 1 9.5 104.9 7.8X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 13361 13368 9 1.2 849.5 1.0X +SQL Json 10099 10118 27 1.6 642.1 1.3X +SQL Parquet Vectorized: DataPageV1 108 131 29 145.0 6.9 123.2X +SQL Parquet Vectorized: DataPageV2 177 185 7 88.9 11.3 75.5X +SQL Parquet MR: DataPageV1 2031 2083 74 7.7 129.1 6.6X +SQL Parquet MR: DataPageV2 2022 2026 5 7.8 128.6 6.6X +SQL ORC Vectorized 146 151 4 107.7 9.3 91.5X +SQL ORC MR 1642 1642 0 9.6 104.4 8.1X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 147 150 2 106.8 9.4 1.0X -ParquetReader Vectorized: DataPageV2 223 226 2 70.5 14.2 0.7X -ParquetReader Vectorized -> Row: DataPageV1 140 141 1 112.4 8.9 1.1X -ParquetReader Vectorized -> Row: DataPageV2 215 217 2 73.2 13.7 0.7X +ParquetReader Vectorized: DataPageV1 141 143 2 111.9 8.9 1.0X +ParquetReader Vectorized: DataPageV2 209 210 1 75.3 13.3 0.7X +ParquetReader Vectorized -> Row: DataPageV1 138 140 2 113.9 8.8 1.0X +ParquetReader Vectorized -> Row: DataPageV2 207 210 7 76.1 13.1 0.7X -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12751 12778 38 1.2 810.7 1.0X -SQL Json 10133 10136 3 1.6 644.3 1.3X -SQL Parquet Vectorized: DataPageV1 284 294 6 55.4 18.1 44.9X -SQL Parquet Vectorized: DataPageV2 260 270 6 60.5 16.5 49.0X -SQL Parquet MR: DataPageV1 2569 2590 29 6.1 163.4 5.0X -SQL Parquet MR: DataPageV2 2079 2081 4 7.6 132.2 6.1X -SQL ORC Vectorized 179 186 7 88.1 11.4 71.4X -SQL ORC MR 1750 1759 13 9.0 111.3 7.3X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 13316 13326 13 1.2 846.6 1.0X +SQL Json 9808 9885 109 1.6 623.6 1.4X +SQL Parquet Vectorized: DataPageV1 290 293 3 54.3 18.4 46.0X +SQL Parquet Vectorized: DataPageV2 235 238 3 66.9 14.9 56.6X +SQL Parquet MR: DataPageV1 2404 2409 7 6.5 152.9 5.5X +SQL Parquet MR: DataPageV2 2007 2030 33 7.8 127.6 6.6X +SQL ORC Vectorized 150 153 3 104.8 9.5 88.7X +SQL ORC MR 1625 1634 13 9.7 103.3 8.2X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 349 351 3 45.1 22.2 1.0X -ParquetReader Vectorized: DataPageV2 296 301 4 53.1 18.8 1.2X -ParquetReader Vectorized -> Row: DataPageV1 352 356 4 44.7 22.4 1.0X -ParquetReader Vectorized -> Row: DataPageV2 299 304 3 52.6 19.0 1.2X +ParquetReader Vectorized: DataPageV1 334 335 2 47.1 21.2 1.0X +ParquetReader Vectorized: DataPageV2 277 279 2 56.9 17.6 1.2X +ParquetReader Vectorized -> Row: DataPageV1 351 355 3 44.8 22.3 1.0X +ParquetReader Vectorized -> Row: DataPageV2 297 303 7 52.9 18.9 1.1X -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13651 13688 52 1.2 867.9 1.0X -SQL Json 11871 11891 28 1.3 754.8 1.1X -SQL Parquet Vectorized: DataPageV1 89 101 7 175.9 5.7 152.7X -SQL Parquet Vectorized: DataPageV2 88 98 11 178.2 5.6 154.6X -SQL Parquet MR: DataPageV1 2118 2168 71 7.4 134.7 6.4X -SQL Parquet MR: DataPageV2 2051 2090 55 7.7 130.4 6.7X -SQL ORC Vectorized 250 267 17 62.9 15.9 54.6X -SQL ORC MR 1782 1784 2 8.8 113.3 7.7X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 13826 13835 13 1.1 879.0 1.0X +SQL Json 11577 11606 40 1.4 736.1 1.2X +SQL Parquet Vectorized: DataPageV1 87 103 11 181.0 5.5 159.1X +SQL Parquet Vectorized: DataPageV2 88 101 7 178.8 5.6 157.2X +SQL Parquet MR: DataPageV1 2072 2075 4 7.6 131.7 6.7X +SQL Parquet MR: DataPageV2 2075 2087 17 7.6 131.9 6.7X +SQL ORC Vectorized 261 273 10 60.2 16.6 52.9X +SQL ORC MR 1720 1726 8 9.1 109.4 8.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 143 144 1 110.3 9.1 1.0X -ParquetReader Vectorized: DataPageV2 156 162 15 100.9 9.9 0.9X -ParquetReader Vectorized -> Row: DataPageV1 176 180 5 89.5 11.2 0.8X -ParquetReader Vectorized -> Row: DataPageV2 175 181 8 89.9 11.1 0.8X +ParquetReader Vectorized: DataPageV1 135 138 5 116.9 8.6 1.0X +ParquetReader Vectorized: DataPageV2 134 135 2 117.7 8.5 1.0X +ParquetReader Vectorized -> Row: DataPageV1 149 155 5 105.3 9.5 0.9X +ParquetReader Vectorized -> Row: DataPageV2 133 140 11 118.4 8.4 1.0X -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13848 14006 224 1.1 880.4 1.0X -SQL Json 12052 12055 5 1.3 766.2 1.1X -SQL Parquet Vectorized: DataPageV1 285 299 17 55.1 18.1 48.6X -SQL Parquet Vectorized: DataPageV2 285 291 5 55.1 18.1 48.5X -SQL Parquet MR: DataPageV1 2531 2535 6 6.2 160.9 5.5X -SQL Parquet MR: DataPageV2 2446 2489 62 6.4 155.5 5.7X -SQL ORC Vectorized 639 654 18 24.6 40.6 21.7X -SQL ORC MR 2166 2172 8 7.3 137.7 6.4X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 14086 14095 13 1.1 895.6 1.0X +SQL Json 11716 11726 14 1.3 744.9 1.2X +SQL Parquet Vectorized: DataPageV1 280 291 8 56.2 17.8 50.3X +SQL Parquet Vectorized: DataPageV2 282 287 4 55.8 17.9 50.0X +SQL Parquet MR: DataPageV1 2479 2498 27 6.3 157.6 5.7X +SQL Parquet MR: DataPageV2 2492 2509 23 6.3 158.4 5.7X +SQL ORC Vectorized 622 628 7 25.3 39.5 22.6X +SQL ORC MR 2084 2093 14 7.5 132.5 6.8X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 352 354 2 44.7 22.4 1.0X -ParquetReader Vectorized: DataPageV2 350 355 5 44.9 22.3 1.0X -ParquetReader Vectorized -> Row: DataPageV1 357 361 7 44.1 22.7 1.0X -ParquetReader Vectorized -> Row: DataPageV2 359 364 8 43.9 22.8 1.0X +ParquetReader Vectorized: DataPageV1 346 348 2 45.4 22.0 1.0X +ParquetReader Vectorized: DataPageV2 347 349 4 45.4 22.0 1.0X +ParquetReader Vectorized -> Row: DataPageV1 355 358 4 44.3 22.6 1.0X +ParquetReader Vectorized -> Row: DataPageV2 354 357 5 44.4 22.5 1.0X ================================================================================================ SQL Single Numeric Column Scan in Struct ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2020 2040 29 7.8 128.4 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2001 2002 1 7.9 127.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 110 120 22 143.6 7.0 18.4X -SQL Parquet MR: DataPageV1 2442 2459 24 6.4 155.2 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2640 2658 25 6.0 167.9 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 105 110 6 149.8 6.7 19.2X -SQL Parquet MR: DataPageV2 2305 2310 7 6.8 146.5 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2559 2569 13 6.1 162.7 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 105 112 7 149.5 6.7 19.2X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2210 2239 41 7.1 140.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2196 2226 43 7.2 139.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 106 138 35 148.1 6.8 20.8X +SQL Parquet MR: DataPageV1 2436 2446 14 6.5 154.9 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2790 2819 40 5.6 177.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 107 113 7 146.4 6.8 20.6X +SQL Parquet MR: DataPageV2 2308 2310 4 6.8 146.7 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2855 2862 9 5.5 181.5 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 125 137 11 125.9 7.9 17.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2138 2186 67 7.4 135.9 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2134 2136 3 7.4 135.7 1.0X -SQL ORC Vectorized (Nested Column Enabled) 271 279 9 57.9 17.3 7.9X -SQL Parquet MR: DataPageV1 2547 2559 17 6.2 162.0 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2755 2767 16 5.7 175.2 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 135 142 7 116.7 8.6 15.9X -SQL Parquet MR: DataPageV2 2405 2413 12 6.5 152.9 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2615 2623 11 6.0 166.2 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 251 268 12 62.7 15.9 8.5X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2174 2175 2 7.2 138.2 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2170 2183 19 7.2 137.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 272 279 7 57.7 17.3 8.0X +SQL Parquet MR: DataPageV1 2539 2547 11 6.2 161.4 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2723 2741 25 5.8 173.1 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 131 140 8 119.7 8.4 16.5X +SQL Parquet MR: DataPageV2 2430 2430 0 6.5 154.5 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2748 2749 2 5.7 174.7 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 244 254 8 64.4 15.5 8.9X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2178 2216 53 7.2 138.5 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2123 2124 2 7.4 135.0 1.0X -SQL ORC Vectorized (Nested Column Enabled) 277 283 4 56.8 17.6 7.9X -SQL Parquet MR: DataPageV1 2534 2560 36 6.2 161.1 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3022 3030 12 5.2 192.1 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 134 148 10 117.5 8.5 16.3X -SQL Parquet MR: DataPageV2 2431 2444 19 6.5 154.5 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2932 2937 7 5.4 186.4 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 272 279 6 57.8 17.3 8.0X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2156 2188 46 7.3 137.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2176 2228 73 7.2 138.4 1.0X +SQL ORC Vectorized (Nested Column Enabled) 272 295 19 57.8 17.3 7.9X +SQL Parquet MR: DataPageV1 2542 2544 3 6.2 161.6 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2963 2973 14 5.3 188.4 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 135 144 9 116.8 8.6 16.0X +SQL Parquet MR: DataPageV2 2393 2412 28 6.6 152.1 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2939 2942 4 5.4 186.9 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 267 275 7 58.9 17.0 8.1X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2245 2266 30 7.0 142.7 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2240 2276 51 7.0 142.4 1.0X -SQL ORC Vectorized (Nested Column Enabled) 275 283 6 57.1 17.5 8.2X -SQL Parquet MR: DataPageV1 3008 3011 3 5.2 191.3 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3419 3437 26 4.6 217.4 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 309 318 8 50.8 19.7 7.3X -SQL Parquet MR: DataPageV2 2463 2465 3 6.4 156.6 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2768 2770 3 5.7 176.0 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 281 293 11 56.0 17.8 8.0X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2236 2261 35 7.0 142.2 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2212 2256 63 7.1 140.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 279 294 17 56.3 17.8 8.0X +SQL Parquet MR: DataPageV1 2785 2796 15 5.6 177.1 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3213 3327 162 4.9 204.3 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 308 321 10 51.1 19.6 7.3X +SQL Parquet MR: DataPageV2 2454 2496 59 6.4 156.0 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2719 2744 36 5.8 172.9 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 278 285 3 56.6 17.7 8.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2370 2465 133 6.6 150.7 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2342 2348 9 6.7 148.9 1.0X -SQL ORC Vectorized (Nested Column Enabled) 358 365 7 43.9 22.8 6.6X -SQL Parquet MR: DataPageV1 2456 2472 22 6.4 156.2 1.0X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3180 3192 16 4.9 202.2 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 98 118 12 159.9 6.3 24.1X -SQL Parquet MR: DataPageV2 2389 2405 24 6.6 151.9 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2901 2921 28 5.4 184.4 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 95 123 19 165.4 6.0 24.9X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL ORC MR 2286 2327 57 6.9 145.4 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2290 2299 13 6.9 145.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 356 385 18 44.2 22.6 6.4X +SQL Parquet MR: DataPageV1 2374 2410 51 6.6 150.9 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3159 3169 14 5.0 200.8 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 103 122 14 153.3 6.5 22.3X +SQL Parquet MR: DataPageV2 2446 2456 14 6.4 155.5 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3008 3010 3 5.2 191.3 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 93 107 10 169.1 5.9 24.6X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2601 2612 15 6.0 165.4 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2623 2659 50 6.0 166.8 1.0X -SQL ORC Vectorized (Nested Column Enabled) 771 779 10 20.4 49.0 3.4X -SQL Parquet MR: DataPageV1 2844 2858 20 5.5 180.8 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3209 3210 1 4.9 204.0 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 295 309 11 53.3 18.8 8.8X -SQL Parquet MR: DataPageV2 2815 2846 43 5.6 179.0 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3129 3131 4 5.0 198.9 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 291 301 9 54.0 18.5 8.9X +SQL ORC MR 2626 2658 45 6.0 167.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2738 2746 11 5.7 174.1 1.0X +SQL ORC Vectorized (Nested Column Enabled) 778 779 1 20.2 49.5 3.4X +SQL Parquet MR: DataPageV1 2911 2911 1 5.4 185.0 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3340 3354 19 4.7 212.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 298 310 9 52.7 19.0 8.8X +SQL Parquet MR: DataPageV2 2959 2966 11 5.3 188.1 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3281 3289 10 4.8 208.6 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 297 305 8 52.9 18.9 8.8X ================================================================================================ SQL Nested Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 13352 13527 128 0.1 12733.5 1.0X -SQL ORC Vectorized (Nested Column Disabled) 13197 13408 116 0.1 12586.0 1.0X -SQL ORC Vectorized (Nested Column Enabled) 7152 7179 14 0.1 6821.1 1.9X -SQL Parquet MR: DataPageV1 8592 8670 52 0.1 8193.9 1.6X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 8899 8930 21 0.1 8486.5 1.5X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 6001 6072 37 0.2 5723.1 2.2X -SQL Parquet MR: DataPageV2 9493 9541 33 0.1 9053.2 1.4X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9888 9953 46 0.1 9430.0 1.4X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5789 5861 44 0.2 5520.5 2.3X +SQL ORC MR 13102 13223 110 0.1 12495.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 12894 13024 101 0.1 12296.2 1.0X +SQL ORC Vectorized (Nested Column Enabled) 7180 7220 36 0.1 6847.0 1.8X +SQL Parquet MR: DataPageV1 8625 8658 23 0.1 8225.2 1.5X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9197 9324 94 0.1 8771.2 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5862 6041 81 0.2 5590.5 2.2X +SQL Parquet MR: DataPageV2 9564 9731 184 0.1 9120.6 1.4X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9814 9865 50 0.1 9359.5 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5651 5735 38 0.2 5389.3 2.3X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12019 12103 118 0.9 1146.2 1.0X -SQL Json 10690 10720 43 1.0 1019.5 1.1X -SQL Parquet Vectorized: DataPageV1 1803 1814 16 5.8 172.0 6.7X -SQL Parquet Vectorized: DataPageV2 2011 2011 0 5.2 191.8 6.0X -SQL Parquet MR: DataPageV1 4075 4076 1 2.6 388.6 2.9X -SQL Parquet MR: DataPageV2 3989 3994 7 2.6 380.4 3.0X -SQL ORC Vectorized 1807 1835 40 5.8 172.3 6.7X -SQL ORC MR 3618 3641 33 2.9 345.0 3.3X +SQL CSV 12381 12387 8 0.8 1180.8 1.0X +SQL Json 10369 10422 75 1.0 988.8 1.2X +SQL Parquet Vectorized: DataPageV1 1801 1809 12 5.8 171.8 6.9X +SQL Parquet Vectorized: DataPageV2 2010 2024 21 5.2 191.7 6.2X +SQL Parquet MR: DataPageV1 3932 3944 16 2.7 375.0 3.1X +SQL Parquet MR: DataPageV2 4029 4043 20 2.6 384.2 3.1X +SQL ORC Vectorized 1838 1839 2 5.7 175.3 6.7X +SQL ORC MR 3529 3549 28 3.0 336.5 3.5X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7126 7147 30 1.5 679.6 1.0X -SQL Json 6763 6775 17 1.6 645.0 1.1X -SQL Parquet Vectorized: DataPageV1 409 423 24 25.6 39.0 17.4X -SQL Parquet Vectorized: DataPageV2 407 414 12 25.8 38.8 17.5X -SQL Parquet MR: DataPageV1 1744 1753 12 6.0 166.3 4.1X -SQL Parquet MR: DataPageV2 1692 1696 6 6.2 161.4 4.2X -SQL ORC Vectorized 390 400 8 26.9 37.2 18.3X -SQL ORC MR 1782 1782 1 5.9 169.9 4.0X +SQL CSV 7396 7452 80 1.4 705.4 1.0X +SQL Json 6836 6847 14 1.5 652.0 1.1X +SQL Parquet Vectorized: DataPageV1 468 474 5 22.4 44.6 15.8X +SQL Parquet Vectorized: DataPageV2 458 475 12 22.9 43.7 16.1X +SQL Parquet MR: DataPageV1 1621 1625 4 6.5 154.6 4.6X +SQL Parquet MR: DataPageV2 1645 1654 13 6.4 156.8 4.5X +SQL ORC Vectorized 390 395 3 26.9 37.2 19.0X +SQL ORC MR 1787 1791 5 5.9 170.4 4.1X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 13184 13430 348 1.2 838.2 1.0X -Data column - Json 9953 9957 5 1.6 632.8 1.3X -Data column - Parquet Vectorized: DataPageV1 111 132 16 141.9 7.0 118.9X -Data column - Parquet Vectorized: DataPageV2 270 300 19 58.2 17.2 48.8X -Data column - Parquet MR: DataPageV1 2512 2557 63 6.3 159.7 5.2X -Data column - Parquet MR: DataPageV2 2598 2610 16 6.1 165.2 5.1X -Data column - ORC Vectorized 141 147 4 111.4 9.0 93.4X -Data column - ORC MR 2062 2066 5 7.6 131.1 6.4X -Partition column - CSV 3641 3672 44 4.3 231.5 3.6X -Partition column - Json 8624 8624 0 1.8 548.3 1.5X -Partition column - Parquet Vectorized: DataPageV1 33 36 4 482.0 2.1 404.0X -Partition column - Parquet Vectorized: DataPageV2 33 36 4 482.4 2.1 404.4X -Partition column - Parquet MR: DataPageV1 1259 1261 4 12.5 80.0 10.5X -Partition column - Parquet MR: DataPageV2 1260 1265 8 12.5 80.1 10.5X -Partition column - ORC Vectorized 34 38 4 459.0 2.2 384.8X -Partition column - ORC MR 1301 1310 12 12.1 82.7 10.1X -Both columns - CSV 13277 13308 43 1.2 844.1 1.0X -Both columns - Json 10664 10675 14 1.5 678.0 1.2X -Both columns - Parquet Vectorized: DataPageV1 116 146 24 135.4 7.4 113.5X -Both columns - Parquet Vectorized: DataPageV2 293 319 25 53.7 18.6 45.0X -Both columns - Parquet MR: DataPageV1 2515 2553 55 6.3 159.9 5.2X -Both columns - Parquet MR: DataPageV2 2568 2613 64 6.1 163.3 5.1X -Both columns - ORC Vectorized 178 185 12 88.6 11.3 74.3X -Both columns - ORC MR 2134 2147 19 7.4 135.7 6.2X +Data column - CSV 13711 13750 55 1.1 871.7 1.0X +Data column - Json 9919 9951 44 1.6 630.7 1.4X +Data column - Parquet Vectorized: DataPageV1 111 130 16 142.2 7.0 124.0X +Data column - Parquet Vectorized: DataPageV2 259 274 9 60.7 16.5 52.9X +Data column - Parquet MR: DataPageV1 2372 2381 13 6.6 150.8 5.8X +Data column - Parquet MR: DataPageV2 2337 2339 4 6.7 148.6 5.9X +Data column - ORC Vectorized 139 162 16 113.0 8.9 98.5X +Data column - ORC MR 2068 2078 15 7.6 131.4 6.6X +Partition column - CSV 3797 3846 69 4.1 241.4 3.6X +Partition column - Json 8388 8396 10 1.9 533.3 1.6X +Partition column - Parquet Vectorized: DataPageV1 32 35 4 498.4 2.0 434.5X +Partition column - Parquet Vectorized: DataPageV2 31 35 4 500.3 2.0 436.1X +Partition column - Parquet MR: DataPageV1 1241 1242 1 12.7 78.9 11.1X +Partition column - Parquet MR: DataPageV2 1222 1224 3 12.9 77.7 11.2X +Partition column - ORC Vectorized 30 33 3 531.0 1.9 462.9X +Partition column - ORC MR 1232 1241 13 12.8 78.3 11.1X +Both columns - CSV 13510 13516 9 1.2 858.9 1.0X +Both columns - Json 10324 10374 71 1.5 656.4 1.3X +Both columns - Parquet Vectorized: DataPageV1 121 144 18 130.3 7.7 113.6X +Both columns - Parquet Vectorized: DataPageV2 259 274 16 60.8 16.4 53.0X +Both columns - Parquet MR: DataPageV1 2338 2356 25 6.7 148.7 5.9X +Both columns - Parquet MR: DataPageV2 2320 2322 2 6.8 147.5 5.9X +Both columns - ORC Vectorized 177 193 17 89.1 11.2 77.7X +Both columns - ORC MR 2109 2135 36 7.5 134.1 6.5X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8541 8543 2 1.2 814.5 1.0X -SQL Json 8519 8526 10 1.2 812.4 1.0X -SQL Parquet Vectorized: DataPageV1 1164 1188 35 9.0 111.0 7.3X -SQL Parquet Vectorized: DataPageV2 1463 1478 22 7.2 139.5 5.8X -SQL Parquet MR: DataPageV1 3599 3615 22 2.9 343.3 2.4X -SQL Parquet MR: DataPageV2 3701 3730 41 2.8 352.9 2.3X -ParquetReader Vectorized: DataPageV1 792 801 13 13.2 75.5 10.8X -ParquetReader Vectorized: DataPageV2 1051 1052 1 10.0 100.3 8.1X -SQL ORC Vectorized 866 883 16 12.1 82.6 9.9X -SQL ORC MR 2875 2879 6 3.6 274.2 3.0X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 8866 8885 26 1.2 845.5 1.0X +SQL Json 9201 9207 8 1.1 877.5 1.0X +SQL Parquet Vectorized: DataPageV1 1286 1291 6 8.2 122.7 6.9X +SQL Parquet Vectorized: DataPageV2 1554 1566 17 6.7 148.2 5.7X +SQL Parquet MR: DataPageV1 3482 3506 34 3.0 332.1 2.5X +SQL Parquet MR: DataPageV2 3607 3635 40 2.9 344.0 2.5X +ParquetReader Vectorized: DataPageV1 792 794 2 13.2 75.5 11.2X +ParquetReader Vectorized: DataPageV2 1116 1123 10 9.4 106.5 7.9X +SQL ORC Vectorized 912 934 20 11.5 87.0 9.7X +SQL ORC MR 2987 3000 18 3.5 284.9 3.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 6052 6056 6 1.7 577.1 1.0X -SQL Json 7570 7581 15 1.4 722.0 0.8X -SQL Parquet Vectorized: DataPageV1 831 836 5 12.6 79.2 7.3X -SQL Parquet Vectorized: DataPageV2 990 999 15 10.6 94.4 6.1X -SQL Parquet MR: DataPageV1 2848 2869 29 3.7 271.6 2.1X -SQL Parquet MR: DataPageV2 3037 3056 27 3.5 289.6 2.0X -ParquetReader Vectorized: DataPageV1 713 723 17 14.7 68.0 8.5X -ParquetReader Vectorized: DataPageV2 874 881 9 12.0 83.3 6.9X -SQL ORC Vectorized 961 965 5 10.9 91.7 6.3X -SQL ORC MR 2713 2716 5 3.9 258.7 2.2X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 6247 6258 16 1.7 595.8 1.0X +SQL Json 7887 7902 22 1.3 752.1 0.8X +SQL Parquet Vectorized: DataPageV1 824 836 19 12.7 78.5 7.6X +SQL Parquet Vectorized: DataPageV2 1027 1033 10 10.2 97.9 6.1X +SQL Parquet MR: DataPageV1 2799 2799 0 3.7 266.9 2.2X +SQL Parquet MR: DataPageV2 2883 2893 15 3.6 274.9 2.2X +ParquetReader Vectorized: DataPageV1 740 741 1 14.2 70.6 8.4X +ParquetReader Vectorized: DataPageV2 905 906 1 11.6 86.3 6.9X +SQL ORC Vectorized 983 986 3 10.7 93.8 6.4X +SQL ORC MR 2738 2741 4 3.8 261.1 2.3X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4168 4189 29 2.5 397.5 1.0X -SQL Json 5644 5662 25 1.9 538.2 0.7X -SQL Parquet Vectorized: DataPageV1 161 167 5 65.0 15.4 25.8X -SQL Parquet Vectorized: DataPageV2 187 191 2 55.9 17.9 22.2X -SQL Parquet MR: DataPageV1 1836 1837 1 5.7 175.1 2.3X -SQL Parquet MR: DataPageV2 1793 1812 26 5.8 171.0 2.3X -ParquetReader Vectorized: DataPageV1 169 170 1 62.1 16.1 24.7X -ParquetReader Vectorized: DataPageV2 193 195 2 54.2 18.4 21.6X -SQL ORC Vectorized 300 305 6 35.0 28.6 13.9X -SQL ORC MR 1583 1587 6 6.6 151.0 2.6X +SQL CSV 4395 4398 4 2.4 419.2 1.0X +SQL Json 5649 5663 20 1.9 538.7 0.8X +SQL Parquet Vectorized: DataPageV1 164 170 7 64.1 15.6 26.9X +SQL Parquet Vectorized: DataPageV2 186 190 4 56.4 17.7 23.6X +SQL Parquet MR: DataPageV1 1769 1771 2 5.9 168.7 2.5X +SQL Parquet MR: DataPageV2 1721 1730 13 6.1 164.2 2.6X +ParquetReader Vectorized: DataPageV1 169 170 2 62.1 16.1 26.0X +ParquetReader Vectorized: DataPageV2 193 195 2 54.3 18.4 22.8X +SQL ORC Vectorized 313 316 3 33.5 29.9 14.0X +SQL ORC MR 1580 1592 18 6.6 150.6 2.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 1226 1236 14 0.9 1169.0 1.0X -SQL Json 1883 1894 15 0.6 1796.2 0.7X -SQL Parquet Vectorized: DataPageV1 24 28 4 42.8 23.3 50.1X -SQL Parquet Vectorized: DataPageV2 34 37 3 31.1 32.2 36.3X -SQL Parquet MR: DataPageV1 173 177 3 6.1 164.9 7.1X -SQL Parquet MR: DataPageV2 166 185 33 6.3 158.1 7.4X -SQL ORC Vectorized 29 33 5 36.2 27.6 42.3X -SQL ORC MR 128 132 4 8.2 122.5 9.5X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 1197 1198 1 0.9 1141.7 1.0X +SQL Json 1855 1857 3 0.6 1769.2 0.6X +SQL Parquet Vectorized: DataPageV1 25 29 4 41.4 24.2 47.3X +SQL Parquet Vectorized: DataPageV2 34 37 5 30.9 32.4 35.2X +SQL Parquet MR: DataPageV1 160 167 6 6.6 152.7 7.5X +SQL Parquet MR: DataPageV2 154 158 4 6.8 146.7 7.8X +SQL ORC Vectorized 29 32 3 36.6 27.3 41.8X +SQL ORC MR 135 148 37 7.8 128.3 8.9X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2719 2722 5 0.4 2592.8 1.0X -SQL Json 6594 6627 47 0.2 6288.5 0.4X -SQL Parquet Vectorized: DataPageV1 28 32 4 37.8 26.4 98.1X -SQL Parquet Vectorized: DataPageV2 37 41 5 28.5 35.1 73.8X -SQL Parquet MR: DataPageV1 173 176 3 6.1 165.2 15.7X -SQL Parquet MR: DataPageV2 167 172 3 6.3 159.4 16.3X -SQL ORC Vectorized 32 35 4 32.4 30.9 84.0X -SQL ORC MR 133 137 4 7.9 126.9 20.4X - -OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure +SQL CSV 2630 2651 29 0.4 2508.3 1.0X +SQL Json 6628 6696 96 0.2 6321.0 0.4X +SQL Parquet Vectorized: DataPageV1 29 33 4 36.2 27.6 90.8X +SQL Parquet Vectorized: DataPageV2 38 41 4 27.7 36.1 69.4X +SQL Parquet MR: DataPageV1 164 167 2 6.4 156.9 16.0X +SQL Parquet MR: DataPageV2 160 165 4 6.5 152.9 16.4X +SQL ORC Vectorized 33 36 4 31.6 31.6 79.3X +SQL ORC MR 141 145 6 7.5 134.2 18.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4508 4548 57 0.2 4298.8 1.0X -SQL Json 12482 12569 123 0.1 11903.9 0.4X -SQL Parquet Vectorized: DataPageV1 35 39 4 29.7 33.7 127.7X -SQL Parquet Vectorized: DataPageV2 44 47 3 23.7 42.3 101.7X -SQL Parquet MR: DataPageV1 185 189 4 5.7 176.6 24.3X -SQL Parquet MR: DataPageV2 178 184 5 5.9 169.8 25.3X -SQL ORC Vectorized 40 44 5 26.4 37.8 113.6X -SQL ORC MR 140 145 5 7.5 133.6 32.2X +SQL CSV 4436 4536 141 0.2 4230.6 1.0X +SQL Json 12445 12624 253 0.1 11868.7 0.4X +SQL Parquet Vectorized: DataPageV1 36 39 4 29.2 34.3 123.5X +SQL Parquet Vectorized: DataPageV2 46 49 3 23.0 43.5 97.3X +SQL Parquet MR: DataPageV1 176 182 4 6.0 167.8 25.2X +SQL Parquet MR: DataPageV2 172 180 7 6.1 164.4 25.7X +SQL ORC Vectorized 39 43 4 26.8 37.3 113.6X +SQL ORC MR 148 154 11 7.1 141.5 29.9X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala index 027477a8291ae..bc16a69475106 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala @@ -487,7 +487,7 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "32", SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000") { // Test that the max scan size rather than an individual scan size on the filter - // application side matters. `bf5filtered` has 14168 bytes and `bf2` has 3409 bytes. + // application side matters. `bf5filtered` has 15049 bytes and `bf2` has 3409 bytes. withSQLConf( SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "5000") { assertRewroteWithBloomFilter("select * from " + @@ -495,7 +495,7 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp "join bf3 on t.c5 = bf3.c3 where bf3.a3 = 5", 2) } withSQLConf( - SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "15000") { + SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "16000") { assertDidNotRewriteWithBloomFilter("select * from " + "(select * from bf5filtered union all select * from bf2) t " + "join bf3 on t.c5 = bf3.c3 where bf3.a3 = 5") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala index 35e1a38376dd8..f2d04a9c28f2a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala @@ -502,7 +502,7 @@ class ParquetVectorizedSuite extends QueryTest with ParquetTest with SharedSpark val ty = parquetSchema.asGroupType().getType("a").asPrimitiveType() val cd = new ColumnDescriptor(Seq("a").toArray, ty, 0, maxDef) val repetitionLevels = Array.fill[Int](inputValues.length)(0) - val definitionLevels = inputValues.map(v => if (v == null) 0 else 1) + val definitionLevels = inputValues.map(v => if (v == null) 0 else maxDef) val memPageStore = new MemPageStore(expectedValues.length) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index adc4dd899500f..7dc7fc41dc708 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1603,7 +1603,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") - val expectedSize = 657 + val expectedSize = 690 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") var tableStats = getTableStats(tblName)