diff --git a/assembly/pom.xml b/assembly/pom.xml
index c9aef1dd703d5..38aca5b450269 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 1269eb4d9cf38..c0ae7947cdebf 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index f285b3edf37f4..8c05cd9af70e1 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index df08788451fac..a311f2d0bf380 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index d1b6ab1d4539b..0b7dd909f96ad 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml
index c497e5673b4de..4958528313f0f 100644
--- a/common/sketch/pom.xml
+++ b/common/sketch/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/common/tags/pom.xml b/common/tags/pom.xml
index 324cb14b66151..23b63740fea44 100644
--- a/common/tags/pom.xml
+++ b/common/tags/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
index ef03eb52044e0..02ac2deb5eed1 100644
--- a/common/unsafe/pom.xml
+++ b/common/unsafe/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/core/pom.xml b/core/pom.xml
index d3defa86a159f..e38ead3ee15de 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/examples/pom.xml b/examples/pom.xml
index a32bbe3fc60ac..5e33c2f099535 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/external/avro/pom.xml b/external/avro/pom.xml
index 6fb062cd68520..6935cdc4e483a 100644
--- a/external/avro/pom.xml
+++ b/external/avro/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml
index ccf92dbaac068..60369002628f6 100644
--- a/external/docker-integration-tests/pom.xml
+++ b/external/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index f761586cf1e28..18e7874aa470b 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index f9657e75421f8..8899350b8eaa8 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index bfaa7df88830f..915517563b1be 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml
index 8fd6a36f59d39..4774ea1cc5102 100644
--- a/external/kafka-0-10-assembly/pom.xml
+++ b/external/kafka-0-10-assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml
index 66aa7877278c6..665dd6a333385 100644
--- a/external/kafka-0-10-sql/pom.xml
+++ b/external/kafka-0-10-sql/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml
index 4582047b0bce9..69b082c2d159f 100644
--- a/external/kafka-0-10/pom.xml
+++ b/external/kafka-0-10/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml
index 7915361f4d1ce..67518221b22c1 100644
--- a/external/kafka-0-8-assembly/pom.xml
+++ b/external/kafka-0-8-assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml
index 5448156d096b9..34d09c23cec0f 100644
--- a/external/kafka-0-8/pom.xml
+++ b/external/kafka-0-8/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml
index 6f3ead11da275..8496787e5d24f 100644
--- a/external/kinesis-asl-assembly/pom.xml
+++ b/external/kinesis-asl-assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml
index de16094d22fa2..ad89d4effc6ca 100644
--- a/external/kinesis-asl/pom.xml
+++ b/external/kinesis-asl/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml
index 28930d077c807..6b1f6ad262ef2 100644
--- a/external/spark-ganglia-lgpl/pom.xml
+++ b/external/spark-ganglia-lgpl/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/graphx/pom.xml b/graphx/pom.xml
index 8227de9e3c73e..ae2e736b29df0 100644
--- a/graphx/pom.xml
+++ b/graphx/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml
index 4104e0f52e070..d3f1bd009042b 100644
--- a/hadoop-cloud/pom.xml
+++ b/hadoop-cloud/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/launcher/pom.xml b/launcher/pom.xml
index 5c8e2d77f4171..8cd7e00a6a659 100644
--- a/launcher/pom.xml
+++ b/launcher/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index b0c3a680e95cb..209da0c9e6b60 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/mllib/pom.xml b/mllib/pom.xml
index d9c3dac1b4bd9..0b9e1d0929135 100644
--- a/mllib/pom.xml
+++ b/mllib/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/pom.xml b/pom.xml
index 214842c1232ba..026d305f60230 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,7 +26,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
pom
Spark Project Parent POM
http://spark.apache.org/
@@ -130,7 +130,7 @@
1.2.1
10.12.1.1
- 1.12.0-kylin-r2
+ 1.12.0-kylin-r3
7.0.13
1.5.2
nohive
diff --git a/repl/pom.xml b/repl/pom.xml
index 1f7135a698bdf..cf21e83277b07 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml
index eddbd16825f85..71f349c48bd8b 100644
--- a/resource-managers/kubernetes/core/pom.xml
+++ b/resource-managers/kubernetes/core/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../../pom.xml
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index dedae69605ab1..34433db226353 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../../pom.xml
diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml
index 1d1006f443a29..acb83467baea2 100644
--- a/resource-managers/mesos/pom.xml
+++ b/resource-managers/mesos/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml
index d2f9bf0504b9a..96c05b23c425d 100644
--- a/resource-managers/yarn/pom.xml
+++ b/resource-managers/yarn/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml
index b68e47df90d17..d85293e9a4c25 100644
--- a/sql/catalyst/pom.xml
+++ b/sql/catalyst/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 7d24a5138892d..02c0424c5c1a5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -351,6 +351,11 @@ object SQLConf {
val INT96, TIMESTAMP_MICROS, TIMESTAMP_MILLIS = Value
}
+ // Threshold that ParquetWriteSupport compares against the byte length of each
+ // string/binary cell it writes. The default is 512 * 1024 (524288).
+ val PARQUET_CELL_SIZE_LIMIT =
+   buildConf("spark.sql.parquet.cellSizeLimit")
+     .doc("Parquet file cell size limit. default 512 * 1024")
+     .longConf
+     .createWithDefault(512L * 1024)
+
val PARQUET_OUTPUT_TIMESTAMP_TYPE = buildConf("spark.sql.parquet.outputTimestampType")
.doc("Sets which Parquet timestamp type to use when Spark writes data to Parquet files. " +
"INT96 is a non-standard but commonly used timestamp type in Parquet. TIMESTAMP_MICROS " +
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 1c7e48fd6b3bb..727a8a0c71b08 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala
index b40b8c2e61f33..20239ee7af300 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala
@@ -68,6 +68,8 @@ private[parquet] class ParquetWriteSupport extends WriteSupport[InternalRow] wit
// Which parquet timestamp type to use when writing.
private var outputTimestampType: SQLConf.ParquetOutputTimestampType.Value = _
+ // Length threshold applied to every string/binary cell written by this class.
+ // Resolved through the active SQLConf so that a user-supplied
+ // spark.sql.parquet.cellSizeLimit takes effect; the entry's default is used only
+ // when the key is unset (previously the default was always used).
+ private val parquetCellSizeLimit: Long = SQLConf.get.getConf(SQLConf.PARQUET_CELL_SIZE_LIMIT)
+
// Reusable byte array used to write timestamps as Parquet INT96 values
private val timestampBuffer = new Array[Byte](12)
@@ -115,6 +117,14 @@ private[parquet] class ParquetWriteSupport extends WriteSupport[InternalRow] wit
}
}
+ /**
+  * Compares the length of a cell about to be written with `parquetCellSizeLimit`.
+  *
+  * When the length is greater than or equal to the limit, both values are logged at
+  * INFO level and `needCheckRowSize` is set to true. Shorter cells cause no action.
+  * NOTE(review): `needCheckRowSize` is declared outside this hunk -- confirm where it
+  * is consumed and whether it is ever reset.
+  *
+  * @param cellLength length of the cell; callers pass the byte-array length
+  */
+ @inline private def checkCellSize(cellLength: Long): Unit = {
+   val limitReached = cellLength >= parquetCellSizeLimit
+   if (limitReached) {
+     logInfo(s"single cell size: $cellLength ")
+     logInfo(s"spark.sql.parquet.cellSizeLimit: $parquetCellSizeLimit ")
+     needCheckRowSize = true
+   }
+ }
+
private def writeFields(
row: InternalRow, schema: StructType, fieldWriters: Array[ValueWriter]): Unit = {
var i = 0
@@ -160,8 +170,10 @@ private[parquet] class ParquetWriteSupport extends WriteSupport[InternalRow] wit
case StringType =>
(row: SpecializedGetters, ordinal: Int) =>
+ val bytes = row.getUTF8String(ordinal).getBytes
+ checkCellSize(bytes.length)
recordConsumer.addBinary(
- Binary.fromReusedByteArray(row.getUTF8String(ordinal).getBytes))
+ Binary.fromReusedByteArray(bytes))
case TimestampType =>
outputTimestampType match {
@@ -184,7 +196,9 @@ private[parquet] class ParquetWriteSupport extends WriteSupport[InternalRow] wit
case BinaryType =>
(row: SpecializedGetters, ordinal: Int) =>
- recordConsumer.addBinary(Binary.fromReusedByteArray(row.getBinary(ordinal)))
+ val bytes = row.getBinary(ordinal)
+ checkCellSize(bytes.length)
+ recordConsumer.addBinary(Binary.fromReusedByteArray(bytes))
case DecimalType.Fixed(precision, scale) =>
makeDecimalWriter(precision, scale)
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index babb6f12fc108..2b908c427c19d 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml
index be9c5aca4c367..3665c7e7d497c 100644
--- a/sql/hive/pom.xml
+++ b/sql/hive/pom.xml
@@ -22,7 +22,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../../pom.xml
diff --git a/streaming/pom.xml b/streaming/pom.xml
index 8f0c26570ee9e..96effd5b12476 100644
--- a/streaming/pom.xml
+++ b/streaming/pom.xml
@@ -21,7 +21,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml
diff --git a/tools/pom.xml b/tools/pom.xml
index 73f2293d07b72..4dbbaa7d3e232 100644
--- a/tools/pom.xml
+++ b/tools/pom.xml
@@ -20,7 +20,7 @@
org.apache.spark
spark-parent_2.11
- 2.4.1-kylin-r13
+ 2.4.1-kylin-r14
../pom.xml