Skip to content

Commit

Permalink
[bugfix](iceberg)fix read NULL with date partition (apache#30478)
Browse files Browse the repository at this point in the history
* fix date

* fix date

* add case
  • Loading branch information
wuwenchi authored Jan 30, 2024
1 parent a937c0e commit d9e9926
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.types.Conversions;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.DateTimeUtil;
import org.apache.iceberg.util.TableScanUtil;

import java.io.IOException;
Expand Down Expand Up @@ -220,11 +223,23 @@ private List<Split> doGetSplits() throws UserException {
List<String> partitionValues = new ArrayList<>();
if (isPartitionedTable) {
StructLike structLike = splitTask.file().partition();
List<PartitionField> fields = splitTask.spec().fields();
Types.StructType structType = icebergTable.schema().asStruct();

// set partitionValue for this IcebergSplit
for (int i = 0; i < structLike.size(); i++) {
String partition = String.valueOf(structLike.get(i, Object.class));
partitionValues.add(partition);
Object obj = structLike.get(i, Object.class);
String value = String.valueOf(obj);
PartitionField partitionField = fields.get(i);
if (partitionField.transform().isIdentity()) {
Type type = structType.fieldType(partitionField.name());
if (type != null && type.typeId().equals(Type.TypeID.DATE)) {
// iceberg use integer to store date,
// we need transform it to string
value = DateTimeUtil.daysToIsoDate((Integer) obj);
}
}
partitionValues.add(value);
}

// Counts the number of partitions read
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,36 @@
-- !q09 --
100 0.3 test3 2023-01-03T00:00

-- !q01 --
1 true 2020-01-02 2020-01-02T01:02:03.123456

-- !q02 --
1 true 2020-01-02 2020-01-02T01:02:03.123456

-- !q03 --
1 true 2020-01-02 2020-01-02T01:02:03.123456

-- !q04 --
1 true 2020-01-02 2020-01-02T01:02:03.123456

-- !q05 --
1 true 2020-01-02 2020-01-02T01:02:03.123456

-- !q06 --
1 2020-01-02

-- !q07 --
1 2020-01-02

-- !q08 --
1 2020-01-02

-- !q09 --
1 2020-01-02

-- !q10 --
1 2020-01-02

-- !q11 --
1 2020-01-02

Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,30 @@ suite("test_external_catalog_iceberg_partition", "p2,external,iceberg,external_r
qt_q08 """ select * from iceberg_catalog.orc_partitioned_truncate_and_fields where t_int is null order by t_float """
qt_q09 """ select * from iceberg_catalog.orc_partitioned_truncate_and_fields where t_int is not null order by t_float """
}

// test date for partition and predict
def q01_date = {

qt_q01 """ select * from user_case_date_without_partition where d = '2020-01-02' """
qt_q02 """ select * from user_case_date_without_partition where d > '2020-01-01' """
qt_q03 """ select * from user_case_date_without_partition where d < '2020-01-03' """
qt_q04 """ select * from user_case_date_without_partition where ts < '2020-01-03' """
qt_q05 """ select * from user_case_date_without_partition where ts > '2020-01-01' """

qt_q06 """ select * from user_case_date_with_date_partition where d = '2020-01-02' """
qt_q07 """ select * from user_case_date_with_date_partition where d < '2020-01-03' """
qt_q08 """ select * from user_case_date_with_date_partition where d > '2020-01-01' """

qt_q09 """ select * from user_case_date_with_days_date_partition where d = '2020-01-02' """
qt_q10 """ select * from user_case_date_with_days_date_partition where d < '2020-01-03' """
qt_q11 """ select * from user_case_date_with_days_date_partition where d > '2020-01-01' """

}

sql """ use `iceberg_catalog`; """
q01_parquet()
q01_orc()
q01_date()
}
}

0 comments on commit d9e9926

Please sign in to comment.