diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 18316a6ebb4d..92b79e3ce56b 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -496,16 +496,21 @@ readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::T const auto * orc_str_column = dynamic_cast(orc_column); size_t reserver_size = 0; for (size_t i = 0; i < orc_str_column->numElements; ++i) - reserver_size += orc_str_column->length[i] + 1; + { + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) + reserver_size += orc_str_column->length[i]; + reserver_size += 1; + } + column_chars_t.reserve(reserver_size); column_offsets.reserve(orc_str_column->numElements); size_t curr_offset = 0; for (size_t i = 0; i < orc_str_column->numElements; ++i) { - const auto * buf = orc_str_column->data[i]; - if (buf) + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) { + const auto * buf = orc_str_column->data[i]; size_t buf_size = orc_str_column->length[i]; column_chars_t.insert_assume_reserved(buf, buf + buf_size); curr_offset += buf_size; @@ -531,7 +536,7 @@ readColumnWithFixedStringData(const orc::ColumnVectorBatch * orc_column, const o const auto * orc_str_column = dynamic_cast(orc_column); for (size_t i = 0; i < orc_str_column->numElements; ++i) { - if (orc_str_column->data[i]) + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) column_chars_t.insert_assume_reserved(orc_str_column->data[i], orc_str_column->data[i] + orc_str_column->length[i]); else column_chars_t.resize_fill(column_chars_t.size() + fixed_len); @@ -580,7 +585,7 @@ readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const or for (size_t i = 0; i < orc_str_column->numElements; ++i) { /// If at least one value size is not 16 bytes, fallback to reading String column and further cast to IPv6. - if (orc_str_column->data[i] && orc_str_column->length[i] != sizeof(IPv6)) + if ((!orc_str_column->hasNulls || orc_str_column->notNull[i]) && orc_str_column->length[i] != sizeof(IPv6)) return readColumnWithStringData(orc_column, orc_type, column_name); } @@ -591,10 +596,10 @@ readIPv6ColumnFromBinaryData(const orc::ColumnVectorBatch * orc_column, const or for (size_t i = 0; i < orc_str_column->numElements; ++i) { - if (!orc_str_column->data[i]) [[unlikely]] - ipv6_column.insertDefault(); - else + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) ipv6_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); + else + ipv6_column.insertDefault(); } return {std::move(internal_column), std::move(internal_type), column_name}; @@ -628,9 +633,7 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData( for (size_t i = 0; i < orc_str_column->numElements; ++i) { - if (!orc_str_column->data[i]) [[unlikely]] - integer_column.insertDefault(); - else + if (!orc_str_column->hasNulls || orc_str_column->notNull[i]) { if (sizeof(typename ColumnType::ValueType) != orc_str_column->length[i]) throw Exception( @@ -642,6 +645,10 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData( integer_column.insertData(orc_str_column->data[i], orc_str_column->length[i]); } + else + { + integer_column.insertDefault(); + } } return {std::move(internal_column), column_type, column_name}; }