Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix make space failed when space is available #304

Open
wants to merge 59 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
8206619
Folder: scripts, third_party, website, .github
zhejiangxiaomai May 8, 2023
9b0fd0a
Folder: common, connectors
zhejiangxiaomai May 8, 2023
09533ac
Folder: core, duckdb
zhejiangxiaomai May 8, 2023
f9ed9a3
Folder: dwio
zhejiangxiaomai May 8, 2023
f567ffd
Folder: exec
zhejiangxiaomai May 8, 2023
9a66846
Folder: expression
zhejiangxiaomai May 8, 2023
d4b81ae
Folder: functions
zhejiangxiaomai May 8, 2023
197d44a
Folder: row
zhejiangxiaomai May 8, 2023
8f8906f
Folder: substrait
zhejiangxiaomai May 9, 2023
9ae2cdb
Folder: type
zhejiangxiaomai May 9, 2023
06754fd
Folder: vector
zhejiangxiaomai May 9, 2023
b03f9ec
Add back not node (#228)
rui-mo May 10, 2023
fce80c4
enable all tests (#247)
rui-mo May 10, 2023
70b9cfb
comments unstable customPlanNodeWithExchangeClient (#248)
zhejiangxiaomai May 10, 2023
0b50cfd
Update build dependencies (#185)
ccat3z May 11, 2023
217a93b
fix code style. (#252)
Yohahaha May 12, 2023
c22fa76
Add mapping for bit_or and bit_and (#251)
Yohahaha May 15, 2023
b055ddd
Avoid include Abi.h twice (#253)
zhejiangxiaomai May 15, 2023
6a43f9a
add decimal column reader support (#254)
zuochunwei May 15, 2023
61ec655
Support timestamp reader (#205)
rui-mo May 16, 2023
e26f9ef
Fix the intermediate type of First/Last, and support decimal (#245)
Yohahaha May 16, 2023
b711c8e
Removed duplicated memory copy in "upcastScalarValues" (#256)
yimin-yang May 16, 2023
ff91ff0
Added RleEncoderV2 (#240)
yimin-yang May 17, 2023
bde7b6a
[GLUTEN-1434] Serialize and deserialize RowVector (#250)
jinchengchenghh May 17, 2023
812dbd5
Expand timestamps in page reader (#260)
rui-mo May 17, 2023
12be4e3
[GLUTEN-1638] Add Hdfs support in parquet write (#255)
JkSelf May 19, 2023
a159948
Fix the array out of bounds while getting offsets (#257)
jackylee-ch May 22, 2023
9817ce5
whitelist approx_distinct (#270)
zhli1142015 May 22, 2023
3f33535
Support hash for timestamp type (#269)
liujiayi771 May 23, 2023
8f969eb
Add long decimal type support for ORC (#271)
yimin-yang May 23, 2023
c8a6d55
Add processedStrides and processedSplits metrics (#264)
rui-mo May 24, 2023
2f954b1
Refine make decimal to align with spark sql (#272)
JkSelf May 24, 2023
5b1806e
Add hash seed parameter to sparksql hash functions (#275)
marin-ma May 24, 2023
62570a9
Fix type check in MapFunction (#273)
rui-mo May 24, 2023
57ec320
Support spark asinh, acosh, atanh, sec, csc math functions (#274)
Yohahaha May 24, 2023
3e2b6f5
Create folder if it does not exist on HDFS write
JkSelf May 24, 2023
98d0451
Implement Spark's version of log2, log10 (#266)
zhztheplayer May 25, 2023
82ddf50
Implement Spark's version of atan2 (#263)
zhztheplayer May 25, 2023
0de5f0f
Fix replace SparkSQL function (#277)
izchen May 25, 2023
70898af
Align the implementation for ascii function with spark sql (#268)
PHILO-HE May 25, 2023
23c0569
Fix chr SparkSQL function (#278)
izchen May 25, 2023
97d2829
Fix semantic issues in cast function (#280)
PHILO-HE May 26, 2023
40fcf8f
Fix casting from string to decimal (#281)
rui-mo May 26, 2023
f258c6f
Fix casting from decimal to bool (#283)
rui-mo May 30, 2023
a808b04
remove log (#286)
Yohahaha May 31, 2023
d3da837
Support kPreceeding & kFollowing for window range frame type (#287)
PHILO-HE May 31, 2023
dba81cb
Fix the bug of orc reader test (#288)
zuochunwei May 31, 2023
551e1cd
Enable date type for kPreceeding & kFollowing window range bound (#291)
PHILO-HE Jun 1, 2023
f3d5e0f
Add spark comparison functions (#276)
yma11 Jun 2, 2023
b4f9103
remove unused DoubleValues (#292)
zhejiangxiaomai Jun 5, 2023
ff03bd6
Fix use pre-build arrow (#289)
Yohahaha Jun 6, 2023
5f450bd
Fallback timestamp sort (#295)
rui-mo Jun 6, 2023
2d0dd93
Use correct name in struct type (#297)
rui-mo Jun 8, 2023
e01c54f
[DWIO] refactor the reader of dwrf/orc (#261)
zuochunwei Jun 8, 2023
e3ec2b9
Fallback murmur3hash on complex types (#299)
rui-mo Jun 8, 2023
cd897b1
Optimize the search for bound index in window range frame (#300)
PHILO-HE Jun 9, 2023
7e73041
update dnf cache on centos (#302)
zhouyuan Jun 9, 2023
e595f78
fix make space failed when space is available
jackylee-ch Jun 9, 2023
0a72af2
refresh code
jackylee-ch Jun 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Folder: vector
main changes:

1. Make varchar and varbinary compatible.
2. Add ValueStreamNode operator
zhejiangxiaomai committed May 11, 2023
commit 06754fde2038a69a483779a8386d62e215c99994
10 changes: 5 additions & 5 deletions velox/vector/BaseVector.cpp
Original file line number Diff line number Diff line change
@@ -489,7 +489,7 @@ std::string BaseVector::toString(
}

void BaseVector::ensureWritable(const SelectivityVector& rows) {
auto newSize = std::max<vector_size_t>(rows.end(), length_);
auto newSize = std::max<vector_size_t>(rows.size(), length_);
if (nulls_ && !(nulls_->unique() && nulls_->isMutable())) {
BufferPtr newNulls = AlignedBuffer::allocate<bool>(newSize, pool_);
auto rawNewNulls = newNulls->asMutable<uint64_t>();
@@ -511,9 +511,9 @@ void BaseVector::ensureWritable(
VectorPool* vectorPool) {
if (!result) {
if (vectorPool) {
result = vectorPool->get(type, rows.end());
result = vectorPool->get(type, rows.size());
} else {
result = BaseVector::create(type, rows.end(), pool);
result = BaseVector::create(type, rows.size(), pool);
}
return;
}
@@ -542,7 +542,7 @@ void BaseVector::ensureWritable(

// The copy-on-write size is the max of the writable row set and the
// vector.
auto targetSize = std::max<vector_size_t>(rows.end(), result->size());
auto targetSize = std::max<vector_size_t>(rows.size(), result->size());

VectorPtr copy;
if (vectorPool) {
@@ -605,7 +605,7 @@ VectorPtr BaseVector::createConstant(
variant value,
vector_size_t size,
velox::memory::MemoryPool* pool) {
VELOX_CHECK_EQ(type->kind(), value.kind());
VELOX_CHECK(compatibleKind(type->kind(), value.kind()));
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH_ALL(
newConstant, value.kind(), type, value, size, pool);
}
6 changes: 5 additions & 1 deletion velox/vector/BaseVector.h
Original file line number Diff line number Diff line change
@@ -673,7 +673,11 @@ class BaseVector {
// two unknowns but values cannot be assigned into an unknown 'left'
// from a not-unknown 'right'.
static bool compatibleKind(TypeKind left, TypeKind right) {
  // Vectors of VARCHAR and VARBINARY are compatible with each other, in
  // either direction.
  const bool varcharAndBinary =
      (left == TypeKind::VARCHAR && right == TypeKind::VARBINARY) ||
      (left == TypeKind::VARBINARY && right == TypeKind::VARCHAR);
  // Identical kinds are compatible, and an UNKNOWN 'right' is compatible
  // with any 'left'. The check is intentionally asymmetric: an UNKNOWN
  // 'left' only matches an UNKNOWN 'right'.
  return left == right || right == TypeKind::UNKNOWN || varcharAndBinary;
}

/// Returns a brief summary of the vector. If 'recursive' is true, includes a
7 changes: 4 additions & 3 deletions velox/vector/ComplexVector.cpp
Original file line number Diff line number Diff line change
@@ -209,7 +209,8 @@ void RowVector::copy(
BufferPtr mappedIndices;
vector_size_t* rawMappedIndices = nullptr;
if (toSourceRow) {
mappedIndices = AlignedBuffer::allocate<vector_size_t>(rows.end(), pool_);
mappedIndices =
AlignedBuffer::allocate<vector_size_t>(rows.size(), pool_);
rawMappedIndices = mappedIndices->asMutable<vector_size_t>();
nonNullRows.applyToSelected(
[&](auto row) { rawMappedIndices[row] = indices[toSourceRow[row]]; });
@@ -687,7 +688,7 @@ std::string ArrayVector::toString(vector_size_t index) const {
}

void ArrayVector::ensureWritable(const SelectivityVector& rows) {
auto newSize = std::max<vector_size_t>(rows.end(), BaseVector::length_);
auto newSize = std::max<vector_size_t>(rows.size(), BaseVector::length_);
if (offsets_ && !offsets_->unique()) {
BufferPtr newOffsets =
AlignedBuffer::allocate<vector_size_t>(newSize, BaseVector::pool_);
@@ -949,7 +950,7 @@ std::string MapVector::toString(vector_size_t index) const {
}

void MapVector::ensureWritable(const SelectivityVector& rows) {
auto newSize = std::max<vector_size_t>(rows.end(), BaseVector::length_);
auto newSize = std::max<vector_size_t>(rows.size(), BaseVector::length_);
if (offsets_ && !offsets_->unique()) {
BufferPtr newOffsets =
AlignedBuffer::allocate<vector_size_t>(newSize, BaseVector::pool_);
34 changes: 34 additions & 0 deletions velox/vector/ComplexVectorStream.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "velox/vector/ComplexVector.h"

namespace facebook::velox {

/// Abstract interface for a source that hands out RowVectors one at a time.
/// Subclasses implement both hasNext() and next(); this class holds no state
/// of its own.
class RowVectorStream {
 public:
  RowVectorStream() = default;

  /// Virtual so that implementations can be destroyed through a pointer to
  /// this interface.
  virtual ~RowVectorStream() = default;

  /// Returns whether another RowVector can be obtained from next().
  /// NOTE(review): exact semantics (e.g. whether this may block) are up to
  /// the implementation - confirm against concrete subclasses.
  virtual bool hasNext() = 0;

  /// Returns the next RowVector of the stream.
  /// NOTE(review): presumably must only be called after hasNext() returned
  /// true - confirm against concrete subclasses.
  virtual RowVectorPtr next() = 0;
};

} // namespace facebook::velox
2 changes: 1 addition & 1 deletion velox/vector/FlatVector-inl.h
Original file line number Diff line number Diff line change
@@ -349,7 +349,7 @@ void FlatVector<T>::resize(vector_size_t newSize, bool setNotNull) {

template <typename T>
void FlatVector<T>::ensureWritable(const SelectivityVector& rows) {
auto newSize = std::max<vector_size_t>(rows.end(), BaseVector::length_);
auto newSize = std::max<vector_size_t>(rows.size(), BaseVector::length_);
if (values_ && !(values_->unique() && values_->isMutable())) {
BufferPtr newValues;
if constexpr (std::is_same_v<T, StringView>) {
37 changes: 36 additions & 1 deletion velox/vector/arrow/Bridge.cpp
Original file line number Diff line number Diff line change
@@ -954,6 +954,36 @@ VectorPtr importFromArrowImpl(
memory::MemoryPool* pool,
bool isViewer);

// Builds a flat short-decimal vector from an Arrow array.
//
// Each element of 'arrowArray.buffers[1]' is read as a 16-byte int128_t and
// converted to int64_t with static_cast before being stored as an
// UnscaledShortDecimal. Rows marked null in 'nulls' are left unset.
//
// @param pool Memory pool used to allocate the result vector.
// @param type Type of the result vector passed to BaseVector::create.
// @param nulls Nulls buffer installed on the result vector.
// @param arrowSchema Not used by this function.
// @param arrowArray Source Arrow array; 'length' and 'buffers[1]' are read.
// @param wrapInBufferView Wraps the raw Arrow values buffer in a Buffer.
// @return The populated vector, viewed as FlatVector<UnscaledShortDecimal>.
VectorPtr createDecimalVector(
    memory::MemoryPool* pool,
    const TypePtr& type,
    BufferPtr nulls,
    const ArrowSchema& arrowSchema,
    const ArrowArray& arrowArray,
    WrapInBufferViewFunc wrapInBufferView) {
  auto valueBuf = wrapInBufferView(
      arrowArray.buffers[1], arrowArray.length * sizeof(int128_t));
  const auto* src = valueBuf->as<uint8_t>();

  VectorPtr base = BaseVector::create(type, arrowArray.length, pool);
  base->setNulls(nulls);

  // NOTE(review): the cast result is dereferenced without a null check;
  // presumably BaseVector::create always yields
  // FlatVector<UnscaledShortDecimal> for the types this is called with -
  // confirm against the caller.
  auto flatVector =
      std::dynamic_pointer_cast<FlatVector<UnscaledShortDecimal>>(base);

  // Index with the same width as 'arrowArray.length' to avoid a mixed-width
  // comparison in the loop condition.
  for (int64_t i = 0; i < arrowArray.length; ++i) {
    if (base->isNullAt(i)) {
      // Null rows carry no value; skip the copy entirely.
      continue;
    }
    // memcpy instead of a pointer cast: the source buffer is addressed as
    // bytes and is not assumed to be aligned for int128_t.
    int128_t wide;
    memcpy(&wide, src + i * sizeof(int128_t), sizeof(int128_t));
    flatVector->set(i, UnscaledShortDecimal(static_cast<int64_t>(wide)));
  }

  return flatVector;
}

RowVectorPtr createRowVector(
memory::MemoryPool* pool,
const RowTypePtr& rowType,
@@ -1154,6 +1184,11 @@ VectorPtr importFromArrowImpl(
return createMapVector(
pool, type, nulls, arrowSchema, arrowArray, isViewer, wrapInBufferView);
}
if (type->isShortDecimal()) {
return createDecimalVector(
pool, type, nulls, arrowSchema, arrowArray, wrapInBufferView);
}

// Other primitive types.
VELOX_CHECK(
type->isPrimitiveType(),
@@ -1166,7 +1201,7 @@ VectorPtr importFromArrowImpl(
auto values = wrapInBufferView(
arrowArray.buffers[1], arrowArray.length * type->cppSizeInBytes());

return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH_ALL(
createFlatVector,
type->kind(),
pool,
13 changes: 7 additions & 6 deletions velox/vector/tests/EnsureWritableVectorTest.cpp
Original file line number Diff line number Diff line change
@@ -421,17 +421,18 @@ TEST_F(EnsureWritableVectorTest, constant) {
}

// If constant has smaller size, check that we follow the selectivity vector
// max selected row size.
// size.
{
const vector_size_t selectivityVectorSize = 100;
auto constant = BaseVector::createConstant(
BIGINT(), variant::create<TypeKind::BIGINT>(123), 1, pool_.get());
SelectivityVector rows(selectivityVectorSize);
rows.setValid(99, false);
rows.updateBounds();
BaseVector::ensureWritable(rows, BIGINT(), pool_.get(), constant);
BaseVector::ensureWritable(
SelectivityVector::empty(selectivityVectorSize),
BIGINT(),
pool_.get(),
constant);
EXPECT_EQ(VectorEncoding::Simple::FLAT, constant->encoding());
EXPECT_EQ(99, constant->size());
EXPECT_EQ(selectivityVectorSize, constant->size());
}

// If constant has larger size, check that we follow the constant vector