Skip to content

Commit

Permalink
Fix FileHandleGenerator inside HiveConnector (#8038)
Browse files Browse the repository at this point in the history
Summary:
A recent refactor #7725 missed passing the connector config
to the FileHandleGenerator in the HiveConnector.

Pull Request resolved: #8038

Reviewed By: xiaoxmeng

Differential Revision: D52154732

Pulled By: mbasmanova

fbshipit-source-id: df6c0edcc5fb169877f9795f6a17eec2c52c1dd2
  • Loading branch information
majetideepak authored and facebook-github-bot committed Dec 14, 2023
1 parent 7822fe1 commit ea2d2e1
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 1 deletion.
2 changes: 1 addition & 1 deletion velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ HiveConnector::HiveConnector(
SimpleLRUCache<std::string, std::shared_ptr<FileHandle>>>(
hiveConfig_->numCacheFileHandles())
: nullptr,
std::make_unique<FileHandleGenerator>(nullptr)),
std::make_unique<FileHandleGenerator>(config)),
executor_(executor) {
if (hiveConfig_->isFileHandleCacheEnabled()) {
LOG(INFO) << "Hive connector " << connectorId()
Expand Down
17 changes: 17 additions & 0 deletions velox/connectors/hive/storage_adapters/s3fs/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,20 @@ target_link_libraries(
velox_exec
gtest
gtest_main)

add_executable(velox_s3read_test S3ReadTest.cpp)
add_test(
NAME velox_s3read_test
COMMAND velox_s3read_test
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(
velox_s3read_test
velox_file
velox_s3fs
velox_hive_config
velox_core
velox_exec_test_lib
velox_dwio_common_exception
velox_exec
gtest
gtest_main)
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
* limitations under the License.
*/

#pragma once

#include "velox/core/Config.h"
#include "velox/exec/tests/utils/TempDirectoryPath.h"

Expand Down
98 changes: 98 additions & 0 deletions velox/connectors/hive/storage_adapters/s3fs/tests/S3ReadTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <folly/init/Init.h>
#include <gtest/gtest.h>

#include "velox/connectors/hive/storage_adapters/s3fs/RegisterS3FileSystem.h"
#include "velox/connectors/hive/storage_adapters/s3fs/tests/S3Test.h"
#include "velox/dwio/common/tests/utils/DataFiles.h"
#include "velox/exec/tests/utils/AssertQueryBuilder.h"
#include "velox/exec/tests/utils/HiveConnectorTestBase.h"
#include "velox/exec/tests/utils/PlanBuilder.h"

using namespace facebook::velox::exec::test;

namespace facebook::velox {
namespace {
class S3ReadTest : public S3Test, public test::VectorTestBase {
public:
static constexpr char const* kMinioConnectionString{"127.0.0.1:6000"};

/// We use static initialization because we want a single version of the
/// Minio server running.
/// Each test must use a unique bucket to avoid concurrency issues.
static void SetUpTestSuite() {
minioServer_ = std::make_shared<MinioServer>(kMinioConnectionString);
minioServer_->start();

filesystems::registerS3FileSystem();
auto hiveConnector =
connector::getConnectorFactory(
connector::hive::HiveConnectorFactory::kHiveConnectorName)
->newConnector(kHiveConnectorId, minioServer_->hiveConfig());
connector::registerConnector(hiveConnector);
}

static void TearDownTestSuite() {
filesystems::finalizeS3FileSystem();
connector::unregisterConnector(kHiveConnectorId);
minioServer_->stop();
minioServer_ = nullptr;
}
};
} // namespace

TEST_F(S3ReadTest, s3ReadTest) {
const auto sourceFile = test::getDataFilePath(
"velox/connectors/hive/storage_adapters/s3fs/tests",
"../../../../../dwio/parquet/tests/examples/int.parquet");
const char* bucketName = "data";
const auto destinationFile = S3Test::localPath(bucketName) + "/int.parquet";
minioServer_->addBucket(bucketName);
std::ifstream src(sourceFile, std::ios::binary);
std::ofstream dest(destinationFile, std::ios::binary);
// Copy source file to destination bucket.
dest << src.rdbuf();
ASSERT_GT(dest.tellp(), 0) << "Unable to copy from source " << sourceFile;
dest.close();

// Read the parquet file via the S3 bucket.
const auto readDirectory{s3URI(bucketName)};
auto rowType = ROW({"int", "bigint"}, {INTEGER(), BIGINT()});
auto plan = PlanBuilder().tableScan(rowType).planNode();
auto split = HiveConnectorSplitBuilder(
fmt::format("{}/{}", readDirectory, "int.parquet"))
.fileFormat(dwio::common::FileFormat::PARQUET)
.build();
auto copy = AssertQueryBuilder(plan).split(split).copyResults(pool());

// expectedResults is the data in int.parquet file.
const int64_t kExpectedRows = 10;
auto expectedResults = makeRowVector(
{makeFlatVector<int32_t>(
kExpectedRows, [](auto row) { return row + 100; }),
makeFlatVector<int64_t>(
kExpectedRows, [](auto row) { return row + 1000; })});
assertEqualResults({expectedResults}, {copy});
}
} // namespace facebook::velox

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
folly::init(&argc, &argv, false);
return RUN_ALL_TESTS();
}

0 comments on commit ea2d2e1

Please sign in to comment.