diff --git a/velox/connectors/hive/storage_adapters/hdfs/tests/CMakeLists.txt b/velox/connectors/hive/storage_adapters/hdfs/tests/CMakeLists.txt
index a3d88934805e..c0b32d0fe54c 100644
--- a/velox/connectors/hive/storage_adapters/hdfs/tests/CMakeLists.txt
+++ b/velox/connectors/hive/storage_adapters/hdfs/tests/CMakeLists.txt
@@ -32,27 +32,27 @@ target_link_libraries(
 target_compile_options(velox_hdfs_file_test
                        PRIVATE -Wno-deprecated-declarations)
 
-# add_executable(velox_hdfs_insert_test InsertIntoHdfsTest.cpp
-# HdfsMiniCluster.cpp HdfsUtilTest.cpp)
-
-# add_test(velox_hdfs_insert_test velox_hdfs_insert_test)
-
-# target_link_libraries(
-# velox_hdfs_insert_test
-# velox_exec_test_lib
-# velox_exec
-# GTest::gtest
-# GTest::gtest_main
-# GTest::gmock)
-
-# target_compile_options(velox_hdfs_insert_test
-# PRIVATE -Wno-deprecated-declarations)
-
-# # velox_hdfs_insert_test and velox_hdfs_file_test two tests can't run in
-# # parallel due to the port conflict in Hadoop NameNode and DataNode. The
-# # namenode port conflict can be resolved using the -nnport configuration in
-# # hadoop-mapreduce-client-jobclient-3.3.0-tests.jar. However the data node port
-# # cannot be configured. Therefore, we need to make sure that
-# # velox_hdfs_file_test runs only after velox_hdfs_insert_test has finished.
-# set_tests_properties(velox_hdfs_insert_test PROPERTIES DEPENDS
-# velox_hdfs_file_test)
+add_executable(velox_hdfs_insert_test InsertIntoHdfsTest.cpp
+                                      HdfsMiniCluster.cpp HdfsUtilTest.cpp)
+
+add_test(velox_hdfs_insert_test velox_hdfs_insert_test)
+
+target_link_libraries(
+  velox_hdfs_insert_test
+  velox_exec_test_lib
+  velox_exec
+  GTest::gtest
+  GTest::gtest_main
+  GTest::gmock)
+
+target_compile_options(velox_hdfs_insert_test
+                       PRIVATE -Wno-deprecated-declarations)
+
+# velox_hdfs_insert_test and velox_hdfs_file_test cannot run in parallel
+# because of port conflicts in the Hadoop NameNode and DataNode. The NameNode
+# port conflict can be resolved using the -nnport configuration in
+# hadoop-mapreduce-client-jobclient-3.3.0-tests.jar. However, the DataNode port
+# cannot be configured. Therefore, we need to make sure that
+# velox_hdfs_insert_test runs only after velox_hdfs_file_test has finished.
+set_tests_properties(velox_hdfs_insert_test PROPERTIES DEPENDS
+                                                       velox_hdfs_file_test)
diff --git a/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.cpp b/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.cpp
index c262bad3d1d1..4274e994e9c0 100644
--- a/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.cpp
+++ b/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsMiniCluster.cpp
@@ -14,6 +14,11 @@
  * limitations under the License.
  */
 
+#include <future>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
 #include "HdfsMiniCluster.h"
 
 namespace facebook::velox::filesystems::test {
@@ -77,6 +82,17 @@ HdfsMiniCluster::HdfsMiniCluster() {
 }
 
+// Adds the file 'source' to HDFS at 'destination' by launching 'exePath_' with
+// 'filePutOption' as a child process and capturing its stdout and stderr.
+// Throws std::runtime_error if the child exits with a non-zero code, or if it
+// has not exited within 5 seconds, in which case it is terminated first.
 void HdfsMiniCluster::addFile(std::string source, std::string destination) {
+  std::cout << "Starting to add file from " << source << " to " << destination
+            << std::endl;
+
+  // Pipes connected to the child's stdout and stderr.
+  boost::process::ipstream outStream;
+  boost::process::ipstream errStream;
+
   auto filePutProcess = std::make_shared<boost::process::child>(
       env_,
       exePath_,
@@ -85,13 +101,60 @@ void HdfsMiniCluster::addFile(std::string source, std::string destination) {
       filesystemUrl,
       filePutOption,
       source,
-      destination);
-  bool isExited =
-      filePutProcess->wait_for(std::chrono::duration<int, std::milli>(5000));
-  if (!isExited) {
-    VELOX_FAIL(
-        "Failed to add file to hdfs, exit code: {}",
-        filePutProcess->exit_code());
+      destination,
+      boost::process::std_out > outStream,
+      boost::process::std_err > errStream);
+
+  std::cout << "Process started with PID: " << filePutProcess->id()
+            << std::endl;
+
+  // Drain both pipes on background threads. Reading them one after the other
+  // on this thread can deadlock when the child fills the pipe that is not
+  // being read, and a blocking read would keep wait_for() below from ever
+  // enforcing the timeout. Reading until end-of-stream keeps blank lines.
+  const auto drain = [](boost::process::ipstream& stream) {
+    std::string text;
+    std::string line;
+    while (std::getline(stream, line)) {
+      text += line;
+      text += '\n';
+    }
+    return text;
+  };
+  auto outputFuture =
+      std::async(std::launch::async, [&] { return drain(outStream); });
+  auto errorFuture =
+      std::async(std::launch::async, [&] { return drain(errStream); });
+
+  const bool isExited =
+      filePutProcess->wait_for(std::chrono::milliseconds(5000));
+  if (!isExited) {
+    std::cerr << "Failed to add file to hdfs, process did not exit in time"
+              << std::endl;
+    filePutProcess->terminate();
+    std::cerr << "Process terminated" << std::endl;
+  }
+
+  // The readers finish once the write ends of the pipes are closed, i.e. after
+  // the child has exited or been terminated (assuming no descendant process
+  // still holds them open).
+  const std::string output = outputFuture.get();
+  const std::string error = errorFuture.get();
+  std::cout << "Process output: " << output << std::endl;
+  std::cout << "Process error: " << error << std::endl;
+
+  if (!isExited) {
+    throw std::runtime_error(
+        "Failed to add file to hdfs, process did not exit in time");
+  }
+
+  const int exitCode = filePutProcess->exit_code();
+  std::cout << "Process exited with code: " << exitCode << std::endl;
+  if (exitCode != 0) {
+    std::cerr << "Failed to add file to hdfs, exit code: " << exitCode
+              << ", error: " << error << std::endl;
+    throw std::runtime_error(
+        "Failed to add file to hdfs, exit code: " + std::to_string(exitCode));
   }
 }
 