diff --git a/velox/common/base/tests/GTestUtils.h b/velox/common/base/tests/GTestUtils.h index f11b61520b1c..4104b6955f94 100644 --- a/velox/common/base/tests/GTestUtils.h +++ b/velox/common/base/tests/GTestUtils.h @@ -63,7 +63,8 @@ << "Expected error message to contain '" << (_errorMessage) \ << "', but received '" << status.message() << "'." -#define VELOX_ASSERT_ERROR_CODE_IMPL(_type, _expression, _errorCode) \ +#define VELOX_ASSERT_ERROR_CODE_IMPL( \ + _type, _expression, _errorCode, _errorMessage) \ try { \ (_expression); \ FAIL() << "Expected an exception"; \ @@ -71,19 +72,26 @@ ASSERT_TRUE(e.errorCode() == _errorCode) \ << "Expected error code to be '" << _errorCode << "', but received '" \ << e.errorCode() << "'."; \ + ASSERT_TRUE(e.message().find(_errorMessage) != std::string::npos) \ + << "Expected error message to contain '" << (_errorMessage) \ + << "', but received '" << e.message() << "'."; \ } -#define VELOX_ASSERT_THROW_CODE(_expression, _errorCode) \ - VELOX_ASSERT_ERROR_CODE_IMPL( \ - facebook::velox::VeloxException, _expression, _errorCode) +#define VELOX_ASSERT_THROW_CODE(_expression, _errorCode, _errorMessage) \ + VELOX_ASSERT_ERROR_CODE_IMPL( \ + facebook::velox::VeloxException, _expression, _errorCode, _errorMessage) -#define VELOX_ASSERT_USER_THROW_CODE(_expression, _errorCode) \ - VELOX_ASSERT_ERROR_CODE_IMPL( \ - facebook::velox::VeloxUserError, _expression, _errorCode) +#define VELOX_ASSERT_USER_THROW_CODE(_expression, _errorCode, _errorMessage) \ + VELOX_ASSERT_ERROR_CODE_IMPL( \ + facebook::velox::VeloxUserError, _expression, _errorCode, _errorMessage) -#define VELOX_ASSERT_RUNTIME_THROW_CODE(_expression, _errorCode) \ - VELOX_ASSERT_ERROR_CODE_IMPL( \ - facebook::velox::VeloxRuntimeError, _expression, _errorCode) +#define VELOX_ASSERT_RUNTIME_THROW_CODE( \ + _expression, _errorCode, _errorMessage) \ + VELOX_ASSERT_ERROR_CODE_IMPL( \ + facebook::velox::VeloxRuntimeError, \ + _expression, \ + _errorCode, \ + _errorMessage) #ifndef NDEBUG #define DEBUG_ONLY_TEST(test_fixture, test_name) TEST(test_fixture, test_name) diff --git a/velox/common/file/tests/FileTest.cpp b/velox/common/file/tests/FileTest.cpp index 1b73653495de..af5654a72369 100644 --- a/velox/common/file/tests/FileTest.cpp +++ b/velox/common/file/tests/FileTest.cpp @@ -324,5 +324,7 @@ TEST(LocalFile, fileNotFound) { auto path = fmt::format("{}/file", tempFolder->path); auto localFs = filesystems::getFileSystem(path, nullptr); VELOX_ASSERT_RUNTIME_THROW_CODE( - localFs->openFileForRead(path), error_code::kFileNotFound); + localFs->openFileForRead(path), + error_code::kFileNotFound, + "No such file or directory"); } diff --git a/velox/connectors/hive/storage_adapters/abfs/AbfsUtil.h b/velox/connectors/hive/storage_adapters/abfs/AbfsUtil.h index 609623fe47a4..2af0f4239009 100644 --- a/velox/connectors/hive/storage_adapters/abfs/AbfsUtil.h +++ b/velox/connectors/hive/storage_adapters/abfs/AbfsUtil.h @@ -66,11 +66,15 @@ inline const std::string throwStorageExceptionWithOperationDetails( std::string operation, std::string path, Azure::Storage::StorageException& error) { - VELOX_FAIL( + const auto errMsg = fmt::format( "Operation '{}' to path '{}' encountered azure storage exception, Details: '{}'.", operation, path, error.what()); + if (error.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) { + VELOX_FILE_NOT_FOUND_ERROR(errMsg); + } + VELOX_FAIL(errMsg); } } // namespace facebook::velox::filesystems::abfs diff --git a/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp index 98354f9dd52c..76dc3e834261 100644 --- a/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/abfs/tests/AbfsFileSystemTest.cpp @@ -171,18 +171,14 @@ TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) { } TEST_F(AbfsFileSystemTest, missingFile) { - try { - auto hiveConfig = AbfsFileSystemTest::hiveConfig( - {{"fs.azure.account.key.test.dfs.core.windows.net", - azuriteServer->connectionStr()}}); - const std::string abfsFile = - facebook::velox::filesystems::test::AzuriteABFSEndpoint + "test.txt"; - auto abfs = std::make_shared(hiveConfig); - auto readFile = abfs->openFileForRead(abfsFile); - FAIL() << "Expected VeloxException"; - } catch (VeloxException const& err) { - EXPECT_TRUE(err.message().find("404") != std::string::npos); - } + auto hiveConfig = AbfsFileSystemTest::hiveConfig( + {{"fs.azure.account.key.test.dfs.core.windows.net", + azuriteServer->connectionStr()}}); + const std::string abfsFile = + facebook::velox::filesystems::test::AzuriteABFSEndpoint + "test.txt"; + auto abfs = std::make_shared(hiveConfig); + VELOX_ASSERT_RUNTIME_THROW_CODE( + abfs->openFileForRead(abfsFile), error_code::kFileNotFound, "404"); } TEST_F(AbfsFileSystemTest, openFileForWriteNotImplemented) { diff --git a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp index 6eaada548701..cf9371d62a2c 100644 --- a/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp @@ -48,14 +48,17 @@ inline void checkGCSStatus( const std::string& bucket, const std::string& key) { if (!outcome.ok()) { - auto error = outcome.error_info(); - VELOX_FAIL( + const auto errMsg = fmt::format( "{} due to: Path:'{}', SDK Error Type:{}, GCS Status Code:{}, Message:'{}'", errorMsgPrefix, gcsURI(bucket, key), - error.domain(), + outcome.error_info().domain(), getErrorStringFromGCSError(outcome.code()), outcome.message()); + if (outcome.code() == gc::StatusCode::kNotFound) { + VELOX_FILE_NOT_FOUND_ERROR(errMsg); + } + VELOX_FAIL(errMsg); } } diff --git a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp index a0cb3c7c5222..545af97ba4a2 100644 --- a/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/gcs/tests/GCSFileSystemTest.cpp @@ -285,30 +285,20 @@ TEST_F(GCSFileSystemTest, missingFile) { const std::string gcsFile = gcsURI(preexistingBucketName(), file); filesystems::GCSFileSystem gcfs(testGcsOptions()); gcfs.initializeClient(); - try { - gcfs.openFileForRead(gcsFile); - FAIL() << "Expected VeloxException"; - } catch (VeloxException const& err) { - EXPECT_THAT( - err.message(), - ::testing::HasSubstr( - "\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\"")); - } + VELOX_ASSERT_RUNTIME_THROW_CODE( + gcfs.openFileForRead(gcsFile), + error_code::kFileNotFound, + "\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\""); } TEST_F(GCSFileSystemTest, missingBucket) { filesystems::GCSFileSystem gcfs(testGcsOptions()); gcfs.initializeClient(); - try { - const char* gcsFile = "gs://dummy/foo.txt"; - gcfs.openFileForRead(gcsFile); - FAIL() << "Expected VeloxException"; - } catch (VeloxException const& err) { - EXPECT_THAT( - err.message(), - ::testing::HasSubstr( - "\\\"message\\\": \\\"Bucket dummy does not exist.\\\"")); - } + const char* gcsFile = "gs://dummy/foo.txt"; + VELOX_ASSERT_RUNTIME_THROW_CODE( + gcfs.openFileForRead(gcsFile), + error_code::kFileNotFound, + "\\\"message\\\": \\\"Bucket dummy does not exist.\\\""); } TEST_F(GCSFileSystemTest, credentialsConfig) { diff --git a/velox/connectors/hive/storage_adapters/hdfs/HdfsReadFile.cpp b/velox/connectors/hive/storage_adapters/hdfs/HdfsReadFile.cpp index 84bbd217d474..9d99420c9d7e 100644 --- a/velox/connectors/hive/storage_adapters/hdfs/HdfsReadFile.cpp +++ b/velox/connectors/hive/storage_adapters/hdfs/HdfsReadFile.cpp @@ -23,11 +23,17 @@ namespace facebook::velox { HdfsReadFile::HdfsReadFile(hdfsFS hdfs, const std::string_view path) : hdfsClient_(hdfs), filePath_(path) { fileInfo_ = hdfsGetPathInfo(hdfsClient_, filePath_.data()); - VELOX_CHECK_NOT_NULL( - fileInfo_, - "Unable to get file path info for file: {}. got error: {}", - filePath_, - hdfsGetLastError()); + if (fileInfo_ == nullptr) { + auto error = hdfsGetLastError(); + auto errMsg = fmt::format( + "Unable to get file path info for file: {}. got error: {}", + filePath_, + error); + if (std::strstr(error, "FileNotFoundException") != nullptr) { + VELOX_FILE_NOT_FOUND_ERROR(errMsg); + } + VELOX_FAIL(errMsg); + } } HdfsReadFile::~HdfsReadFile() { diff --git a/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp b/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp index f9d50f5985d6..ac8ed66f7c0f 100644 --- a/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/hdfs/tests/HdfsFileSystemTest.cpp @@ -218,20 +218,14 @@ TEST_F(HdfsFileSystemTest, oneFsInstanceForOneEndpoint) { } TEST_F(HdfsFileSystemTest, missingFileViaFileSystem) { - try { - auto memConfig = - std::make_shared(configurationValues); - auto hdfsFileSystem = - filesystems::getFileSystem(fullDestinationPath, memConfig); - auto readFile = hdfsFileSystem->openFileForRead( - "hdfs://localhost:7777/path/that/does/not/exist"); - FAIL() << "expected VeloxException"; - } catch (VeloxException const& error) { - EXPECT_THAT( - error.message(), - testing::HasSubstr( - "Unable to get file path info for file: /path/that/does/not/exist. got error: FileNotFoundException: Path /path/that/does/not/exist does not exist.")); - } + auto memConfig = std::make_shared(configurationValues); + auto hdfsFileSystem = + filesystems::getFileSystem(fullDestinationPath, memConfig); + VELOX_ASSERT_RUNTIME_THROW_CODE( + hdfsFileSystem->openFileForRead( + "hdfs://localhost:7777/path/that/does/not/exist"), + error_code::kFileNotFound, + "Unable to get file path info for file: /path/that/does/not/exist. got error: FileNotFoundException: Path /path/that/does/not/exist does not exist."); } TEST_F(HdfsFileSystemTest, missingHost) { diff --git a/velox/connectors/hive/storage_adapters/s3fs/S3Util.h b/velox/connectors/hive/storage_adapters/s3fs/S3Util.h index 09f42a47ffb5..ec67fb0c4175 100644 --- a/velox/connectors/hive/storage_adapters/s3fs/S3Util.h +++ b/velox/connectors/hive/storage_adapters/s3fs/S3Util.h @@ -26,6 +26,8 @@ #include "velox/common/base/Exceptions.h" +#include + namespace facebook::velox { namespace { @@ -158,7 +160,7 @@ inline std::string getRequestID( { \ if (!outcome.IsSuccess()) { \ auto error = outcome.GetError(); \ - VELOX_FAIL( \ + auto errMsg = fmt::format( \ "{} due to: '{}'. Path:'{}', SDK Error Type:{}, HTTP Status Code:{}, S3 Service:'{}', Message:'{}', RequestID:'{}'", \ errorMsgPrefix, \ getErrorStringFromS3Error(error), \ @@ -167,7 +169,11 @@ inline std::string getRequestID( error.GetResponseCode(), \ getS3BackendService(error.GetResponseHeaders()), \ error.GetMessage(), \ - getRequestID(error.GetResponseHeaders())) \ + getRequestID(error.GetResponseHeaders())); \ + if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { \ + VELOX_FILE_NOT_FOUND_ERROR(errMsg); \ + } \ + VELOX_FAIL(errMsg) \ } \ } diff --git a/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp b/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp index 2d0b56fedf6f..1f383e37b381 100644 --- a/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp +++ b/velox/connectors/hive/storage_adapters/s3fs/tests/S3FileSystemTest.cpp @@ -113,16 +113,18 @@ TEST_F(S3FileSystemTest, missingFile) { addBucket(bucketName); auto hiveConfig = minioServer_->hiveConfig(); filesystems::S3FileSystem s3fs(hiveConfig); - VELOX_ASSERT_THROW( + VELOX_ASSERT_RUNTIME_THROW_CODE( s3fs.openFileForRead(s3File), + error_code::kFileNotFound, "Failed to get metadata for S3 object due to: 'Resource not found'. Path:'s3://data1/i-do-not-exist.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'"); } TEST_F(S3FileSystemTest, missingBucket) { auto hiveConfig = minioServer_->hiveConfig(); filesystems::S3FileSystem s3fs(hiveConfig); - VELOX_ASSERT_THROW( + VELOX_ASSERT_RUNTIME_THROW_CODE( s3fs.openFileForRead(kDummyPath), + error_code::kFileNotFound, "Failed to get metadata for S3 object due to: 'Resource not found'. Path:'s3://dummy/foo.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'"); } diff --git a/velox/docs/develop/connectors.rst b/velox/docs/develop/connectors.rst index 16bedb50cf77..2d4602508741 100644 --- a/velox/docs/develop/connectors.rst +++ b/velox/docs/develop/connectors.rst @@ -82,6 +82,9 @@ Storage Adapters Hive Connector allows reading and writing files from a variety of distributed storage systems. The supported storage API are S3, HDFS, GCS, Linux FS. +If file is not found when reading, `openFileForRead` API throws `VeloxRuntimeError` with `error_code::kFileNotFound`. +This behavior is necessary to support the `ignore_missing_files` configuration property. + S3 is supported using the `AWS SDK for C++ `_ library. S3 supported schemes are `s3://` (Amazon S3, Minio), `s3a://` (Hadoop 3.x), `s3n://` (Deprecated in Hadoop 3.x), `oss://` (Alibaba cloud storage), and `cos://`, `cosn://` (Tencent cloud storage). diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp index a0919adb5d5b..76485ebb37fa 100644 --- a/velox/exec/tests/TableScanTest.cpp +++ b/velox/exec/tests/TableScanTest.cpp @@ -1400,7 +1400,9 @@ TEST_F(TableScanTest, fileNotFound) { }; assertMissingFile(true); VELOX_ASSERT_RUNTIME_THROW_CODE( - assertMissingFile(false), error_code::kFileNotFound); + assertMissingFile(false), + error_code::kFileNotFound, + "No such file or directory"); } // A valid ORC file (containing headers) but no data.