Skip to content

Commit

Permalink
Throw kFileNotFound in all storage adapters (facebookincubator#8662)
Browse files Browse the repository at this point in the history
Summary: Pull Request resolved: facebookincubator#8662

Reviewed By: pedroerp

Differential Revision: D53416855

Pulled By: mbasmanova

fbshipit-source-id: 7f0b7e475d7725ae98ce0a6ca6eb03130bbbea57
  • Loading branch information
zhli1142015 authored and facebook-github-bot committed Feb 5, 2024
1 parent 985d1de commit 3c5d864
Show file tree
Hide file tree
Showing 12 changed files with 86 additions and 70 deletions.
28 changes: 18 additions & 10 deletions velox/common/base/tests/GTestUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,27 +63,35 @@
<< "Expected error message to contain '" << (_errorMessage) \
<< "', but received '" << status.message() << "'."

#define VELOX_ASSERT_ERROR_CODE_IMPL(_type, _expression, _errorCode) \
#define VELOX_ASSERT_ERROR_CODE_IMPL( \
_type, _expression, _errorCode, _errorMessage) \
try { \
(_expression); \
FAIL() << "Expected an exception"; \
} catch (const _type& e) { \
ASSERT_TRUE(e.errorCode() == _errorCode) \
<< "Expected error code to be '" << _errorCode << "', but received '" \
<< e.errorCode() << "'."; \
ASSERT_TRUE(e.message().find(_errorMessage) != std::string::npos) \
<< "Expected error message to contain '" << (_errorMessage) \
<< "', but received '" << e.message() << "'."; \
}

#define VELOX_ASSERT_THROW_CODE(_expression, _errorCode) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxException, _expression, _errorCode)
#define VELOX_ASSERT_THROW_CODE(_expression, _errorCode, _errorMessage) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxException, _expression, _errorCode, _errorMessage)

#define VELOX_ASSERT_USER_THROW_CODE(_expression, _errorCode) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxUserError, _expression, _errorCode)
#define VELOX_ASSERT_USER_THROW_CODE(_expression, _errorCode, _errorMessage) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxUserError, _expression, _errorCode, _errorMessage)

#define VELOX_ASSERT_RUNTIME_THROW_CODE(_expression, _errorCode) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxRuntimeError, _expression, _errorCode)
#define VELOX_ASSERT_RUNTIME_THROW_CODE( \
_expression, _errorCode, _errorMessage) \
VELOX_ASSERT_ERROR_CODE_IMPL( \
facebook::velox::VeloxRuntimeError, \
_expression, \
_errorCode, \
_errorMessage)

#ifndef NDEBUG
#define DEBUG_ONLY_TEST(test_fixture, test_name) TEST(test_fixture, test_name)
Expand Down
4 changes: 3 additions & 1 deletion velox/common/file/tests/FileTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,5 +324,7 @@ TEST(LocalFile, fileNotFound) {
auto path = fmt::format("{}/file", tempFolder->path);
auto localFs = filesystems::getFileSystem(path, nullptr);
VELOX_ASSERT_RUNTIME_THROW_CODE(
localFs->openFileForRead(path), error_code::kFileNotFound);
localFs->openFileForRead(path),
error_code::kFileNotFound,
"No such file or directory");
}
6 changes: 5 additions & 1 deletion velox/connectors/hive/storage_adapters/abfs/AbfsUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,15 @@ inline const std::string throwStorageExceptionWithOperationDetails(
std::string operation,
std::string path,
Azure::Storage::StorageException& error) {
VELOX_FAIL(
const auto errMsg = fmt::format(
"Operation '{}' to path '{}' encountered azure storage exception, Details: '{}'.",
operation,
path,
error.what());
if (error.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound) {
VELOX_FILE_NOT_FOUND_ERROR(errMsg);
}
VELOX_FAIL(errMsg);
}

} // namespace facebook::velox::filesystems::abfs
Original file line number Diff line number Diff line change
Expand Up @@ -171,18 +171,14 @@ TEST_F(AbfsFileSystemTest, multipleThreadsWithReadFile) {
}

TEST_F(AbfsFileSystemTest, missingFile) {
try {
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{"fs.azure.account.key.test.dfs.core.windows.net",
azuriteServer->connectionStr()}});
const std::string abfsFile =
facebook::velox::filesystems::test::AzuriteABFSEndpoint + "test.txt";
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
auto readFile = abfs->openFileForRead(abfsFile);
FAIL() << "Expected VeloxException";
} catch (VeloxException const& err) {
EXPECT_TRUE(err.message().find("404") != std::string::npos);
}
auto hiveConfig = AbfsFileSystemTest::hiveConfig(
{{"fs.azure.account.key.test.dfs.core.windows.net",
azuriteServer->connectionStr()}});
const std::string abfsFile =
facebook::velox::filesystems::test::AzuriteABFSEndpoint + "test.txt";
auto abfs = std::make_shared<filesystems::abfs::AbfsFileSystem>(hiveConfig);
VELOX_ASSERT_RUNTIME_THROW_CODE(
abfs->openFileForRead(abfsFile), error_code::kFileNotFound, "404");
}

TEST_F(AbfsFileSystemTest, openFileForWriteNotImplemented) {
Expand Down
9 changes: 6 additions & 3 deletions velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,17 @@ inline void checkGCSStatus(
const std::string& bucket,
const std::string& key) {
if (!outcome.ok()) {
auto error = outcome.error_info();
VELOX_FAIL(
const auto errMsg = fmt::format(
"{} due to: Path:'{}', SDK Error Type:{}, GCS Status Code:{}, Message:'{}'",
errorMsgPrefix,
gcsURI(bucket, key),
error.domain(),
outcome.error_info().domain(),
getErrorStringFromGCSError(outcome.code()),
outcome.message());
if (outcome.code() == gc::StatusCode::kNotFound) {
VELOX_FILE_NOT_FOUND_ERROR(errMsg);
}
VELOX_FAIL(errMsg);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,30 +285,20 @@ TEST_F(GCSFileSystemTest, missingFile) {
const std::string gcsFile = gcsURI(preexistingBucketName(), file);
filesystems::GCSFileSystem gcfs(testGcsOptions());
gcfs.initializeClient();
try {
gcfs.openFileForRead(gcsFile);
FAIL() << "Expected VeloxException";
} catch (VeloxException const& err) {
EXPECT_THAT(
err.message(),
::testing::HasSubstr(
"\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\""));
}
VELOX_ASSERT_RUNTIME_THROW_CODE(
gcfs.openFileForRead(gcsFile),
error_code::kFileNotFound,
"\\\"message\\\": \\\"Live version of object test1-gcs/newTest.txt does not exist.\\\"");
}

TEST_F(GCSFileSystemTest, missingBucket) {
filesystems::GCSFileSystem gcfs(testGcsOptions());
gcfs.initializeClient();
try {
const char* gcsFile = "gs://dummy/foo.txt";
gcfs.openFileForRead(gcsFile);
FAIL() << "Expected VeloxException";
} catch (VeloxException const& err) {
EXPECT_THAT(
err.message(),
::testing::HasSubstr(
"\\\"message\\\": \\\"Bucket dummy does not exist.\\\""));
}
const char* gcsFile = "gs://dummy/foo.txt";
VELOX_ASSERT_RUNTIME_THROW_CODE(
gcfs.openFileForRead(gcsFile),
error_code::kFileNotFound,
"\\\"message\\\": \\\"Bucket dummy does not exist.\\\"");
}

TEST_F(GCSFileSystemTest, credentialsConfig) {
Expand Down
16 changes: 11 additions & 5 deletions velox/connectors/hive/storage_adapters/hdfs/HdfsReadFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ namespace facebook::velox {
HdfsReadFile::HdfsReadFile(hdfsFS hdfs, const std::string_view path)
: hdfsClient_(hdfs), filePath_(path) {
fileInfo_ = hdfsGetPathInfo(hdfsClient_, filePath_.data());
VELOX_CHECK_NOT_NULL(
fileInfo_,
"Unable to get file path info for file: {}. got error: {}",
filePath_,
hdfsGetLastError());
if (fileInfo_ == nullptr) {
auto error = hdfsGetLastError();
auto errMsg = fmt::format(
"Unable to get file path info for file: {}. got error: {}",
filePath_,
error);
if (std::strstr(error, "FileNotFoundException") != nullptr) {
VELOX_FILE_NOT_FOUND_ERROR(errMsg);
}
VELOX_FAIL(errMsg);
}
}

HdfsReadFile::~HdfsReadFile() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,20 +218,14 @@ TEST_F(HdfsFileSystemTest, oneFsInstanceForOneEndpoint) {
}

TEST_F(HdfsFileSystemTest, missingFileViaFileSystem) {
try {
auto memConfig =
std::make_shared<const core::MemConfig>(configurationValues);
auto hdfsFileSystem =
filesystems::getFileSystem(fullDestinationPath, memConfig);
auto readFile = hdfsFileSystem->openFileForRead(
"hdfs://localhost:7777/path/that/does/not/exist");
FAIL() << "expected VeloxException";
} catch (VeloxException const& error) {
EXPECT_THAT(
error.message(),
testing::HasSubstr(
"Unable to get file path info for file: /path/that/does/not/exist. got error: FileNotFoundException: Path /path/that/does/not/exist does not exist."));
}
auto memConfig = std::make_shared<const core::MemConfig>(configurationValues);
auto hdfsFileSystem =
filesystems::getFileSystem(fullDestinationPath, memConfig);
VELOX_ASSERT_RUNTIME_THROW_CODE(
hdfsFileSystem->openFileForRead(
"hdfs://localhost:7777/path/that/does/not/exist"),
error_code::kFileNotFound,
"Unable to get file path info for file: /path/that/does/not/exist. got error: FileNotFoundException: Path /path/that/does/not/exist does not exist.");
}

TEST_F(HdfsFileSystemTest, missingHost) {
Expand Down
10 changes: 8 additions & 2 deletions velox/connectors/hive/storage_adapters/s3fs/S3Util.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

#include "velox/common/base/Exceptions.h"

#include <fmt/format.h>

namespace facebook::velox {

namespace {
Expand Down Expand Up @@ -158,7 +160,7 @@ inline std::string getRequestID(
{ \
if (!outcome.IsSuccess()) { \
auto error = outcome.GetError(); \
VELOX_FAIL( \
auto errMsg = fmt::format( \
"{} due to: '{}'. Path:'{}', SDK Error Type:{}, HTTP Status Code:{}, S3 Service:'{}', Message:'{}', RequestID:'{}'", \
errorMsgPrefix, \
getErrorStringFromS3Error(error), \
Expand All @@ -167,7 +169,11 @@ inline std::string getRequestID(
error.GetResponseCode(), \
getS3BackendService(error.GetResponseHeaders()), \
error.GetMessage(), \
getRequestID(error.GetResponseHeaders())) \
getRequestID(error.GetResponseHeaders())); \
if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { \
VELOX_FILE_NOT_FOUND_ERROR(errMsg); \
} \
VELOX_FAIL(errMsg) \
} \
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,18 @@ TEST_F(S3FileSystemTest, missingFile) {
addBucket(bucketName);
auto hiveConfig = minioServer_->hiveConfig();
filesystems::S3FileSystem s3fs(hiveConfig);
VELOX_ASSERT_THROW(
VELOX_ASSERT_RUNTIME_THROW_CODE(
s3fs.openFileForRead(s3File),
error_code::kFileNotFound,
"Failed to get metadata for S3 object due to: 'Resource not found'. Path:'s3://data1/i-do-not-exist.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'");
}

TEST_F(S3FileSystemTest, missingBucket) {
auto hiveConfig = minioServer_->hiveConfig();
filesystems::S3FileSystem s3fs(hiveConfig);
VELOX_ASSERT_THROW(
VELOX_ASSERT_RUNTIME_THROW_CODE(
s3fs.openFileForRead(kDummyPath),
error_code::kFileNotFound,
"Failed to get metadata for S3 object due to: 'Resource not found'. Path:'s3://dummy/foo.txt', SDK Error Type:16, HTTP Status Code:404, S3 Service:'MinIO', Message:'No response body.'");
}

Expand Down
3 changes: 3 additions & 0 deletions velox/docs/develop/connectors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ Storage Adapters
Hive Connector allows reading and writing files from a variety of distributed storage systems.
The supported storage API are S3, HDFS, GCS, Linux FS.

If file is not found when reading, `openFileForRead` API throws `VeloxRuntimeError` with `error_code::kFileNotFound`.
This behavior is necessary to support the `ignore_missing_files` configuration property.

S3 is supported using the `AWS SDK for C++ <https://github.com/aws/aws-sdk-cpp>`_ library.
S3 supported schemes are `s3://` (Amazon S3, Minio), `s3a://` (Hadoop 3.x), `s3n://` (Deprecated in Hadoop 3.x),
`oss://` (Alibaba cloud storage), and `cos://`, `cosn://` (Tencent cloud storage).
Expand Down
4 changes: 3 additions & 1 deletion velox/exec/tests/TableScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1400,7 +1400,9 @@ TEST_F(TableScanTest, fileNotFound) {
};
assertMissingFile(true);
VELOX_ASSERT_RUNTIME_THROW_CODE(
assertMissingFile(false), error_code::kFileNotFound);
assertMissingFile(false),
error_code::kFileNotFound,
"No such file or directory");
}

// A valid ORC file (containing headers) but no data.
Expand Down

0 comments on commit 3c5d864

Please sign in to comment.