Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-38333: [C++][FS][Azure] Implement file writes #38780

Merged
Merged
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
89899b4
Paste in object append stream from #12914
Tom-Newton Nov 8, 2023
12c1978
Delete hierarchical namespace code paths
Tom-Newton Nov 9, 2023
bd787cc
Paste in tests from gcsfs_test.cc
Tom-Newton Nov 15, 2023
cb9879b
Mostly correct tests
Tom-Newton Nov 15, 2023
bbaf138
Paste in OpenOutputStream and OpenAppendStream from #12914
Tom-Newton Nov 15, 2023
2f27fbb
Tests pass
Tom-Newton Nov 16, 2023
bfeb57d
Better error handling and tidy
Tom-Newton Nov 16, 2023
d4c53b2
Implement append and output
Tom-Newton Nov 17, 2023
a1a8c15
Fix rebase
Tom-Newton Nov 18, 2023
0b0a2ee
Add tests to distinguish OpenAppendStream and OpenOutputStream
Tom-Newton Nov 18, 2023
c69f188
More precise error handling on calls to Azure blob storage
Tom-Newton Nov 18, 2023
5a91113
Avoid unnecessary extra block list fetching and committing
Tom-Newton Nov 18, 2023
1c4300b
Adjust block_ids and add some comments
Tom-Newton Nov 19, 2023
a62b95e
Add test for writing blob metadata
Tom-Newton Nov 19, 2023
28ec1ed
Implement metadata writes
Tom-Newton Nov 19, 2023
2800dea
Add simple sanity checks on the location
Tom-Newton Nov 19, 2023
9a98a0f
Tidy
Tom-Newton Nov 19, 2023
d86c374
PR comments 1
Tom-Newton Nov 19, 2023
77f8345
PR comments2: move `DoAppend` to private
Tom-Newton Nov 19, 2023
0178660
Autoformat
Tom-Newton Nov 19, 2023
7b19feb
Handle TODO(GH-38780) comments for using fs to write data in tests
Tom-Newton Nov 19, 2023
74a6d55
PR comments: tests tidy up
Tom-Newton Nov 20, 2023
9a0d9ca
Add a comment about the risk of overlapping block ids
Tom-Newton Nov 20, 2023
0e84a82
Lint
Tom-Newton Nov 20, 2023
ad0ed8f
Lint
Tom-Newton Nov 20, 2023
7412b90
Simplify
kou Nov 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add test for writing blob metadata
Tom-Newton committed Nov 19, 2023
commit a62b95e3d6d87dde79235fa1aef825ea04eac2ff
81 changes: 37 additions & 44 deletions cpp/src/arrow/filesystem/azurefs_test.cc
Original file line number Diff line number Diff line change
@@ -647,49 +647,43 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) {
ASSERT_RAISES(Invalid, stream->Tell());
}

// TEST_F(AzuriteFileSystemTest, TestWriteWithDefaults) {
// auto options = TestGcsOptions();
// options.default_bucket_location = "utopia";
// options.default_metadata = arrow::key_value_metadata({{"foo", "bar"}});
// auto fs = GcsFileSystem::Make(options);
// std::string bucket = "new_bucket_with_default_location";
// auto file_name = "object_with_defaults";
// ASSERT_OK(fs->CreateDir(bucket, /*recursive=*/false));
// const auto path = bucket + "/" + file_name;
// std::shared_ptr<io::OutputStream> output;
// ASSERT_OK_AND_ASSIGN(output, fs->OpenOutputStream(path, /*metadata=*/{}));
// const auto expected = std::string(kLoremIpsum);
// ASSERT_OK(output->Write(expected.data(), expected.size()));
// ASSERT_OK(output->Close());

// // Verify we can read the object back.
// std::shared_ptr<io::InputStream> input;
// ASSERT_OK_AND_ASSIGN(input, fs->OpenInputStream(path));

// std::array<char, 1024> inbuf{};
// std::int64_t size;
// ASSERT_OK_AND_ASSIGN(size, input->Read(inbuf.size(), inbuf.data()));

// EXPECT_EQ(std::string(inbuf.data(), size), expected);
// auto object = GcsClient().GetObjectMetadata(bucket, file_name);
// ASSERT_TRUE(object.ok()) << "status=" << object.status();
// EXPECT_EQ(object->mutable_metadata()["foo"], "bar");
// auto bucket_info = GcsClient().GetBucketMetadata(bucket);
// ASSERT_TRUE(bucket_info.ok()) << "status=" << object.status();
// EXPECT_EQ(bucket_info->location(), "utopia");

// // Check that explicit metadata overrides the defaults.
// ASSERT_OK_AND_ASSIGN(
// output, fs->OpenOutputStream(
// path, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}})));
// ASSERT_OK(output->Write(expected.data(), expected.size()));
// ASSERT_OK(output->Close());
// object = GcsClient().GetObjectMetadata(bucket, file_name);
// ASSERT_TRUE(object.ok()) << "status=" << object.status();
// EXPECT_EQ(object->mutable_metadata()["bar"], "foo");
// // Defaults are overwritten and not merged.
// EXPECT_FALSE(object->has_metadata("foo"));
// }
TEST_F(AzuriteFileSystemTest, TestWriteMetadata) {
options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}});

std::shared_ptr<AzureFileSystem> fs_with_defaults;
ASSERT_OK_AND_ASSIGN(fs_with_defaults, AzureFileSystem::Make(options_));
Tom-Newton marked this conversation as resolved.
Show resolved Hide resolved
std::string path = "object_with_defaults";
auto location = PreexistingContainerPath() + path;
std::shared_ptr<io::OutputStream> output;
ASSERT_OK_AND_ASSIGN(output,
Tom-Newton marked this conversation as resolved.
Show resolved Hide resolved
fs_with_defaults->OpenOutputStream(location, /*metadata=*/{}));
const auto expected = std::string(kLoremIpsum);
ASSERT_OK(output->Write(expected.data(), expected.size()));
Tom-Newton marked this conversation as resolved.
Show resolved Hide resolved
ASSERT_OK(output->Close());

// Verify the metadata has been set.
auto blob_metadata =
blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
.GetBlockBlobClient(path)
.GetProperties()
.Value.Metadata;
EXPECT_EQ(blob_metadata["foo"], "bar");
Tom-Newton marked this conversation as resolved.
Show resolved Hide resolved

// Check that explicit metadata overrides the defaults.
ASSERT_OK_AND_ASSIGN(
output, fs_with_defaults->OpenOutputStream(
location, /*metadata=*/arrow::key_value_metadata({{"bar", "foo"}})));
ASSERT_OK(output->Write(expected.data(), expected.size()));
Tom-Newton marked this conversation as resolved.
Show resolved Hide resolved
ASSERT_OK(output->Close());
blob_metadata =
blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
.GetBlockBlobClient(path)
.GetProperties()
.Value.Metadata;
EXPECT_EQ(blob_metadata["bar"], "foo");
// Defaults are overwritten and not merged.
EXPECT_EQ(blob_metadata.find("foo"), blob_metadata.end());
Tom-Newton marked this conversation as resolved.
Show resolved Hide resolved
}

TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) {
const auto path = PreexistingContainerPath() + "test-write-object";
@@ -714,7 +708,6 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) {
const auto path = PreexistingContainerPath() + "test-write-object";
std::shared_ptr<io::OutputStream> output;
ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {}));
// These buffer sizes are intentionally not multiples of the upload quantum (256 KiB).
std::array<std::int64_t, 3> sizes{257 * 1024, 258 * 1024, 259 * 1024};
std::array<std::string, 3> buffers{
std::string(sizes[0], 'A'),