deprecate unused parameter of NewRowGroup()

kszucs committed Dec 17, 2024
1 parent 857092c · commit 81c50e0

Showing 4 changed files with 15 additions and 11 deletions.
cpp/src/arrow/dataset/file_parquet_test.cc (3 changes: 1 addition & 2 deletions)
@@ -85,15 +85,14 @@ class ParquetFormatHelper {
   static Status WriteRecordBatch(const RecordBatch& batch,
                                  parquet::arrow::FileWriter* writer) {
     auto schema = batch.schema();
-    auto size = batch.num_rows();
 
     if (!schema->Equals(*writer->schema(), false)) {
       return Status::Invalid("RecordBatch schema does not match this writer's. batch:'",
                              schema->ToString(), "' this:'", writer->schema()->ToString(),
                              "'");
     }
 
-    RETURN_NOT_OK(writer->NewRowGroup(size));
+    RETURN_NOT_OK(writer->NewRowGroup());
     for (int i = 0; i < batch.num_columns(); i++) {
       RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i)));
     }
cpp/src/parquet/arrow/arrow_reader_writer_test.cc (10 changes: 5 additions & 5 deletions)
@@ -739,7 +739,7 @@ class ParquetIOTestBase : public ::testing::Test {
     ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
                                         MakeWriter(schema), arrow_schema,
                                         default_arrow_writer_properties(), &writer));
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
     ASSERT_OK_NO_THROW(writer->Close());
     // writer->Close() should be idempotent

@@ -1053,7 +1053,7 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
                                       this->MakeWriter(schema), arrow_schema,
                                       default_arrow_writer_properties(), &writer));
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
   }

@@ -1126,7 +1126,7 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
                                       this->MakeWriter(schema), arrow_schema,
                                       default_arrow_writer_properties(), &writer));
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
   }

@@ -5128,7 +5128,7 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
         ::arrow::default_memory_pool(),
         ParquetFileWriter::Open(this->sink_, schema_node, writer_properties),
         arrow_schema, default_arrow_writer_properties(), &writer));
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
     ASSERT_OK_NO_THROW(writer->Close());
   }

@@ -5460,7 +5460,7 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
   // Operations on closed writer are invalid
   ASSERT_OK(writer->Close());
 
-  ASSERT_RAISES(Invalid, writer->NewRowGroup(1));
+  ASSERT_RAISES(Invalid, writer->NewRowGroup());
   ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1));
   ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup());
   ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch());
cpp/src/parquet/arrow/writer.cc (4 changes: 2 additions & 2 deletions)
@@ -305,7 +305,7 @@ class FileWriterImpl : public FileWriter {
                                 default_arrow_reader_properties(), &schema_manifest_);
   }
 
-  Status NewRowGroup(int64_t chunk_size) override {
+  Status NewRowGroup() override {
     RETURN_NOT_OK(CheckClosed());
     if (row_group_writer_ != nullptr) {
       PARQUET_CATCH_NOT_OK(row_group_writer_->Close());

@@ -379,7 +379,7 @@ class FileWriterImpl : public FileWriter {
     }
 
     auto WriteRowGroup = [&](int64_t offset, int64_t size) {
-      RETURN_NOT_OK(NewRowGroup(size));
+      RETURN_NOT_OK(NewRowGroup());
      for (int i = 0; i < table.num_columns(); i++) {
        RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
      }
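
Row-group sizing still lives on WriteTable(): the WriteRowGroup lambda above runs once per table slice, so callers control row-group length there rather than through NewRowGroup(). A minimal usage sketch, not part of this commit (WriteChunked is a hypothetical helper; it assumes the existing WriteTable(const ::arrow::Table&, int64_t chunk_size) signature):

#include "arrow/status.h"
#include "arrow/table.h"
#include "parquet/arrow/writer.h"

// Write `table` as row groups of up to 64 Ki rows each; internally,
// WriteTable() starts each group via the parameterless NewRowGroup().
::arrow::Status WriteChunked(const ::arrow::Table& table,
                             parquet::arrow::FileWriter* writer) {
  ARROW_RETURN_NOT_OK(writer->WriteTable(table, /*chunk_size=*/64 * 1024));
  return writer->Close();
}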
cpp/src/parquet/arrow/writer.h (9 changes: 7 additions & 2 deletions)
@@ -87,9 +87,14 @@ class PARQUET_EXPORT FileWriter {
   /// \brief Start a new row group.
   ///
   /// Returns an error if not all columns have been written.
+  virtual ::arrow::Status NewRowGroup() = 0;
+
+  /// \brief Start a new row group.
+  ///
   /// \param chunk_size the number of rows in the next row group.
-  virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
+  /// \deprecated Deprecated in 19.0.0.
+  ARROW_DEPRECATED(
+      "Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size` argument.")
+  virtual ::arrow::Status NewRowGroup(int64_t chunk_size) { return NewRowGroup(); }
 
   /// \brief Write ColumnChunk in row group using an array.
   virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
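
For downstream code, migrating is just a matter of dropping the argument. A minimal sketch, not part of this commit, mirroring the WriteRecordBatch helper from file_parquet_test.cc above (WriteOneRowGroup is a hypothetical name):

#include "arrow/record_batch.h"
#include "arrow/status.h"
#include "parquet/arrow/writer.h"

::arrow::Status WriteOneRowGroup(const ::arrow::RecordBatch& batch,
                                 parquet::arrow::FileWriter* writer) {
  // Before 19.0.0: writer->NewRowGroup(batch.num_rows());
  // The deprecated overload now forwards here and ignores chunk_size.
  ARROW_RETURN_NOT_OK(writer->NewRowGroup());
  for (int i = 0; i < batch.num_columns(); i++) {
    // Each column chunk takes its length from the array being written.
    ARROW_RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i)));
  }
  return ::arrow::Status::OK();
}

Since ARROW_DEPRECATED expands to the standard [[deprecated]] attribute, existing callers keep compiling with a warning until they migrate.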
