diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc
index 2c05dcd9be459..536fcdb21c107 100644
--- a/cpp/src/arrow/dataset/file_parquet_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_test.cc
@@ -85,7 +85,6 @@ class ParquetFormatHelper {
   static Status WriteRecordBatch(const RecordBatch& batch,
                                  parquet::arrow::FileWriter* writer) {
     auto schema = batch.schema();
-    auto size = batch.num_rows();
 
     if (!schema->Equals(*writer->schema(), false)) {
       return Status::Invalid("RecordBatch schema does not match this writer's. batch:'",
@@ -93,7 +92,7 @@ class ParquetFormatHelper {
                              "'");
     }
 
-    RETURN_NOT_OK(writer->NewRowGroup(size));
+    RETURN_NOT_OK(writer->NewRowGroup());
     for (int i = 0; i < batch.num_columns(); i++) {
       RETURN_NOT_OK(writer->WriteColumnChunk(*batch.column(i)));
     }
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index f8e639176aba3..9ea27b983131f 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -739,7 +739,7 @@ class ParquetIOTestBase : public ::testing::Test {
     ASSERT_OK_NO_THROW(FileWriter::Make(::arrow::default_memory_pool(),
                                         MakeWriter(schema), arrow_schema,
                                         default_arrow_writer_properties(), &writer));
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
     ASSERT_OK_NO_THROW(writer->Close());
     // writer->Close() should be idempotent
@@ -1053,7 +1053,7 @@ TYPED_TEST(TestParquetIO, SingleColumnRequiredChunkedWrite) {
                                       this->MakeWriter(schema), arrow_schema,
                                       default_arrow_writer_properties(), &writer));
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
   }
@@ -1126,7 +1126,7 @@ TYPED_TEST(TestParquetIO, SingleColumnOptionalChunkedWrite) {
                                       this->MakeWriter(schema), arrow_schema,
                                       default_arrow_writer_properties(), &writer));
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(chunk_size));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
     std::shared_ptr<Array> sliced_array = values->Slice(i * chunk_size, chunk_size);
     ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*sliced_array));
   }
@@ -5128,7 +5128,7 @@ class TestIntegerAnnotateDecimalTypeParquetIO : public TestParquetIO<TestType> {
         ::arrow::default_memory_pool(),
         ParquetFileWriter::Open(this->sink_, schema_node, writer_properties),
         arrow_schema, default_arrow_writer_properties(), &writer));
-    ASSERT_OK_NO_THROW(writer->NewRowGroup(values->length()));
+    ASSERT_OK_NO_THROW(writer->NewRowGroup());
    ASSERT_OK_NO_THROW(writer->WriteColumnChunk(*values));
     ASSERT_OK_NO_THROW(writer->Close());
   }
@@ -5460,7 +5460,7 @@ TEST(TestArrowReadWrite, OperationsOnClosedWriter) {
   // Operations on closed writer are invalid
   ASSERT_OK(writer->Close());
 
-  ASSERT_RAISES(Invalid, writer->NewRowGroup(1));
+  ASSERT_RAISES(Invalid, writer->NewRowGroup());
   ASSERT_RAISES(Invalid, writer->WriteColumnChunk(table->column(0), 0, 1));
   ASSERT_RAISES(Invalid, writer->NewBufferedRowGroup());
   ASSERT_OK_AND_ASSIGN(auto record_batch, table->CombineChunksToBatch());
diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc
index 463713df1b1aa..c6d86648c1d63 100644
--- a/cpp/src/parquet/arrow/writer.cc
+++ b/cpp/src/parquet/arrow/writer.cc
@@ -305,7 +305,7 @@ class FileWriterImpl : public FileWriter {
                                 default_arrow_reader_properties(), &schema_manifest_);
   }
 
-  Status NewRowGroup(int64_t chunk_size) override {
+  Status NewRowGroup() override {
     RETURN_NOT_OK(CheckClosed());
     if (row_group_writer_ != nullptr) {
       PARQUET_CATCH_NOT_OK(row_group_writer_->Close());
@@ -379,7 +379,7 @@ class FileWriterImpl : public FileWriter {
     }
 
     auto WriteRowGroup = [&](int64_t offset, int64_t size) {
-      RETURN_NOT_OK(NewRowGroup(size));
+      RETURN_NOT_OK(NewRowGroup());
       for (int i = 0; i < table.num_columns(); i++) {
         RETURN_NOT_OK(WriteColumnChunk(table.column(i), offset, size));
       }
diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h
index 4e1ddafd9a082..5bd33173bba64 100644
--- a/cpp/src/parquet/arrow/writer.h
+++ b/cpp/src/parquet/arrow/writer.h
@@ -87,9 +87,14 @@ class PARQUET_EXPORT FileWriter {
   /// \brief Start a new row group.
   ///
   /// Returns an error if not all columns have been written.
+  virtual ::arrow::Status NewRowGroup() = 0;
+
+  /// \brief Start a new row group.
   ///
-  /// \param chunk_size the number of rows in the next row group.
-  virtual ::arrow::Status NewRowGroup(int64_t chunk_size) = 0;
+  /// \deprecated Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size` argument.
+  ARROW_DEPRECATED(
+      "Deprecated in 19.0.0. Use NewRowGroup() without the `chunk_size` argument.")
+  virtual ::arrow::Status NewRowGroup(int64_t chunk_size) { return NewRowGroup(); }
 
   /// \brief Write ColumnChunk in row group using an array.
   virtual ::arrow::Status WriteColumnChunk(const ::arrow::Array& data) = 0;
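
Since the `chunk_size` argument was ignored by the writer (the row group's size is determined by the `WriteColumnChunk` calls, not by `NewRowGroup`), callers migrate by simply dropping the argument and bounding each column-chunk write themselves. Below is a minimal caller-side sketch of that pattern; it is not part of the patch, and `WriteChunked`, its parameters, and the use of the `arrow::Result`-returning `FileWriter::Open` overload are illustrative assumptions.

```cpp
// Hypothetical migration sketch; `WriteChunked` and its parameters are
// illustrative, not part of this patch.
#include <algorithm>
#include <memory>

#include "arrow/io/interfaces.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/table.h"
#include "parquet/arrow/writer.h"

arrow::Status WriteChunked(const std::shared_ptr<arrow::Table>& table,
                           std::shared_ptr<arrow::io::OutputStream> sink,
                           int64_t chunk_size) {
  if (chunk_size <= 0) {
    return arrow::Status::Invalid("chunk_size must be positive");
  }
  ARROW_ASSIGN_OR_RAISE(
      auto writer, parquet::arrow::FileWriter::Open(
                       *table->schema(), arrow::default_memory_pool(), sink));
  for (int64_t offset = 0; offset < table->num_rows(); offset += chunk_size) {
    const int64_t size = std::min(chunk_size, table->num_rows() - offset);
    // Pre-patch code passed a row count here (NewRowGroup(chunk_size)); the
    // writer never used it, so the argument-free overload behaves the same.
    ARROW_RETURN_NOT_OK(writer->NewRowGroup());
    // The row group's actual size is whatever each column chunk writes.
    for (int i = 0; i < table->num_columns(); i++) {
      ARROW_RETURN_NOT_OK(writer->WriteColumnChunk(table->column(i), offset, size));
    }
  }
  return writer->Close();
}
```

This is essentially the loop `FileWriterImpl::WriteTable` runs internally (see the `WriteRowGroup` lambda in the writer.cc hunk above), so most callers can use `WriteTable(table, chunk_size)` instead of driving `NewRowGroup()` by hand.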