Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add WriteBatch method to ParquetFileWriter
Browse files Browse the repository at this point in the history
amoeba committed Jun 21, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 0fbcb32 commit ab4887b
Showing 2 changed files with 33 additions and 0 deletions.
5 changes: 5 additions & 0 deletions r/R/parquet.R
Original file line number Diff line number Diff line change
@@ -431,6 +431,11 @@ ParquetFileWriter <- R6Class("ParquetFileWriter",
assert_is(table, "Table")
parquet___arrow___FileWriter__WriteTable(self, table, chunk_size)
},
# Write one RecordBatch through this writer.
# `batch` must be a RecordBatch (checked via assert_is); it is converted to
# a Table with Table$create() and handed, together with `chunk_size`
# (forwarded unchanged), to the same WriteTable binding used by WriteTable().
WriteBatch = function(batch, chunk_size) {
  assert_is(batch, "RecordBatch")
  parquet___arrow___FileWriter__WriteTable(
    self,
    Table$create(batch),
    chunk_size
  )
},
Close = function() parquet___arrow___FileWriter__Close(self)
)
)
28 changes: 28 additions & 0 deletions r/tests/testthat/test-parquet.R
Original file line number Diff line number Diff line change
@@ -530,3 +530,31 @@ test_that("thrift string and container size can be specified when reading Parque
data <- reader_container$ReadTable()
expect_identical(collect.ArrowTabular(data), example_data)
})

test_that("We can use WriteBatch on ParquetFileWriter", {
  # Target file; removed again when the test body exits.
  path <- tempfile()
  on.exit(unlink(path))

  # Build a writer for a single int32 column named `a`.
  out_stream <- FileOutputStream$create(path)
  one_col_schema <- schema(a = int32())
  writer_props <- ParquetWriterProperties$create(
    column_names = names(one_col_schema)
  )
  writer <- ParquetFileWriter$create(
    schema = one_col_schema,
    sink = out_stream,
    properties = writer_props
  )

  # Write the same 10-row batch three times before closing the writer.
  ten_rows <- RecordBatch$create(data.frame(a = 1:10))
  for (i in seq_len(3)) {
    writer$WriteBatch(ten_rows, chunk_size = 10)
  }
  writer$Close()

  # Reading the file back should yield all 3 x 10 rows.
  written <- read_parquet(path)
  expect_equal(nrow(written), 30)
})

test_that("WriteBatch on ParquetFileWriter errors when called on closed sink", {
  # Keep the path in a variable so the file created by the output stream can
  # be removed when the test exits (previously the temp file was left behind).
  tf <- tempfile()
  on.exit(unlink(tf))

  sink <- FileOutputStream$create(tf)
  sch <- schema(a = int32())
  props <- ParquetWriterProperties$create(column_names = names(sch))
  writer <- ParquetFileWriter$create(schema = sch, sink = sink, properties = props)

  # Close the writer first; the subsequent write is expected to fail.
  writer$Close()
  batch <- RecordBatch$create(data.frame(a = 1:10))
  expect_error(writer$WriteBatch(batch, chunk_size = 10), "Operation on closed file")
})

0 comments on commit ab4887b

Please sign in to comment.