rstudio · juliasilge · Oct 3, 2024 · Oct 3, 2024 · Oct 3, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -54,6 +54,7 @@ Suggests:
     Microsoft365R,
     mime,
     mockery,
+    nanoparquet,
     openssl,
     paws.storage,
     qs,

diff --git a/NEWS.md b/NEWS.md
@@ -16,6 +16,8 @@
 
 * Fixed how previously deleted pin versions are detected (#838, @MichalLauer)
 
+* Switched reading and writing with `type = "parquet"` to use the nanoparquet package (#843).
+
 # pins 1.3.0
 
 ## Breaking changes

diff --git a/R/pin-read-write.R b/R/pin-read-write.R
@@ -194,8 +194,8 @@ write_qs <- function(x, path) {
 }
 
 write_parquet <- function(x, path) {
-  check_installed("arrow")
-  arrow::write_parquet(x, path)
+  check_installed("nanoparquet")
+  nanoparquet::write_parquet(x, path)
   invisible(path)
 }
 
@@ -251,8 +251,8 @@ read_qs <- function(path) {
 }
 
 read_parquet <- function(path) {
-  check_installed("arrow")
-  arrow::read_parquet(path)
+  check_installed("nanoparquet")
+  nanoparquet::read_parquet(path)
 }
 
 read_arrow <- function(path) {

diff --git a/tests/testthat/test-pin-read-write.R b/tests/testthat/test-pin-read-write.R
@@ -1,6 +1,7 @@
 test_that("can round trip all types", {
   skip_if_not_installed("qs")
   skip_if_not_installed("arrow")
+  skip_if_not_installed("nanoparquet")
   board <- board_temp()
 
   # Data frames
@@ -9,7 +10,13 @@ test_that("can round trip all types", {
   expect_equal(pin_read(board, "df-1"), df)
 
   pin_write(board, df, "df-2", type = "parquet")
-  expect_equal(pin_read(board, "df-2"), df)
+  expect_equal(
+    withr::with_options(
+      list(nanoparquet.class = c("tbl_df", "tbl")), 
+      pin_read(board, "df-2")
+    ), 
+    df
+  )
 
   pin_write(board, df, "df-3", type = "arrow")
   expect_equal(pin_read(board, "df-3"), df)

diff --git a/vignettes/pins.Rmd b/vignettes/pins.Rmd
@@ -73,7 +73,7 @@ But you can choose another option depending on your goals:
 
 -   `type = "rds"` uses `saveRDS()` to create a binary R data file. It can save any R object (including trained models) but it's only readable from R, not other languages.
 -   `type = "csv"` uses `write.csv()` to create a CSV file. CSVs are plain text and can be read easily by many applications, but they only support simple columns (e.g. numbers, strings), can take up a lot of disk space, and can be slow to read.
--   `type = "parquet"` uses `arrow::write_parquet()` to create a Parquet file. [Parquet](https://parquet.apache.org/) is a modern, language-independent, column-oriented file format for efficient data storage and retrieval. Parquet is an excellent choice for storing tabular data but requires the [arrow](https://arrow.apache.org/docs/r/) package.
+-   `type = "parquet"` uses `nanoparquet::write_parquet()` to create a Parquet file. [Parquet](https://parquet.apache.org/) is a modern, language-independent, column-oriented file format for efficient data storage and retrieval. Parquet is an excellent choice for storing tabular data but requires the [nanoparquet](https://nanoparquet.r-lib.org/) package.
 -   `type = "arrow"` uses `arrow::write_feather()` to create an Arrow/Feather file.
 -   `type = "json"` uses `jsonlite::write_json()` to create a JSON file. Pretty much every programming language can read json files, but they only work well for nested lists.
 -   `type = "qs"` uses `qs::qsave()` to create a binary R data file, like `saveRDS()`. This format achieves faster read/write speeds than RDS, and compresses data more efficiently, making it a good choice for larger objects. Read more on the [qs package](https://github.com/traversc/qs).