diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 760e3b6da448d..89526bccdf5d5 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -102,12 +102,16 @@ jobs: run: | cd arrow/r/libarrow/dist zip -r $PKG_FILE lib/ include/ - + - name: Create Checksum + shell: bash + run: | + cd arrow/r/libarrow/dist + shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: name: r-lib__libarrow__bin__darwin-{{ '${{ matrix.platform.arch }}' }}-openssl-{{ '${{ matrix.openssl }}' }} - path: arrow/r/libarrow/dist/arrow-*.zip + path: arrow/r/libarrow/dist/arrow-*.zip* linux-cpp: name: C++ Binary Linux OpenSSL {{ '${{ matrix.openssl }}' }} @@ -149,15 +153,21 @@ jobs: PKG_FILE: arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }} run: | - cd arrow/r/libarrow/dist - # These files were created by the docker user so we have to sudo to get them - sudo -E zip -r $PKG_FILE lib/ include/ + # These files were created by the docker user so we have to chown them + sudo chown -R $USER:$USER arrow/r/libarrow + cd arrow/r/libarrow/dist + zip -r $PKG_FILE lib/ include/ + - name: Create Checksum + shell: bash + run: | + cd arrow/r/libarrow/dist + shasum -a 512 arrow-*.zip > arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: name: r-lib__libarrow__bin__linux-openssl-{{ '${{ matrix.openssl }}' }} - path: arrow/r/libarrow/dist/arrow-*.zip + path: arrow/r/libarrow/dist/arrow-*.zip* windows-cpp: name: C++ Binary Windows RTools (40 only) @@ -181,11 +191,16 @@ jobs: ARROW_HOME: "arrow" {{ macros.github_set_sccache_envvars()|indent(8) }} run: arrow/ci/scripts/r_windows_build.sh + - name: Create Checksum + shell: bash + run: | + cd build + sha512sum arrow-*.zip > arrow-{{ '${{ 
needs.source.outputs.pkg_version }}' }}.zip.sha512 - name: Upload binary artifact uses: actions/upload-artifact@v3 with: name: r-lib__libarrow__bin__windows - path: build/arrow-*.zip + path: build/arrow-*.zip* r-packages: needs: [source, windows-cpp, macos-cpp] @@ -222,7 +237,6 @@ jobs: rig system add-pak {{ macros.github_setup_local_r_repo(false, true, true)|indent }} - name: Prepare Dependency Installation - shell: bash run: | tar -xzf repo/src/contrib/arrow_*.tar.gz arrow/DESCRIPTION @@ -244,6 +258,8 @@ jobs: NOT_CRAN: "false" # actions/setup-r sets this implicitly ARROW_R_DEV: "true" LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated + ARROW_R_ENFORCE_CHECKSUM: "true" + ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" run: | on_windows <- tolower(Sys.info()[["sysname"]]) == "windows" @@ -335,6 +351,8 @@ jobs: ARROW_R_DEV: "TRUE" LIBARROW_BUILD: "FALSE" LIBARROW_BINARY: {{ '${{ matrix.config.libarrow_binary }}' }} + ARROW_R_ENFORCE_CHECKSUM: "true" + ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" shell: bash run: | Rscript -e ' diff --git a/r/.gitignore b/r/.gitignore index e607d2662f24f..d680fb42ddd6f 100644 --- a/r/.gitignore +++ b/r/.gitignore @@ -20,6 +20,10 @@ arrow_*.tgz extra-tests/files .deps +# Checksums for the precompiled binaries will be added just before CRAN submission +# use `tools/update-checksums.R` to download them. +/tools/checksums/ + # C++ sources for an offline build. They're copied from the ../cpp directory, so ignore them here. 
/tools/cpp/ # cmake expects dotenv, NOTICE.txt, and LICENSE.txt to be available one level up diff --git a/r/Makefile b/r/Makefile index 3679840ca9f7e..514f5adf4e144 100644 --- a/r/Makefile +++ b/r/Makefile @@ -67,5 +67,5 @@ clean: -rm src/Makevars.win -rm -rf arrow.Rcheck/ -rm -rf libarrow/ - -rm -rf tools/cpp/ tools/.env tools/NOTICE.txt tools/LICENSE.txt + -rm -rf tools/cpp/ tools/.env tools/NOTICE.txt tools/LICENSE.txt tools/checksums -find . -name "*.orig" -delete diff --git a/r/PACKAGING.md b/r/PACKAGING.md index 64423b4d8eb9b..7f42ecf562e59 100644 --- a/r/PACKAGING.md +++ b/r/PACKAGING.md @@ -68,6 +68,7 @@ Wait for the release candidate to be cut: - [ ] Create a PR entitled `WIP: [R] Verify CRAN release-10.0.1-rc0`. Add a comment `@github-actions crossbow submit --group r` to run all R crossbow jobs against the CRAN-specific release branch. +- [ ] Run `Rscript tools/update-checksums.R <version>` to download the checksums for the pre-compiled binaries from the ASF artifactory into the `tools/checksums` directory. - [ ] Regenerate arrow_X.X.X.tar.gz (i.e., `make build`) Ensure linux binary packages are available: diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 60deca05cd172..1346e209d8d14 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -28,11 +28,12 @@ if (test_mode && is.na(VERSION)) { } dev_version <- package_version(VERSION)[1, 4] +is_release <- is.na(dev_version) || dev_version < "100" on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin" - +checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") # Small dev versions are added for R-only changes during CRAN submission. 
-if (is.na(dev_version) || dev_version < "100") { +if (is_release) { VERSION <- package_version(VERSION)[1, 1:3] arrow_repo <- paste0(getOption("arrow.repo", sprintf("https://apache.jfrog.io/artifactory/arrow/r/%s", VERSION)), "/libarrow/") } else { @@ -88,7 +89,7 @@ thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tool download_binary <- function(lib) { - libfile <- tempfile() + libfile <- paste0("arrow-", VERSION, ".zip") binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") if (try_download(binary_url, libfile)) { if (!quietly) { @@ -103,6 +104,42 @@ download_binary <- function(lib) { } libfile <- NULL } + # Explicitly setting the env var to "false" will skip checksum validation + # e.g. in case the included checksums are stale. + skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") + enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") + # validate binary checksum for CRAN release only + if (!skip_checksum && dir.exists(checksum_path) && is_release || + enforce_checksum) { + checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url) + checksum_file <- file.path(checksum_path, checksum_file) + checksum_cmd <- "shasum" + checksum_args <- c("--status", "-a", "512", "-c", checksum_file) + + # shasum is not available on all linux versions + status_shasum <- try( + suppressWarnings( + system2("shasum", args = c("--help"), stdout = FALSE, stderr = FALSE) + ), + silent = TRUE + ) + + if (inherits(status_shasum, "try-error") || is.integer(status_shasum) && status_shasum != 0) { + checksum_cmd <- "sha512sum" + checksum_args <- c("--status", "-c", checksum_file) + } + + checksum_ok <- system2(checksum_cmd, args = checksum_args) + + if (checksum_ok != 0) { + cat("*** Checksum validation failed for libarrow binary: ", libfile, "\n") + unlink(libfile) + libfile <- NULL + } else { + cat("*** Checksum validated successfully for libarrow binary: ", libfile, "\n") + } + } + libfile } diff --git 
a/r/tools/update-checksums.R b/r/tools/update-checksums.R new file mode 100644 index 0000000000000..2aa9df317166f --- /dev/null +++ b/r/tools/update-checksums.R @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Run this script AFTER the release was voted and the artifacts +# are moved into the final dir. This script will download the checksum +# files and save them to the tools/checksums directory mirroring the +# artifactory layout. *libs.R uses these files to validate the downloaded +# binaries when installing the package. 
+# +# Run this script from the r/ directory of the arrow repo with the version +# as the first argument: Rscript tools/update-checksums.R 14.0.0 + +args <- commandArgs(TRUE) +VERSION <- args[1] + +if (length(args) != 1) { + stop("Usage: Rscript tools/update-checksums.R <version>") +} + +tasks_yml <- "../dev/tasks/tasks.yml" + +if (!file.exists(tasks_yml)) { + stop("Run this script from the r/ directory of the arrow repo") +} + +# Get the libarrow binary paths from the tasks.yml file +binary_paths <- readLines(tasks_yml) |> + grep("r-lib__libarrow", x = _, value = TRUE) |> + sub(".+r-lib__libarrow__bin__(.+\\.zip)", "\\1", x = _) |> + sub("{no_rc_r_version}", VERSION, fixed = TRUE, x = _) |> + sub("__", "/", x = _) |> + sub("\\.zip", ".zip", fixed = TRUE, x = _) + +artifactory_root <- "https://apache.jfrog.io/artifactory/arrow/r/%s/libarrow/bin/%s" + +# Get the checksum files from the artifactory +for (path in binary_paths) { + sha_path <- paste0(path, ".sha512") + file <- file.path("tools/checksums", sha_path) + dirname(file) |> dir.create(path = _, recursive = TRUE, showWarnings = FALSE) + + url <- sprintf(artifactory_root, VERSION, sha_path) + download.file(url, file, quiet = TRUE, cacheOK = FALSE) + + if (grepl("windows", path)) { + # Strip carriage returns: CRLF line endings cause errors with msys2 sha512sum + sed_status <- system2("sed", args = c("-i", "s/\\r//", file)) + if (sed_status != 0) { + stop("Failed to remove \\r from windows checksum file. 
Exit code: ", sed_status) + } + } +} diff --git a/r/tools/winlibs.R b/r/tools/winlibs.R index b554770e40c9b..314062044dcf2 100644 --- a/r/tools/winlibs.R +++ b/r/tools/winlibs.R @@ -17,55 +17,85 @@ args <- commandArgs(TRUE) VERSION <- args[1] -if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { - if (length(args) > 1) { - # Arg 2 would be the path/to/lib.zip - localfile <- args[2] - cat(sprintf("*** Using RWINLIB_LOCAL %s\n", localfile)) - if (!file.exists(localfile)) { - cat(sprintf("*** %s does not exist; build will fail\n", localfile)) - } - file.copy(localfile, "lib.zip") - } else { - # Download static arrow from the apache artifactory - quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") - get_file <- function(template, version) { - try( - suppressWarnings( - download.file(sprintf(template, version), "lib.zip", quiet = quietly) - ), - silent = quietly - ) - } +dev_version <- package_version(VERSION)[1, 4] +# Small dev versions are added for R-only changes during CRAN submission +is_release <- is.na(dev_version) || dev_version < "100" +env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) +# We want to log the message in the style of the configure script +# not as an R error. Use `return` to exit the script after logging. +lg <- function(...) 
{ + cat("*** ", sprintf(...), "\n") +} - # URL templates - nightly <- paste0( - getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), - "/libarrow/bin/windows/arrow-%s.zip" - ) - # %1$s uses the first variable for both substitutions - artifactory <- paste0( - getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), - "/libarrow/bin/windows/arrow-%1$s.zip" - ) - rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip" +if (is_release) { + # This is a release version, so we need to use the major.minor.patch version without + # the CRAN suffix/dev_version + VERSION <- package_version(VERSION)[1, 1:3] + # %1$s uses the first variable for both substitutions + url_template <- paste0( + getOption("arrow.repo", "https://apache.jfrog.io/artifactory/arrow/r/%1$s"), + "/libarrow/bin/windows/arrow-%1$s.zip" + ) +} else { + url_template <- paste0( + getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"), + "/libarrow/bin/windows/arrow-%s.zip" + ) +} - dev_version <- package_version(VERSION)[1, 4] +if (file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) { + lg("Found local Arrow %s!", VERSION) + return() +} - # Small dev versions are added for R-only changes during CRAN submission. - if (is.na(dev_version) || dev_version < "100") { - VERSION <- package_version(VERSION)[1, 1:3] - get_file(rwinlib, VERSION) +zip_file <- sprintf("arrow-%s.zip", VERSION) - # If not found, fall back to apache artifactory - if (!file.exists("lib.zip")) { - get_file(artifactory, VERSION) - } - } else { - get_file(nightly, VERSION) +if (length(args) > 1) { + # Arg 2 would be the path/to/lib.zip + localfile <- args[2] + if (!file.exists(localfile)) { + lg("RWINLIB_LOCAL '%s' does not exist. 
Build will fail.", localfile) + return() + } else { + lg("Using RWINLIB_LOCAL %s", localfile) + } + file.copy(localfile, zip_file) +} else { + quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") + binary_url <- sprintf(url_template, VERSION) + try( + suppressWarnings( + download.file(binary_url, zip_file, quiet = quietly) + ), + silent = quietly + ) + + if (!file.exists(zip_file) || file.size(zip_file) == 0) { + lg("Failed to download libarrow binary from %s. Build will fail.", binary_url) + return() + } + + checksum_path <- Sys.getenv("ARROW_R_CHECKSUM_PATH", "tools/checksums") + # Explicitly setting the env var to "false" will skip checksum validation + # e.g. in case the included checksums are stale. + skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") + enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true") + # validate binary checksum for CRAN release only + if (!skip_checksum && dir.exists(checksum_path) && is_release || + enforce_checksum) { + checksum_file <- sprintf("%s/windows/arrow-%s.zip.sha512", checksum_path, VERSION) + # rtools does not have shasum with default config + checksum_ok <- system2("sha512sum", args = c("--status", "-c", checksum_file)) + + if (checksum_ok != 0) { + lg("Checksum validation failed for libarrow binary: %s", zip_file) + # Do not leave a binary that failed verification on disk + unlink(zip_file) + return() } + lg("Checksum validated successfully for libarrow binary: %s", zip_file) } - dir.create("windows", showWarnings = FALSE) - unzip("lib.zip", exdir = "windows") - unlink("lib.zip") } + +dir.create("windows", showWarnings = FALSE) +unzip(zip_file, exdir = "windows") +unlink(zip_file)