From 31b2886f1f3c6339f3f0bb9df69dd509d2a6f5f9 Mon Sep 17 00:00:00 2001 From: wlandau Date: Fri, 7 Jun 2024 09:50:31 -0400 Subject: [PATCH] Fix #170 --- DESCRIPTION | 2 +- NEWS.md | 3 ++- R/crew_client.R | 24 +++++++++++++++++++++++- R/crew_controller_local.R | 4 +++- man/crew_class_client.Rd | 5 +++++ man/crew_client.Rd | 10 +++++++++- man/crew_controller_local.Rd | 8 ++++++++ tests/local/test-launcher-system2.R | 4 +++- tests/testthat/test-crew_client.R | 4 +++- 9 files changed, 57 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 78a3ac98..6e7d5692 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,7 @@ Description: In computationally demanding analysis projects, 'clustermq' by Schubert (2019) ), and 'batchtools' by Lang, Bischel, and Surmann (2017) . -Version: 0.9.3.9001 +Version: 0.9.3.9002 License: MIT + file LICENSE URL: https://wlandau.github.io/crew/, https://github.com/wlandau/crew BugReports: https://github.com/wlandau/crew/issues diff --git a/NEWS.md b/NEWS.md index b5a67b1f..e2f766b8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ -# crew 0.9.3.9001 (development) +# crew 0.9.3.9002 (development) * Do not use extended tasks in Shiny vignette. +* Add a new `retry_tasks` argument with default `TRUE` (#170). # crew 0.9.3 diff --git a/R/crew_client.R b/R/crew_client.R index a65a9f98..5c8d17ae 100644 --- a/R/crew_client.R +++ b/R/crew_client.R @@ -21,6 +21,12 @@ #' @param seconds_timeout Number of seconds until timing #' out while waiting for certain synchronous operations to complete, #' such as checking `mirai::status()`. +#' @param retry_tasks `TRUE` to automatically retry a task in the event of +#' an unexpected worker exit. `FALSE` to give up on the first exit and +#' return a `mirai` error code (code number 19). +#' `TRUE` (default) is recommended in most situations. +#' Use `FALSE` for debugging purposes, e.g. to confirm that a task +#' is causing a worker to run out of memory or crash in some other way. #' @examples #' if (identical(Sys.getenv("CREW_EXAMPLES"), "true")) { #' client <- crew_client() @@ -37,7 +43,8 @@ crew_client <- function( tls_enable = NULL, tls_config = NULL, seconds_interval = 0.5, - seconds_timeout = 5 + seconds_timeout = 5, + retry_tasks = TRUE ) { crew_deprecate( name = "tls_enable", @@ -69,6 +76,7 @@ crew_client <- function( tls = tls, seconds_interval = seconds_interval, seconds_timeout = seconds_timeout, + retry_tasks = retry_tasks, relay = crew_relay() ) client$validate() @@ -98,6 +106,7 @@ crew_class_client <- R6::R6Class( .tls = NULL, .seconds_interval = NULL, .seconds_timeout = NULL, + .retry_tasks = NULL, .relay = NULL, .started = NULL, .dispatcher = NULL @@ -131,6 +140,10 @@ crew_class_client <- R6::R6Class( seconds_timeout = function() { .subset2(private, ".seconds_timeout") }, + #' @field retry_tasks See [crew_client()] + retry_tasks = function() { + .subset2(private, ".retry_tasks") + }, #' @field relay Relay object for event-driven programming on a downstream #' condition variable. relay = function() { @@ -155,6 +168,7 @@ crew_class_client <- R6::R6Class( #' @param tls Argument passed from [crew_client()]. #' @param seconds_interval Argument passed from [crew_client()]. #' @param seconds_timeout Argument passed from [crew_client()]. + #' @param retry_tasks Argument passed from [crew_client()]. #' @param relay Argument passed from [crew_client()]. #' @examples #' if (identical(Sys.getenv("CREW_EXAMPLES"), "true")) { @@ -171,6 +185,7 @@ crew_class_client <- R6::R6Class( tls = NULL, seconds_interval = NULL, seconds_timeout = NULL, + retry_tasks = NULL, relay = NULL ) { private$.name <- name @@ -180,6 +195,7 @@ crew_class_client <- R6::R6Class( private$.tls <- tls private$.seconds_interval <- seconds_interval private$.seconds_timeout <- seconds_timeout + private$.retry_tasks <- retry_tasks private$.relay <- relay }, #' @description Validate the client. @@ -224,6 +240,11 @@ crew_class_client <- R6::R6Class( . >= 0 ) } + crew_assert( + private$.retry_tasks, + isTRUE(.) || isFALSE(.), + message = "retry_tasks must be TRUE or FALSE" + ) crew_assert( private$.dispatcher %|||% 0L, is.numeric(.), @@ -256,6 +277,7 @@ crew_class_client <- R6::R6Class( tls = private$.tls$client(), pass = private$.tls$password, token = TRUE, + retry = private$.retry_tasks, .compute = private$.name ) # TODO: remove code that gets the dispatcher PID if the dispatcher diff --git a/R/crew_controller_local.R b/R/crew_controller_local.R index a1c5b198..7520690f 100644 --- a/R/crew_controller_local.R +++ b/R/crew_controller_local.R @@ -29,6 +29,7 @@ crew_controller_local <- function( seconds_idle = Inf, seconds_wall = Inf, seconds_exit = NULL, + retry_tasks = TRUE, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -56,7 +57,8 @@ crew_controller_local <- function( tls_enable = tls_enable, tls_config = tls_config, seconds_interval = seconds_interval, - seconds_timeout = seconds_timeout + seconds_timeout = seconds_timeout, + retry_tasks = retry_tasks ) launcher <- crew_launcher_local( name = name, diff --git a/man/crew_class_client.Rd b/man/crew_class_client.Rd index 4803706c..9d9ccead 100644 --- a/man/crew_class_client.Rd +++ b/man/crew_class_client.Rd @@ -50,6 +50,8 @@ Other client: \item{\code{seconds_timeout}}{See \code{\link[=crew_client]{crew_client()}}.} +\item{\code{retry_tasks}}{See \code{\link[=crew_client]{crew_client()}}} + \item{\code{relay}}{Relay object for event-driven programming on a downstream condition variable.} @@ -85,6 +87,7 @@ condition variable.} tls = NULL, seconds_interval = NULL, seconds_timeout = NULL, + retry_tasks = NULL, relay = NULL )}\if{html}{\out{}} } @@ -106,6 +109,8 @@ condition variable.} \item{\code{seconds_timeout}}{Argument passed from \code{\link[=crew_client]{crew_client()}}.} +\item{\code{retry_tasks}}{Argument passed from \code{\link[=crew_client]{crew_client()}}.} + \item{\code{relay}}{Argument passed from \code{\link[=crew_client]{crew_client()}}.} } \if{html}{\out{}} diff --git a/man/crew_client.Rd b/man/crew_client.Rd index 910b5429..77ed95e2 100644 --- a/man/crew_client.Rd +++ b/man/crew_client.Rd @@ -13,7 +13,8 @@ crew_client( tls_enable = NULL, tls_config = NULL, seconds_interval = 0.5, - seconds_timeout = 5 + seconds_timeout = 5, + retry_tasks = TRUE ) } \arguments{ @@ -44,6 +45,13 @@ such as checking \code{mirai::status()}} \item{seconds_timeout}{Number of seconds until timing out while waiting for certain synchronous operations to complete, such as checking \code{mirai::status()}.} + +\item{retry_tasks}{\code{TRUE} to automatically retry a task in the event of +an unexpected worker exit. \code{FALSE} to give up on the first exit and +return a \code{mirai} error code (code number 19). +\code{TRUE} (default) is recommended in most situations. +Use \code{FALSE} for debugging purposes, e.g. to confirm that a task +is causing a worker to run out of memory or crash in some other way.} } \description{ Create an \code{R6} wrapper object to manage the \code{mirai} client. diff --git a/man/crew_controller_local.Rd b/man/crew_controller_local.Rd index b6b56b64..8c60b084 100644 --- a/man/crew_controller_local.Rd +++ b/man/crew_controller_local.Rd @@ -18,6 +18,7 @@ crew_controller_local( seconds_idle = Inf, seconds_wall = Inf, seconds_exit = NULL, + retry_tasks = TRUE, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -81,6 +82,13 @@ See the \code{walltime} argument of \code{mirai::daemon()}.} \item{seconds_exit}{Deprecated on 2023-09-21 in version 0.5.0.9002. No longer necessary.} +\item{retry_tasks}{\code{TRUE} to automatically retry a task in the event of +an unexpected worker exit. \code{FALSE} to give up on the first exit and +return a \code{mirai} error code (code number 19). +\code{TRUE} (default) is recommended in most situations. +Use \code{FALSE} for debugging purposes, e.g. to confirm that a task +is causing a worker to run out of memory or crash in some other way.} + \item{tasks_max}{Maximum number of tasks that a worker will do before exiting. See the \code{maxtasks} argument of \code{mirai::daemon()}. \code{crew} does not diff --git a/tests/local/test-launcher-system2.R b/tests/local/test-launcher-system2.R index e7c31336..6a1aa5dc 100644 --- a/tests/local/test-launcher-system2.R +++ b/tests/local/test-launcher-system2.R @@ -26,6 +26,7 @@ crew_test("custom launcher plugin based on system2()", { seconds_launch = 30, seconds_idle = Inf, seconds_wall = Inf, + retry_tasks = TRUE, tasks_max = Inf, tasks_timers = 0L, reset_globals = TRUE, @@ -41,7 +42,8 @@ crew_test("custom launcher plugin based on system2()", { port = port, tls = tls, seconds_interval = seconds_interval, - seconds_timeout = seconds_timeout + seconds_timeout = seconds_timeout, + retry_tasks = retry_tasks ) launcher <- system2_launcher_class$new( name = name, diff --git a/tests/testthat/test-crew_client.R b/tests/testthat/test-crew_client.R index 66c5b53e..b2a53238 100644 --- a/tests/testthat/test-crew_client.R +++ b/tests/testthat/test-crew_client.R @@ -8,12 +8,14 @@ crew_test("crew_client() active bindings", { host = "127.0.0.1", port = 123L, seconds_interval = 123, - seconds_timeout = 456 + seconds_timeout = 456, + retry_tasks = FALSE ) expect_equal(client$host, "127.0.0.1") expect_equal(client$port, 123L) expect_equal(client$seconds_interval, 123) expect_equal(client$seconds_timeout, 456) + expect_false(client$retry_tasks) expect_true(inherits(client$tls, "crew_class_tls")) expect_silent(client$validate()) })