Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parameterized model formulas stop working when future plan is declared #405

Open
jrauser opened this issue Jun 15, 2024 · 2 comments
Open

Comments

@jrauser
Copy link

jrauser commented Jun 15, 2024

See reprex below. In my real codebase I'm using furrr's future_map for other purposes (reading a series of files off disk).

library(fable)
#> Loading required package: fabletools
library(tsibble)
#> 
#> Attaching package: 'tsibble'
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, union
library(feasts)
library(glue)
library(future)

# Define some random data
set.seed(7)
data <- tsibble(idx=1:1000, value=rnorm(1000), index=idx) 

# Make a STL decomposition
model(data, stl = STL(value ~ trend() + season(10))) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows

# We can use a variable in the formula
seasonal_period <- 10
model(data, stl = STL(value ~ trend() + season(seasonal_period))) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows


# Or we can make a formula ourselves
stl_formula <- as.formula(glue("value ~ trend() + season({seasonal_period})"))
stl_formula
#> value ~ trend() + season(10)

# ... and hand it to STL
model(data, stl = STL(stl_formula)) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows

# The same thing works with ARIMA
fixed_p <- 1
model(data, arima = ARIMA(value ~ pdq(p=fixed_p))) %>% tidy()
#> # A tibble: 1 × 6
#>   .model term  estimate std.error statistic p.value
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>   <dbl>
#> 1 arima  ar1     0.0194    0.0317     0.612   0.541

arima_formula <- as.formula(glue("value ~ pdq(p={fixed_p})"))
model(data, arima = ARIMA(arima_formula)) %>% tidy()
#> # A tibble: 1 × 6
#>   .model term  estimate std.error statistic p.value
#>   <chr>  <chr>    <dbl>     <dbl>     <dbl>   <dbl>
#> 1 arima  ar1     0.0194    0.0317     0.612   0.541

# But the moment we declare a plan ...
plan(multisession, workers=8)

# ... "normal" formula with no parameters still works, ...
model(data, stl = STL(value ~ trend() + season(10))) %>% components()
#> # A dable: 1,000 x 7 [1]
#> # Key:     .model [1]
#> # :        value = trend + season_10 + remainder
#>    .model   idx  value   trend season_10 remainder season_adjust
#>    <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
#>  1 stl        1  2.29  -0.477     0.419      2.35         1.87  
#>  2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
#>  3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
#>  4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
#>  5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
#>  6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
#>  7 stl        7  0.748  0.298     0.0246     0.426        0.724 
#>  8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
#>  9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
#> 10 stl       10  2.19   0.706     0.459      1.02         1.73  
#> # ℹ 990 more rows

# ... but the above strategies for parameterizing the formula stop working
model(data, stl = STL(stl_formula)) %>% components()
#> Error: object 'stl_formula' not found
model(data, stl = STL(value ~ trend() + season(seasonal_period))) %>% components()
#> Warning: 1 error encountered for stl
#> [1] object 'seasonal_period' not found
#> Error in `transmute()`:
#> ℹ In argument: `cmp = map(.fit, components)`.
#> Caused by error in `UseMethod()`:
#> ! no applicable method for 'components' applied to an object of class "null_mdl"
#> Backtrace:
#>      ▆
#>   1. ├─... %>% components()
#>   2. ├─generics::components(.)
#>   3. ├─fabletools:::components.mdl_df(.)
#>   4. │ ├─dplyr::transmute(...)
#>   5. │ └─dplyr:::transmute.data.frame(...)
#>   6. │   └─dplyr:::mutate_cols(.data, dots, by)
#>   7. │     ├─base::withCallingHandlers(...)
#>   8. │     └─dplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#>   9. │       └─mask$eval_all_mutate(quo)
#>  10. │         └─dplyr (local) eval()
#>  11. ├─fabletools:::map(.fit, components)
#>  12. │ └─base::lapply(.x, .f, ...)
#>  13. │   ├─generics (local) FUN(X[[i]], ...)
#>  14. │   └─fabletools:::components.mdl_ts(X[[i]], ...)
#>  15. │     └─generics::components(object$fit, ...)
#>  16. └─base::.handleSimpleError(...)
#>  17.   └─dplyr (local) h(simpleError(msg, call))
#>  18.     └─rlang::abort(message, class = error_class, parent = parent, call = error_call)
model(data, arima = ARIMA(arima_formula)) %>% tidy()
#> Error: object 'arima_formula' not found
@jrauser
Copy link
Author

jrauser commented Jun 15, 2024

sessionInfo() output in case it matters:

> sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.6.7

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] reprex_2.0.2     fable_0.3.3      future_1.33.0    glue_1.7.0       feasts_0.3.1     fabletools_0.3.3 tsibble_1.1.3   

loaded via a namespace (and not attached):
 [1] utf8_1.2.4           generics_0.1.3       tidyr_1.3.0          anytime_0.3.9        lattice_0.21-8       listenv_0.9.0        digest_0.6.35        magrittr_2.0.3      
 [9] evaluate_0.21        grid_4.3.1           timechange_0.3.0     fastmap_1.1.1        processx_3.8.4       ps_1.7.5             purrr_1.0.1          fansi_1.0.6         
[17] scales_1.3.0         codetools_0.2-19     cli_3.6.2            rlang_1.1.3          parallelly_1.36.0    future.apply_1.11.2  ellipsis_0.3.2       munsell_0.5.1       
[25] yaml_2.3.7           withr_3.0.0          tools_4.3.1          parallel_4.3.1       dplyr_1.1.2          colorspace_2.1-0     ggplot2_3.5.0        globals_0.16.2      
[33] vctrs_0.6.5          R6_2.5.1             lifecycle_1.0.4      lubridate_1.9.3      fs_1.6.2             callr_3.7.3          clipr_0.8.0          pkgconfig_2.0.3     
[41] urca_1.3-3           progressr_0.13.0     pillar_1.9.0         gtable_0.3.4         Rcpp_1.0.10          xfun_0.39            tibble_3.2.1         tidyselect_1.2.0    
[49] rstudioapi_0.14      knitr_1.43           farver_2.1.1         htmltools_0.5.8.1    nlme_3.1-162         rmarkdown_2.22       compiler_4.3.1       distributional_0.3.2

@schoulten
Copy link

Hey @jrauser, here is a workaround:

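Add the injection operator `!!` before the formula (or any other global object) in your call. This splices the object's value into the model definition in your main session, so the parallel workers no longer need to look it up by name. For example: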

model(data, stl = STL(!!stl_formula)) %>% components()
# A dable: 1,000 x 7 [1]
# Key:     .model [1]
# :        value = trend + season_10 + remainder
   .model   idx  value   trend season_10 remainder season_adjust
   <chr>  <int>  <dbl>   <dbl>     <dbl>     <dbl>         <dbl>
 1 stl        1  2.29  -0.477     0.419      2.35         1.87  
 2 stl        2 -1.20  -0.349     0.141     -0.988       -1.34  
 3 stl        3 -0.694 -0.222     0.279     -0.751       -0.973 
 4 stl        4 -0.412 -0.0899   -0.443      0.121        0.0311
 5 stl        5 -0.971  0.0418    0.0143    -1.03        -0.985 
 6 stl        6 -0.947  0.170    -0.350     -0.767       -0.597 
 7 stl        7  0.748  0.298     0.0246     0.426        0.724 
 8 stl        8 -0.117  0.431    -0.214     -0.334        0.0972
 9 stl        9  0.153  0.564    -0.300     -0.112        0.453 
10 stl       10  2.19   0.706     0.459      1.02         1.73  
# ℹ 990 more rows
# ℹ Use `print(n = ...)` to see more rows

Should work smoothly!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants