Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes to the cache API required by the new reticulate cache implementation #2170

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c9e142c
Change API of external engines' cache to be similar to the R cache
leogama Sep 11, 2022
ed0ead6
Also check for .Rdata cache file (with results) in the lazy cache case
leogama Sep 12, 2022
706ab38
Create cache directory only before first cache check
leogama Sep 12, 2022
82b7930
Fix: load package (and cache) of external engine only if necessary
leogama Sep 12, 2022
8336713
Pass full options to engine_cache$exists(), $load() and $save()
leogama Sep 12, 2022
19def13
Pass full options to engine_caches$get(); fix variable name
leogama Sep 13, 2022
1070a1b
Only return python engine cache if available, or the R cache will alw…
leogama Sep 14, 2022
ae43d23
Fix: purge invalid cache files after the first cache check
leogama Sep 16, 2022
2a1ff59
Test: cache invalidation due to missing cache file
leogama Sep 16, 2022
0b7eb4e
showWarnings = FALSE is unnecessary (xfun::dir_create() won't create …
yihui Sep 27, 2022
8d6d9a8
let cache$load() handle custom cache engines
yihui Sep 27, 2022
5ca33cd
cosmetic
yihui Sep 27, 2022
212bd26
let block_cache() handle custom cache engines, too
yihui Sep 27, 2022
95b8f80
cache_engines$get() always try to get the engine from a name, instead…
yihui Sep 27, 2022
3307e76
use paste() instead of stringr::str_dup()
yihui Sep 27, 2022
b5e64bf
the path could be a vector of length > 1
yihui Sep 27, 2022
e093389
cosmetic
yihui Sep 27, 2022
5330ffc
clean up after testing
yihui Sep 27, 2022
7b3fb59
add news and ctb
yihui Sep 27, 2022
eeaafcb
Merge commit '6bfffe9c1ae8c84f0f2ae07cd3c4b00876757587'
yihui Sep 27, 2022
5316f3d
Merged origin/master into leogama-cache-api
yihui Sep 27, 2022
7a6c852
apply cache_engines$get() on `options` to return a list (of methods)
yihui Sep 27, 2022
ad99f51
Merge branch 'master' into cache-api
leogama Dec 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Authors@R: c(
person(c("Kevin", "K."), "Smith", role = "ctb"),
person("Kirill", "Mueller", role = "ctb"),
person("Kohske", "Takahashi", role = "ctb"),
person("Leonardo", "Gama", role = "ctb"),
person("Lorenz", "Walthert", role = "ctb"),
person("Lucas", "Gallindo", role = "ctb"),
person("Marius", "Hofert", role = "ctb"),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

- Added an argument `exact` to `pandoc_to()` and `pandoc_from()` to decide whether to use/return the exact Pandoc output/input format name. If not (default), Pandoc extensions will be removed from the format name, e.g., `latex-smart` will be treated as `latex`.

- For `python` code chunks, objects can be cached using the Python package **dill**. This currently requires the **reticulate** patch in https://github.com/rstudio/reticulate/pull/1210, and should be considered experimental until that patch is accepted and a new version of **reticulate** is released (thanks, @leogama, #2170).

## BUG FIXES

- Plot created outside of `knit()` could sneak into `knit_child()` results (thanks, @niklaswillrich, #2166).
Expand Down
39 changes: 28 additions & 11 deletions R/block.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@ call_block = function(block) {
}
hash = paste(valid_path(params$cache.path, label), digest(content), sep = '_')
params$hash = hash
if (cache$exists(hash, params$cache.lazy) &&
isFALSE(params$cache.rebuild) &&
params$engine != 'Rcpp') {
xfun::dir_create(dirname(hash))
if (cache_exists(params) && isFALSE(params$cache.rebuild) && params$engine != 'Rcpp') {
if (opts_knit$get('verbose')) message(' loading cache from ', hash)
cache$load(hash, lazy = params$cache.lazy)
cache_engine(params)
cache$load(hash, options = params)
if (!params$include) return('')
if (params$cache == 3) return(cache$output(hash))
} else {
purge_cache(params) # purge any invalid cache files
}
if (params$engine == 'R')
cache$library(params$cache.path, save = FALSE) # load packages
Expand Down Expand Up @@ -156,7 +156,7 @@ block_exec = function(options) {
output = paste(c(res.before, output, res.after), collapse = '')
output = knit_hooks$get('chunk')(output, options)
if (options$cache) {
cache.exists = cache$exists(options$hash, options$cache.lazy)
cache.exists = cache_exists(options)
if (options$cache.rebuild || !cache.exists) block_cache(options, output, switch(
options$engine,
'stan' = options$output.var, 'sql' = options$output.var, character(0)
Expand Down Expand Up @@ -246,7 +246,7 @@ eng_r = function(options) {
# guess plot file type if it is NULL
if (keep != 'none') options$fig.ext = dev2ext(options)

cache.exists = cache$exists(options$hash, options$cache.lazy)
cache.exists = cache_exists(options)
evaluate = knit_hooks$get('evaluate')
# return code with class 'source' if not eval chunks
res = if (is_blank(code)) list() else if (isFALSE(ev)) {
Expand Down Expand Up @@ -350,16 +350,33 @@ block_cache = function(options, output, objects) {
hash = options$hash
outname = cache_output_name(hash)
assign(outname, output, envir = knit_global())
purge_cache(options)
cache$library(options$cache.path, save = TRUE)
cache$save(objects, outname, hash, lazy = options$cache.lazy)
cache_action(options, 'save', options)
}

# Test whether a chunk's cache exists. The R cache (checked via cache$exists()
# with the chunk's hash and its cache.lazy setting) must exist first; only then
# is cache_action() asked to run the 'exists' method of a custom cache engine.
cache_exists = function(options) {
  r_cache_found = cache$exists(options$hash, options$cache.lazy)
  # `&&` short-circuits: the engine is not consulted when the R cache is missing
  r_cache_found && cache_action(options, 'exists', options)
}

# Purge the cache of the current chunk and of the chunks that depend on it.
#
# `options` is the list of chunk options; `cache.path` and `label` are read
# from it, and the labels of dependent chunks come from dep_list$get().
# The R cache files are removed through cache$purge(); the same glob paths are
# then handed to the 'purge' method of a custom cache engine via cache_action().
# Returns whatever cache_action() returns.
purge_cache = function(options) {
  # purge my old cache and cache of chunks dependent on me
  prefix = valid_path(options$cache.path, c(options$label, dep_list$get(options$label)))
  # 32 wildcard characters: the length of the MD5 hash in cache file names
  glob_path = paste0(prefix, '_', paste(rep('?', 32), collapse = ''))
  # the glob is built once and purged once (the earlier duplicate call with an
  # identical hard-coded pattern was redundant)
  cache$purge(glob_path)
  cache_action(options, 'purge', glob_path)
}

# Run `method` of the custom cache engine registered for options$engine,
# passing `...` to it.
#
# The fallback result is TRUE when `method` is 'exists' and NULL otherwise; it
# is returned when no cache engine is registered for the chunk's engine, or
# when the object produced by the engine has no function named `method`.
cache_action = function(options, method, ...) {
  res = if (method == 'exists') TRUE
  handler = cache_engines$get(options$engine)
  if (!length(handler)) return(res)
  # the registered engine is called with the chunk options to obtain its methods
  action = handler(options)[[method]]
  if (!is.function(action)) return(res)
  res = action(...)
  res
}

cache_globals = function(option, code) {
Expand Down
29 changes: 11 additions & 18 deletions R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,8 @@
## but it is using .rdb and .rdx as 'hard cache' (instead of cache in memory)
new_cache = function() {

# Make sure the directory part of `hash` exists (creating it recursively and
# silently when it does not), then return the path rebuilt from its directory
# and base name.
cache_path = function(hash) {
  parent = dirname(hash)
  if (!file.exists(parent)) {
    dir.create(parent, showWarnings = FALSE, recursive = TRUE)
  }
  file.path(parent, basename(hash))
}

cache_purge = function(hash) {
for (h in hash) unlink(paste(cache_path(h), c('rdb', 'rdx', 'RData'), sep = '.'))
cache_purge = function(path) {
for (p in path) unlink(paste(p, c('rdb', 'rdx', 'RData'), sep = '.'))
}

cache_save = function(keys, outname, hash, lazy = TRUE) {
Expand All @@ -20,18 +14,17 @@ new_cache = function() {
out0 = outname
on.exit(rm(list = out0, envir = knit_global()), add = TRUE)
# keys are new variables created; outname is the text output of a chunk
path = cache_path(hash)
# add random seed to cache if exists
if (exists('.Random.seed', envir = globalenv(), inherits = FALSE)) {
copy_env(globalenv(), knit_global(), '.Random.seed')
outname = c('.Random.seed', outname)
}
if (!lazy) outname = c(keys, outname)
save(list = outname, file = paste(path, 'RData', sep = '.'), envir = knit_global())
save(list = outname, file = paste(hash, 'RData', sep = '.'), envir = knit_global())
if (!lazy) return() # everything has been saved; no need to make lazy db
# random seed is always load()ed
keys = setdiff(keys, '.Random.seed')
getFromNamespace('makeLazyLoadDB', 'tools')(knit_global(), path, variables = keys)
getFromNamespace('makeLazyLoadDB', 'tools')(knit_global(), hash, variables = keys)
}

save_objects = function(objs, label, path) {
Expand All @@ -53,13 +46,12 @@ new_cache = function() {
save_objects(globals, label, valid_path(path, '__globals'))
}

cache_load = function(hash, lazy = TRUE) {
path = cache_path(hash)
if (!is_abs_path(path)) path = file.path(getwd(), path)
if (lazy) lazyLoad(path, envir = knit_global())
cache_load = function(hash, lazy = options$cache.lazy, options = list()) {
if (!is_abs_path(hash)) path = file.path(getwd(), hash)
if (lazy) lazyLoad(hash, envir = knit_global())
# load output from last run if exists
if (file.exists(path2 <- paste(path, 'RData', sep = '.'))) {
load(path2, envir = knit_global())
if (file.exists(path <- paste(hash, 'RData', sep = '.'))) {
load(path, envir = knit_global())
if (exists('.Random.seed', envir = knit_global(), inherits = FALSE))
copy_env(knit_global(), globalenv(), '.Random.seed')
name = cache_meta_name(hash)
Expand All @@ -70,6 +62,7 @@ new_cache = function() {
rm(list = name, envir = knit_global())
}
}
cache_action(options, 'load', options)
}

cache_library = function(path, save = TRUE) {
Expand All @@ -89,7 +82,7 @@ new_cache = function() {
cache_exists = function(hash, lazy = TRUE) {
is.character(hash) &&
all(file.exists(paste(
cache_path(hash), if (lazy) c('rdb', 'rdx') else 'RData', sep = '.'
hash, c('RData', 'rdb', 'rdx')[if (lazy) 1:3 else 1], sep = '.'
)))
}

Expand Down
14 changes: 4 additions & 10 deletions R/engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,11 @@ eng_python = function(options) {

# Look up the cache engine for `python` chunks in the reticulate namespace.
#
# Returns NULL when the chunk option python.reticulate is FALSE, when the
# reticulate namespace has no object named `cache_eng_python`, or when that
# object's available() method does not return TRUE for `options`. Otherwise the
# reticulate object itself is returned.
cache_eng_python = function(options) {
  if (isFALSE(options$python.reticulate)) return()
  # TODO: change this hack to reticulate::cache_eng_python after
  # https://github.com/rstudio/reticulate/pull/1210 is merged and released
  if (!'cache_eng_python' %in% ls(asNamespace('reticulate'))) return()
  # the object is used through its methods (e.g. $available) and is not called
  # as a function itself
  eng = getFromNamespace('cache_eng_python', 'reticulate')
  if (eng$available(options)) eng
}

## Java
Expand Down Expand Up @@ -936,12 +936,6 @@ get_engine = function(name) {
}
}

# Call the cache function registered for the chunk's engine with the chunk
# options; return NULL when the registered value is not a function.
cache_engine = function(options) {
  registered = cache_engines$get(options$engine)
  if (is.function(registered)) registered(options) else NULL
}

# possible values for engines (for auto-completion in RStudio): 'R' plus the
# names of all registered engines in knit_engines, sorted and stored as a list
opts_chunk_attr$engine = as.list(sort(c('R', names(knit_engines$get()))))
# mark both the engine.path and engine.opts chunk options as 'character'
opts_chunk_attr[c('engine.path', 'engine.opts')] = list('character', 'character')
2 changes: 1 addition & 1 deletion R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ save_plot = function(plot, name, dev, width, height, ext, dpi, options) {

path = paste(name, ext, sep = '.')
# when cache=2 and plot file exists, just return the filename
if (options$cache == 2 && cache$exists(options$hash, options$cache.lazy)) {
if (options$cache == 2 && cache_exists(options)) {
if (in_base_dir(!file.exists(path))) {
purge_cache(options)
stop('cannot find ', path, '; the cache has been purged; please re-compile')
Expand Down
31 changes: 31 additions & 0 deletions tests/testit/test-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,34 @@ assert('dep_prev() sets dependencies on previous chunks', {
})
dep_list$restore()
knit_code$restore()

# A mock cache engine object: a list of methods that ignore their arguments.
# available() always returns TRUE; exists(), load(), save() and purge() always
# return FALSE (the return values of the latter three may be anything).
mock_cache = local({
  constant = function(value) {
    force(value)
    function(...) value
  }
  list(
    available = constant(TRUE),
    exists = constant(FALSE),
    load = constant(FALSE),
    save = constant(FALSE),
    purge = constant(FALSE)
  )
})
# register a `mock` language engine that ignores its arguments and always
# returns the same text
knit_engines$set(mock = function(...) "\n\nmock result\n\n")
# register a `mock` cache engine that always returns the mock_cache object
cache_engines$set(mock = function(...) mock_cache)
# Knit the same cached `mock` chunk twice and report whether the R cache file
# (*.RData) was rewritten by the second run.
#
# Returns TRUE when the modification time of the .RData file differs before and
# after the second knit() call. Signals an error when the first run does not
# leave exactly one .RData file behind.
knit_engine_cache = function() {
  # work in a private directory: the only *.RData file there is the one written
  # by this test, and all cache files can be removed afterwards
  scratch = tempfile('knitr-cache-')
  dir.create(scratch)
  on.exit(unlink(scratch, recursive = TRUE), add = TRUE)
  in_dir(scratch, {
    txt = c(
      '```{mock test, cache=TRUE, cache.path=""}',
      'mock code',
      '```'
    )
    knit(text = txt, quiet = TRUE)
    R_cache_file = list.files(pattern = '[.]RData$')
    if (length(R_cache_file) != 1) stop(
      'expected exactly one .RData cache file, found ', length(R_cache_file)
    )
    # backdate the file so that a rewrite is detectable even when both runs fall
    # within the timestamp resolution of the file system
    Sys.setFileTime(R_cache_file, Sys.time() - 3600)
    t1 = file.mtime(R_cache_file)
    knit(text = txt, quiet = TRUE)
    t2 = file.mtime(R_cache_file)
    t1 != t2  # missing "mock" cache should invalidate R cache
  })
}
# the helper must return TRUE, i.e. the .RData file was rewritten on the second
# run because the mock cache engine's exists() method returns FALSE
assert("missing external engine's cache invalidates R cache", {
(knit_engine_cache())
})
# unregister the mock language engine and the mock cache engine
knit_engines$delete('mock')
cache_engines$delete('mock')