-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy path02_parallel_eval.R
82 lines (65 loc) · 2.24 KB
/
02_parallel_eval.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Introduction to running a slow function in parallel in R
# last edited 2024-09-04 by @vankesteren
# ODISSEI Social Data Science team
library(tidyverse)
library(pbapply)
library(parallel)
source("./src/schelling_cpp.R")
# Create an analysis function ----
# let's create a function that returns a number of interest
# One replication of the ABM, reduced to a single number of interest.
# `x` is never used inside the body: it only exists so that the apply
# family (which passes one element of its input per call) can call this.
analysis_function <- function(x) {
  # simulate once with fixed group proportions and a fixed Ba setting
  sim <- abm_cpp(prop = c(.69, .19, .12), Ba = 0.6)

  # keep the third entry of h_prop: the happiness of the smallest group
  # (the one with proportion .12)
  sim$h_prop[3]
}
# Sequential baseline ----
# Evaluate the analysis function 300 times, one run after another.
# The "apply" family is a compact, functional stand-in for a "for" loop;
# the "pb" variant is called the same way as sapply().
res <- pbsapply(1:300, analysis_function)

# histogram of the 300 outcomes
ggplot(data = tibble(res = res)) +
  geom_histogram(mapping = aes(x = res), bins = 40, fill = "#345534") +
  labs(
    title = "Variation in happiness",
    subtitle = "Variation over 300 runs of our ABM",
    x = "Happiness",
    y = "Count"
  ) +
  theme_minimal()

# average happiness over all runs
mean(res)
# that's pretty slow! let's see if we can speed this up.
# we will use the parallel package

# Run the abm in parallel ----
# first, figure out how many logical cores (threads) you have
n_cores <- detectCores()
n_cores

# Leave two threads free for other tasks, but always use at least one
# worker. On a machine with 12 threads this gives 10 workers.
# detectCores() can return NA when the count cannot be determined, so
# we fall back to a single worker in that case.
n_threads <- if (is.na(n_cores)) 1L else max(1L, n_cores - 2L)

# create the cluster
clus <- makeCluster(n_threads)

# then, we load the abm code on each of the workers
# (the return value is kept in `out`; it is not used further in this script)
out <- clusterEvalQ(clus, source("./src/schelling_cpp.R"))

# now, we run the function in parallel
res_parl <- pbsapply(X = 1:300, FUN = analysis_function, cl = clus)

# we can also use "load-balancing" (LB) which can deal with
# the fact that runs can take differing amounts of time
# (at the cost of a little more overhead than non-load-balancing)
# note: this overwrites the result of the previous call
res_parl <- parSapplyLB(
  cl = clus,
  X = 1:300,
  FUN = analysis_function
)

# important step! stop the cluster to free up resources.
stopCluster(clus)
# histogram of the outcomes obtained from the parallel run
ggplot(data = tibble(res_parl = res_parl)) +
  geom_histogram(mapping = aes(x = res_parl), bins = 40, fill = "#345534") +
  labs(
    title = "Variation in happiness",
    subtitle = "Variation over 300 runs of our ABM",
    x = "Happiness",
    y = "Count"
  ) +
  theme_minimal()

# average happiness over all parallel runs
mean(res_parl)