forked from lei-zhang/socialRL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rl_ppc.stan
80 lines (66 loc) · 1.8 KB
/
rl_ppc.stan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
data {
int<lower=1> nSubjects;
int<lower=1> nTrials;
int<lower=1,upper=2> choice[nSubjects, nTrials];
real<lower=-1, upper=1> reward[nSubjects, nTrials];
}
transformed data {
vector[2] initV; // initial values for V
initV = rep_vector(0.0, 2);
}
parameters {
// group-level parameters
real lr_mu_raw;
real tau_mu_raw;
real<lower=0> lr_sd_raw;
real<lower=0> tau_sd_raw;
// subject-level raw parameters
vector[nSubjects] lr_raw;
vector[nSubjects] tau_raw;
}
transformed parameters {
vector<lower=0,upper=1>[nSubjects] lr;
vector<lower=0,upper=3>[nSubjects] tau;
for (s in 1:nSubjects) {
lr[s] = Phi_approx( lr_mu_raw + lr_sd_raw * lr_raw[s] );
tau[s] = Phi_approx( tau_mu_raw + tau_sd_raw * tau_raw[s] ) * 3;
}
}
model {
lr_mu_raw ~ normal(0,1);
tau_mu_raw ~ normal(0,1);
lr_sd_raw ~ cauchy(0,3);
tau_sd_raw ~ cauchy(0,3);
lr_raw ~ normal(0,1);
tau_raw ~ normal(0,1);
for (s in 1:nSubjects) {
vector[2] v;
real pe;
v = initV;
for (t in 1:nTrials) {
choice[s,t] ~ categorical_logit( tau[s] * v );
pe = reward[s,t] - v[choice[s,t]];
v[choice[s,t]] = v[choice[s,t]] + lr[s] * pe;
}
}
}
generated quantities {
real<lower=0,upper=1> lr_mu;
real<lower=0,upper=3> tau_mu;
int y_pred[nSubjects, nTrials];
lr_mu = Phi_approx(lr_mu_raw);
tau_mu = Phi_approx(tau_mu_raw) * 3;
y_pred = rep_array(-999,nSubjects ,nTrials);
{ // local block
for (s in 1:nSubjects) {
vector[2] v;
real pe;
v = initV;
for (t in 1:nTrials) {
y_pred[s,t] = categorical_logit_rng( tau[s] * v );
pe = reward[s,t] - v[choice[s,t]];
v[choice[s,t]] = v[choice[s,t]] + lr[s] * pe;
}
}
}
}