-
Notifications
You must be signed in to change notification settings - Fork 0
/
7-modeling.R
69 lines (51 loc) · 2.62 KB
/
7-modeling.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
library(dplyr)
# Note: McFadden's pseudo R2 for a fitted model `m` can be obtained with
# pscl::pR2(m)['McFadden'].

# Load the processed sample used by the rest of this script.
d <- read.csv(file = "../research-data/processed/lak23wlsample.csv")
# Convert a column of Python-style booleans into an R logical vector.
#
# Args:
#   v: A character, factor, or logical vector.
#
# Returns:
#   A logical vector of the same length as `v`: "True" becomes TRUE, empty
#   strings and NA become NA, and every other value becomes FALSE. Logical
#   input is returned unchanged.
py_to_r_bool <- function(v) {
  # read.csv() parses a column holding only "True"/"False"/blank straight into
  # a logical vector. Pushing that through as.character() would yield "TRUE",
  # which never equals "True", so every TRUE would silently turn into FALSE.
  if (is.logical(v)) {
    return(v)
  }
  v <- as.character(v)
  # Blank cells mean "missing"; guard with !is.na() so existing NAs stay NA
  # without relying on NA-valued subscripts.
  v[!is.na(v) & v == ""] <- NA
  v == "True" # NA == "True" -> NA
}
# Recode the two outcome columns from Python-style booleans to R logicals.
d$student_dropped_program <- py_to_r_bool(d$student_dropped_program)
d$graduated_on_time <- py_to_r_bool(d$graduated_on_time)
# Build the modeling copy of the data: `d` keeps the raw values, while the
# listed predictors in `d_model` are centred and scaled with scale().
d_model <- d
for (variable in c(
  "sem_load_credit_hours",
  "sem_load_predicted",
  "n_courses",
  "ratio_courses_stem_match",
  "X.SEMESTER_GPA"
)) {
  d_model[variable] <- scale(d_model[variable])
}
# Dropout models ----
# Every model in this section is fit on `d_model` (standardized predictors) so
# that coefficients are on the same scale across the single-predictor,
# additive, and interaction models. Standardizing is a linear re-expression of
# the predictors, so fitted values and deviances match a raw-scale fit; only
# the coefficient scale (and the meaning of main effects in the interaction
# model) changes.

# Credit-hour load alone.
# Reported finding: a larger semester load in credit hours is negatively
# associated with dropout.
m <- glm(
  student_dropped_program ~ sem_load_credit_hours,
  data = d_model, family = "binomial"
)
summary(m)

# Analytical (predicted) load alone.
# Reported finding: not associated with dropout on its own.
m <- glm(
  student_dropped_program ~ sem_load_predicted,
  data = d_model, family = "binomial"
)
summary(m)

# Both loads, additive.
# Reported finding: a significant additive effect, where analytical load makes
# dropout more likely given a particular credit-hour load.
m <- glm(
  student_dropped_program ~ sem_load_credit_hours + sem_load_predicted,
  data = d_model, family = "binomial"
)
summary(m)

# Both loads with their interaction (the prediction space).
m <- glm(
  student_dropped_program ~ sem_load_credit_hours * sem_load_predicted,
  data = d_model, family = "binomial"
)
summary(m)

# Nested model comparison (likelihood-ratio chi-square tests):
# credit hours only -> + analytical load -> + interaction.
m0 <- glm(
  student_dropped_program ~ sem_load_credit_hours,
  data = d_model, family = "binomial"
)
m1 <- glm(
  student_dropped_program ~ sem_load_credit_hours + sem_load_predicted,
  data = d_model, family = "binomial"
)
m2 <- glm(
  student_dropped_program ~ sem_load_credit_hours * sem_load_predicted,
  data = d_model, family = "binomial"
)
anova(m0, m1, m2, test = "Chisq")
# On-time graduation models ----
# Same sequence of models as for dropout, with `graduated_on_time` as the
# outcome. Every model is fit on `d_model` (standardized predictors) so that
# coefficients are on the same scale across models; standardizing is a linear
# re-expression of the predictors, so fitted values and deviances match a
# raw-scale fit.

# Credit-hour load alone.
# Reported finding: a larger semester load in credit hours is positively
# associated with on-time graduation.
m <- glm(
  graduated_on_time ~ sem_load_credit_hours,
  data = d_model, family = "binomial"
)
summary(m)

# Analytical (predicted) load alone.
# Reported finding: a larger predicted semester load is positively associated
# with on-time graduation.
m <- glm(
  graduated_on_time ~ sem_load_predicted,
  data = d_model, family = "binomial"
)
summary(m)

# Both loads, additive.
m <- glm(
  graduated_on_time ~ sem_load_credit_hours + sem_load_predicted,
  data = d_model, family = "binomial"
)
summary(m)

# Both loads with their interaction.
# Reported finding: the model finds a significant interaction.
m <- glm(
  graduated_on_time ~ sem_load_credit_hours * sem_load_predicted,
  data = d_model, family = "binomial"
)
summary(m)

# Nested model comparison (likelihood-ratio chi-square tests):
# credit hours only -> + analytical load -> + interaction.
m0 <- glm(
  graduated_on_time ~ sem_load_credit_hours,
  data = d_model, family = "binomial"
)
m1 <- glm(
  graduated_on_time ~ sem_load_credit_hours + sem_load_predicted,
  data = d_model, family = "binomial"
)
m2 <- glm(
  graduated_on_time ~ sem_load_credit_hours * sem_load_predicted,
  data = d_model, family = "binomial"
)
anova(m0, m1, m2, test = "Chisq")