forked from SumitraBadrinathan/psci-338
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path10-GLM.R
111 lines (88 loc) · 4.34 KB
/
10-GLM.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# #######################################################################
# File-Name: 10-GLM.R
# Version: R 3.4.3
# Date: Dec 10, 2018
# Author: Sumitra Badrinathan <[email protected]>
# Purpose: Logit & probit regressions to model admit success probability
# + predicted probability plots + marginal effects
# Machine: macOS 10.14
# #######################################################################
set.seed(1221)
rm(list=ls()) # remove objects from R workspace
# set working directory
setwd("~/Dropbox/PSCI338") #macs
#setwd("C:/Users/name/Dropbox/PSCI338") #windows
# load and install required packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(readstata13, margins, effects)
# Download data file Binary.dta from this repository and save it in the Data/Raw folder
# File contains data on whether or not students were admitted to grad school.
# We will model admit success as a function of GRE, GPA, and rank of undergrad school
# load in data
binarydata <- read.dta13("Data/Raw/Binary.dta")
stargazer(binarydata, type = "text")
# run OLS regressing admit on GRE + GPA + rank of school
binarydata$rank.f <- factor(binarydata$rank)
reg1 <- lm(admit ~ gre + gpa + rank.f, data = binarydata)
summary(reg1)
# save predicted values in separate variable
binarydata$predictedvalues <- predict(reg1)
# are there values below 0?
summary(binarydata$predictedvalues)
belowzero <- subset(binarydata, predictedvalues < 0)
print(belowzero)
# use a logit model on the same data
output <- glm(admit ~ gre + gpa + rank.f, family=binomial(logit), data = binarydata)
summary(output)
# interpret constant: -3.98 is the log odds of getting into a school when everything else is 0
# predicted prob of Y for the first row
# calculates predicted probability of acceptenace for someone with a GRE of 380,
# GPA of 3.61, and from undergraduate school in the third quartile
binarydata[1,]
exp(-3.989979 + 0.002264*380+0.804038*3.61+-0.675443*0+-1.340204*1+-1.551464*0) /
(1 + exp(-3.989979 + 0.002264*380+0.804038*3.61+-0.675443*0+-1.340204*1+-1.551464*0))
# plot predicted probabilities for all ranks
# give the values that you want to set the variable to
inputs <- cbind(1, 380, 3.61, 1:4)
# specify the names of the variables that you want to apply these values to
colnames(inputs) <- c("constant", "gre", "gpa", "rank.f")
# turn into a data frame
inputs <- as.data.frame(inputs)
# because rank.f is a factor, need to turn into a factor within the dataframe
inputs$rank.f <- as.factor(inputs$rank.f)
# gets the predicted probability of voting for every combination of covariates included in inputs and puts into vector forecast
forecast <- predict(output, newdata = inputs, type = "response")
print(forecast)
# plot it
dev.off()
par(mfrow=c(1,1))
plot(y = forecast, x = 1:4, xaxt="n", ylim = c(0, 1), type = "o",
xlab = "Rank of School", ylab = "Predicted Probabilty of Admitance", col = "black", lty=1)
axis(side=1, at=c(1, 2, 3, 4), labels = TRUE)
title(main="Rank of Undergraduate School and Probability of Grad School Acceptance",
sub="Applicant with 380 GRE and 3.61 GPA")
# plotting predicted probabilities (from the "margins" package))
# prediced prob of being admitted given rank
cplot(output, "rank.f", what = "prediction",
main = "Predicted Admit Probability, Given Rank")
# prediced prob of being admitted given gpa
cplot(output, "gpa", what = "prediction", main = "Predicted Admit Probability, Given GPA",
col = "navyblue", se.type = "shade", se.fill = "lightblue")
# marginal effect of gpa
m <- margins(output)
summary(m)
# marginal effect
# predicted increment of the response variable
# associated with a unit increase in one of the covariates keeping the others constant.
cplot(output, "gpa", what = "effect", main = "Marginal Effect of GPA")
# if you want a predicted probability plot for each predictor variable
# use the plot(allEffects()) command from the "effects" package
plot(allEffects(output))
# probit example
output2 <- glm(admit ~ gre + gpa + rank.f, family=binomial(probit), data = binarydata)
summary(output2)
# similar results for logit and probit
stargazer(output, output2, type="text")
# interpreting probit coefficeints
print(binarydata[1,])
print(pnorm(-2.386836 + 0.001376*380+0.477730*3.61+-0.415399*0+-0.812138*1+-0.935899*0))