-
Notifications
You must be signed in to change notification settings - Fork 5
/
IPEDSFallEnrl02to18SrsA.do
190 lines (162 loc) · 8.41 KB
/
IPEDSFallEnrl02to18SrsA.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// Session setup: reset Stata, run the sshnd helper, open the log, define the
// output spacer, and pin the interpreter version for the rest of the script.
set more off
clear all
cls
// Use this code to download, build, and save to the local computer data
// from the FALL ENROLLMENT survey (A Series) at the US DOE's Integrated
// Postsecondary Education Data System. From IPEDS Website: "Race/ethnicity,
// gender, attendance status, and level of student."
// Dec/2019: Adam Ross Nelson - Updated for 2017 & 2018 data.
// Mar/2018: Naiya Patel - Completed A Series.
// Feb/2018: Naiya Patel - Original author, initial build.
/*#############################################################################
File maintained at
https://github.com/adamrossnelson/StataIPEDSAll
##############################################################################*/
// Utilizes prepackaged version of sshnd (interactive file picker).
// The helper is fetched from GitHub at run time, so network access is needed.
// NOTE(review): sshnd.do is not visible here; it presumably sets the globals
// loggbl (log file path) and dtagbl (output data path) and changes into a
// working subdirectory - confirm against the sshnd source.
do https://raw.githubusercontent.com/adamrossnelson/sshnd/master/sshnd.do
capture log close // Close stray log files.
log using "$loggbl", append // Append sshnd established log file.
// The spacer is stored as an unevaluated list of display expressions: two
// carriage-return/line-feed pairs. It is expanded later as the argument of di.
local sp char(13) char(10) char(13) char(10) // Define spacer.
version 13 // Enforce version compatibility.
di c(pwd) // Confirm working directory.
// Per-year build loop (survey years 2002 through 2018). Each pass:
//   1. downloads and unzips the NCES data archive and the NCES labeling do file,
//   2. patches the NCES do file so it no longer imports or saves data itself,
//   3. imports the csv, runs the patched do file, and harmonizes variable names,
//   4. reshapes to one row per unitid (wide by efalevel), and
//   5. saves ef<year>a_data_stata.dta for the append step that follows the loop.
// Stata do files need cleaning (removal of stray char(13) + char(10) + char(34)).
// Requires local sp (spacer display expressions) to be defined before the loop.
forvalues yindex = 2002 / 2018 {
    // Copy, unzip, and import data files.
    // Stata 13 introduced support for copy to work with https.
    // Use command -update all- if Stata 13 and copy returns an error.
    // The destination is named explicitly and replace is specified so that a
    // re-run in a directory already holding the archive overwrites it instead
    // of stopping with a "file already exists" error. Every other file-writing
    // step in this loop already overwrites.
    copy https://nces.ed.gov/ipeds/datacenter/data/EF`yindex'A_Data_Stata.zip "EF`yindex'A_Data_Stata.zip", replace
    unzipfile EF`yindex'A_Data_Stata.zip, replace

    // Download the NCES provided do file for A series.
    copy https://nces.ed.gov/ipeds/datacenter/data/EF`yindex'A_Stata.zip "EF`yindex'A_Stata.zip", replace
    unzipfile EF`yindex'A_Stata.zip, replace

    // Read do file into scalar for modification.
    scalar fcontents = fileread("ef`yindex'a.do")

    // Remove default "insheet" command designed to import data (first
    // occurrence only). Remove default "save" command designed to save data
    // (every occurrence of the text "save" is prefixed with a comment marker).
    scalar fcontents = subinstr(fcontents, "insheet", "// insheet", 1)
    scalar fcontents = subinstr(fcontents, "save", "// save", .)

    // Remove unexpected carriage returns and line feeds that directly precede
    // a double quote, leaving only the double quote.
    scalar sstring = char(13) + char(10) + char(34)
    scalar fcontents = subinstr(fcontents, sstring, char(34), .)

    // Comment out the NCES statements that define and attach the value label
    // label_line.
    scalar fcontents = subinstr(fcontents, "label define label_line", "// label define label_line", .)
    scalar fcontents = subinstr(fcontents, "label values line label_line", "// label values line label_line", .)

    // Save the revised and working do file (third argument 1 = overwrite).
    scalar byteswritten = filewrite("EF`yindex'a.do", fcontents, 1)

    // File name conventions not consistent through the years.
    // 2007, 2008, and 2010-2017 provide _rv_ editions of the data.
    if inlist(`yindex',2007,2008,2010,2011,2012,2013,2014,2015,2016,2017) {
        import delimited ef`yindex'a_rv_data_stata.csv, clear
    }
    else {
        import delimited ef`yindex'a_data_stata.csv, clear
    }

    di "QUIET RUN OF EF`yindex'a.do" // Provides user with information for log file
    qui do EF`yindex'a.do // Quietly run NCES provided do files.
    drop x* // Remove imputation variables.
    di `sp' // Spacing to assist reading output.

    // Years before 2008 use the efraceNN naming scheme. Rename those variables
    // to the names used from 2008 onward, and create the categories that have
    // no pre-2008 counterpart as all-missing placeholders.
    if (`yindex' < 2008) {
        rename efrace24 eftotlt // Grand total
        rename efrace15 eftotlm // Grand total men
        rename efrace16 eftotlw // Grand total women
        rename efrace19 efaiant // American Indian or Alaska Native total
        rename efrace05 efaianm // American Indian or Alaska Native total men
        rename efrace06 efaianw // American Indian or Alaska Native total women
        rename efrace20 efasiat // Asian total
        rename efrace07 efasiam // Asian total men
        rename efrace08 efasiaw // Asian total women
        rename efrace18 efbkaat // Black or African American total
        rename efrace03 efbkaam // Black or African American total men
        rename efrace04 efbkaaw // Black or African American total women
        rename efrace21 efhispt // Hispanic total
        rename efrace09 efhispm // Hispanic total men
        rename efrace10 efhispw // Hispanic total women
        rename efrace22 efwhitt // White total
        rename efrace11 efwhitm // White total men
        rename efrace12 efwhitw // White total women
        rename efrace23 efunknt // Race/ethnicity unknown total
        rename efrace13 efunknm // Race/ethnicity unknown total men
        rename efrace14 efunknw // Race/ethnicity unknown total women
        rename efrace17 efnralt // Nonresident alien total
        rename efrace01 efnralm // Nonresident alien total men
        rename efrace02 efnralw // Nonresident alien total women
        gen ef2mort = . // Two or more races total
        gen ef2morm = . // Two or more races men
        gen ef2morw = . // Two or more races women
        gen efnhpit = . // Native Hawaiian or Other Pacific Islander total
        gen efnhpim = . // Native Hawaiian or Other Pacific Islander men
        gen efnhpiw = . // Native Hawaiian or Other Pacific Islander women
        label variable ef2mort "Two or more races total"
        label variable ef2morm "Two or more races men"
        label variable ef2morw "Two or more races women"
        label variable efnhpit "Native Hawaiian or Other Pacific Islander total"
        label variable efnhpim "Native Hawaiian or Other Pacific Islander men"
        label variable efnhpiw "Native Hawaiian or Other Pacific Islander women"
    }

    // Establish local for varlist.
    local thevars efnralm efnralw efbkaam efbkaaw efaianm efaianw ///
        efasiam efasiaw efhispm efhispw efwhitm efwhitw efunknm efunknw ///
        eftotlm eftotlw efnralt efbkaat efaiant efasiat efhispt efwhitt ///
        efunknt eftotlt ef2mort ef2morm ef2morw efnhpit efnhpim efnhpiw

    // Loop to save variable label names for reapplication after reshape.
    // Each label is held in a local named l<varname>.
    foreach varname in `thevars' {
        local l`varname' : variable label `varname'
    }

    // Simplify dataset: keep the id, the level code, and the enrollment
    // counts, then retain only the selected efalevel codes.
    keep unitid efalevel `thevars'
    keep if inlist(efalevel, 1, 2, 12, 21, 22, 32, 41, 42, 52)
    levelsof efalevel, local(levels)

    // Get the value label name associated with var efalevel.
    local lbe : value label efalevel

    // Get individual value labels from valuelabel associated with var efalevel
    // and abbreviate common words; the result is stored in local ms_<level>
    // and later used as a prefix of the reshaped variable labels.
    foreach l of local levels {
        local ms_`l' : label `lbe' `l'
        local ms_`l' = subinstr("`ms_`l''","student","stdt",.)
        local ms_`l' = subinstr("`ms_`l''","Undergraduate","Ugrd",.)
        local ms_`l' = subinstr("`ms_`l''","Graduate","Grad",.)
        local ms_`l' = subinstr("`ms_`l''","total","Tot",.)
        local ms_`l' = subinstr("`ms_`l''","Full-time","Fltime",.)
        local ms_`l' = subinstr("`ms_`l''","Part-time","Pttime",.)
    }

    // Reshape: one row per unitid, one column per variable-by-level pair.
    reshape wide `thevars', i(unitid) j(efalevel)

    // Reapply variable label names following reshape, prefixed with the
    // abbreviated level description.
    foreach lev of local levels {
        foreach varname in `thevars' {
            label variable `varname'`lev' "`ms_`lev'' `l`varname''"
        }
    }

    // Add isYr index and order new variable.
    gen int isYr = `yindex'
    order isYr, after (unitid)
    saveold "ef`yindex'a_data_stata.dta", version(13) replace // Save cleaned data file.
    di `sp' // Spacer for the output.
}
// Assemble the panel: load the 2018 file as the base, then append each
// earlier yearly file in descending order (2017 down to 2002).
use "ef2018a_data_stata.dta", clear
foreach yr of numlist 2017(-1)2002 {
    display "Appending data file from `yr'" // Output for log file.
    append using "ef`yr'a_data_stata.dta", force
    display `sp' // Spacing for log file.
}
// Duplicate value labels are tolerated by Stata but cause errors when the
// data are read in other settings (e.g. Python Pandas). Find the value labels
// that are not unique at 12 characters and prefix their text with the
// numeric value.
// Documentation: https://stackoverflow.com/a/46038793/9572143
quietly labelbook, length(12)
quietly return list, all
quietly numlabel `r(nuniq)', add

// Step up one directory level, compress the data, and attach notes that
// record where and when the panel was built.
cd ..
compress
label data "PanelBuildInfo: https://github.com/adamrossnelson/StataIPEDSAll"
notes _dta: "PanelBuildInfo: https://github.com/adamrossnelson/StataIPEDSAll"
notes _dta: "Panel built on `c(current_date)'"

// Write the resulting panel data set to the path held in global dtagbl.
saveold "$dtagbl", version(13) replace

// Announce the save location. The quietly block with noisily display keeps
// the banner free of command echo.
quietly {
    noisily display "#####################################################################"
    noisily display ""
    noisily display " Saved $dtagbl"
    noisily display ""
    noisily display "######################################################################"
}
log close