-
Notifications
You must be signed in to change notification settings - Fork 5
/
IPEDSInstChar02to18.do
176 lines (157 loc) · 8.11 KB
/
IPEDSInstChar02to18.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
set more off
clear all
cls
// Use this code to download, build, and save to the local computer
// data from the INSTITUTIONAL CHARACTERISTICS survey at the US DOE's
// Integrated Postsecondary Education Data System.
// Dec/2019: Adam Ross Nelson - Updated to include 2018 ADM files.
// Oct/2019: Adam Ross Nelson - Updated to include 2018 IC datafiles.
// The 2018 ADM files not yet published.
// Jan/2019: Adam Ross Nelson - Refactored, reduced line count.
// Dec/2018: Adam Ross Nelson - Updated to include 2017 datafiles.
// Jan/2018: Naiya Patel - Updated to include 2016 (rv) datafiles.
// Oct/2017: Adam Ross Nelson - Updated to include 2016 datafiles.
// Sep/2017: Adam Ross Nelson - Updated to use sshnd file picker.
// Sep/2017: Adam Ross Nelson - GitHub ReBuild.
// Apr/2017: Adam Ross Nelson - Initial Build.
/*#############################################################################
File maintained at
https://github.com/adamrossnelson/StataIPEDSAll
##############################################################################*/
// Utilizes the packaged version of sshnd (interactive file picker).
// Stable 1.0 version of sshnd documentation available at:
// https://github.com/adamrossnelson/sshnd/tree/1.0
// NOTE(review): the line below runs sshnd.do from the master branch, not
// from the 1.0 tag referenced above - confirm that this is intended.
// The globals loggbl and dtagbl used later in this script are presumably
// defined by sshnd.do - verify against that file.
do https://raw.githubusercontent.com/adamrossnelson/sshnd/master/sshnd.do
capture log close // Close stray log files.
log using "$loggbl", append // Append sshnd established log file.
// The spacer is stored as unevaluated text, so each later -di- of this
// local displays the four characters CR, LF, CR, LF.
local sp char(13) char(10) char(13) char(10) // Define spacer.
version 13 // Enforce version compatibility.
di c(pwd) // Confirm working directory.
// ADM series (Admissions and Test Scores), introduced in 2014.
// For each year 2014-2018 this loop:
//   1. downloads and unzips the ADM data archive,
//   2. downloads and unzips the NCES provided Stata do file archive,
//   3. rewrites the NCES do file so that it no longer imports or saves data
//      on its own (this master do-file handles both steps itself).
// Every download and unzip uses -replace- and an explicit destination name,
// so the script can be re-run in a directory that already holds the files
// from an earlier (possibly interrupted) run instead of aborting.
forvalues fname = 2014/2018 {
    // Copy and unzip data and do files.
    // Stata 13 introduced support for copy to work with https.
    // Use command -update all- if Stata 13 and copy returns an error.
    copy https://nces.ed.gov/ipeds/datacenter/data/ADM`fname'_Data_Stata.zip ///
        ADM`fname'_Data_Stata.zip, replace
    unzipfile ADM`fname'_Data_Stata.zip, replace
    copy https://nces.ed.gov/ipeds/datacenter/data/ADM`fname'_Stata.zip ///
        ADM`fname'_Stata.zip, replace
    unzipfile ADM`fname'_Stata.zip, replace

    // The NCES provided do files have some lines that need to be removed
    // before we can call them from this master -do-file.
    // Only the first "insheet" is commented out (count argument 1), while
    // every occurrence of "save" is commented out (count argument missing).
    scalar fcontents = fileread("adm`fname'.do")
    scalar fcontents = subinstr(fcontents, "insheet", "// insheet", 1)
    scalar fcontents = subinstr(fcontents, "save", "// save", .)

    // Save edited do file (third argument 1 overwrites the existing file).
    scalar byteswritten = filewrite("adm`fname'.do", fcontents, 1)
    di `sp' // Spacing to assist reading output.
}
// TODO: Merge the below and the above for loops into a single loop.
// Second ADM pass: for each year 2014-2018 import the csv, apply the edited
// NCES do file, tag the rows with the survey year, and save a dta file.
forvalues fname = 2014/2018 {
    // Years after 2007 and before 2018 are read from the _rv_ csv file;
    // any other year is read from the csv file without the _rv_ infix.
    local rv = cond(`fname' > 2007 & `fname' < 2018, "_rv", "")
    import delimited adm`fname'`rv'_data_stata.csv, clear

    // Tell the log reader which NCES do file runs next, then run it quietly.
    display "QUIET RUN OF adm`fname'.do"
    quietly do adm`fname'

    // isYr is the year index used by the later merge; it is placed right
    // after unitid for easier browsing.
    generate isYr = `fname'
    order isYr, after(unitid)

    compress
    saveold adm`fname'_data_stata.dta, version(13) replace
    display `sp'
}
// IC series (Institutional Characteristics), 2002-2018.
// For each year this loop downloads and imports the data, downloads the
// NCES provided Stata do file, cleans that do file (comments out its own
// import and save commands, removes stray char(13) + char(10) + char(34)
// sequences), runs it against the imported data, and saves a dta file.
// Every download and unzip uses -replace- and an explicit destination name,
// so the script can be re-run in a directory that already holds the files
// from an earlier (possibly interrupted) run instead of aborting.
forvalues fname = 2002 / 2018 {
    // Copy, unzip, and import data.
    copy https://nces.ed.gov/ipeds/datacenter/data/IC`fname'_Data_Stata.zip ///
        IC`fname'_Data_Stata.zip, replace
    unzipfile IC`fname'_Data_Stata.zip, replace

    // File name conventions not consistent through the years. 2002-2007,
    // 2009, and 2018 are read from the file without _rv_. 2008 and
    // 2010-2017 are read from the _rv_ file.
    if `fname' == 2008 | (`fname' > 2009 & `fname' < 2018) {
        import delimited ic`fname'_rv_data_stata.csv, clear
    }
    else {
        import delimited ic`fname'_data_stata.csv, clear
    }

    // Add isYr for later panel merge. Order new variable.
    gen int isYr = `fname'
    order isYr, after(unitid)

    // Download NCES provided do files.
    copy https://nces.ed.gov/ipeds/datacenter/data/IC`fname'_Stata.zip ///
        IC`fname'_Stata.zip, replace
    unzipfile IC`fname'_Stata.zip, replace

    // Read do file into scalar for modification.
    // Comment out the first "insheet" command designed to import data.
    // Comment out every "save" command designed to save data.
    scalar fcontents = fileread("ic`fname'.do")
    scalar fcontents = subinstr(fcontents, "insheet", "// insheet", 1)
    scalar fcontents = subinstr(fcontents, "save", "// save", .)

    // Comment out the label_chfnm definitions, which are erroneous code in
    // the 2003 do file. The substitution is applied to every year.
    scalar fcontents = subinstr(fcontents, "label define label_chfnm", "*label define label_chfnm Alpha", .)

    // Remove unexpected carriage returns and line feeds that precede a
    // double quote. The longer (double CR LF) pattern is handled first.
    scalar sstring = char(13) + char(10) + char(13) + char(10) + char(34)
    scalar fcontents = subinstr(fcontents, sstring, char(34), .)
    scalar sstring = char(13) + char(10) + char(34)
    scalar fcontents = subinstr(fcontents, sstring, char(34), .)

    // Save (and call) the revised and working do file.
    scalar byteswritten = filewrite("ic`fname'.do", fcontents, 1)
    di "QUIET RUN OF ic`fname'" // Provide information for log file.
    qui do ic`fname' // Quietly run NCES provided do file.
    di `sp' // Spacing to assist reading output.

    // Compress and save the resulting dta file.
    compress
    saveold ic`fname'_data_stata.dta, version(13) replace
    di `sp' // Spacer for the output.
}
// Assemble the panel data set from the yearly dta files created above.
// The most recent IC file (2018) is loaded first and the older years are
// appended in descending order. Procedure assumes the most recent dta value
// labels will be most valid and reliable for the intended research or
// analytical purpose.
display `sp'
use ic2018_data_stata.dta, clear
forvalues yr = 2017(-1)2002 {
    append using ic`yr'_data_stata.dta, force
    display `sp'
}

// Bring in the ADM surveys (2014-2018), matched on institution and year.
forvalues yr = 2014/2018 {
    merge 1:1 unitid isYr using "adm`yr'_data_stata.dta", nogenerate update force
}
// Correct duplicate value labels (which will work with Stata). But causes
// error when using data in other settings (e.g. Python Pandas).
// Documentation: https://stackoverflow.com/a/46038793/9572143
// labelbook reports, in r(nuniq), the value labels whose text is not unique.
// The list is captured immediately so no later command can overwrite r().
quietly labelbook, length(12)
local ambiguous `r(nuniq)'
// Guard against an empty list: -numlabel- without label names would add the
// numeric prefix to every value label rather than only the ambiguous ones.
if "`ambiguous'" != "" {
    quietly numlabel `ambiguous', add
}
// Finalize the panel: step up one directory level, attach the data label
// and notes, drop variables whose names start with x, compress, and save
// the panel to the path held in the dtagbl global.
cd ..

label data "PanelBuildInfo: https://github.com/adamrossnelson/StataIPEDSAll"
notes _dta: "PanelBuildInfo: https://github.com/adamrossnelson/StataIPEDSAll"
notes _dta: "Panel built on `c(current_date)'"
notes _dta: "Note regarding history of IC and ADM survey files. ADM . . ."
notes _dta: "series introduced in 2014 Some variables formerly found . . ."
notes _dta: "in the IC series moved to ADM series."

drop x*
compress

saveold "$dtagbl", version(13) replace
// Closing banner. The quietly block suppresses command echo, while each
// noisily display still writes its message to the screen and the log.
quietly {
    noisily display as result "#####################################################################"
    noisily display as result ""
    noisily display as result " Saved $dtagbl"
    noisily display as result ""
    noisily display as result " Note regarding history of IC and ADM survey files. ADM series"
    noisily display as result " introduced in 2014. Some variables formerly found in the IC"
    noisily display as result " series moved to ADM series. This routine builds ADM and IC"
    noisily display as result " sets apart. Then merges the 2014 through 2018 ADM surveys."
    noisily display as result ""
    noisily display as result " Also note description of ADM series from NCES dictionary..."
    noisily display as result " These [ADM] data are applicable for institutions that do not"
    noisily display as result " have an open admin... policy for entering first-time students."
    noisily display as result ""
    noisily display as result "######################################################################"
}
// Close the log opened during setup.
log close