-
Notifications
You must be signed in to change notification settings - Fork 0
Data Preparation 2015
In the source code below, the data supplied by the Hawaii Department of Education is a pipe delimited file named Hawaii_Data_LONG_2015.txt and is located in the relative folder Data/Base_Files. Following the text data being read into R, the data is cleaned up variable by variable to ensure that it matches previously established data naming conventions.
###########################################################################
###
### R Syntax for construction of 2015 Hawaii LONG data file
###
###########################################################################
### Load SGP Package
require(SGP)
require(data.table)
### Load tab delimited data
Hawaii_Data_LONG_2015 <- fread("Data/Base_Files/Hawaii_Data_LONG_2015.txt", colClasses=rep("character", 34))
### Tidy up data
setnames(Hawaii_Data_LONG_2015, c("Valid_Case", "year", "grade", "lastName", "firstName", "EMH Level", "ELL Status", "Complex Area"),
c("VALID_CASE", "Year", "Gr", "LName", "FName", "EMH.Level", "ELL_STATUS_MULTILEVEL", "Complex.Area"))
Hawaii_Data_LONG_2015[,VALID_CASE:="VALID_CASE"]
Hawaii_Data_LONG_2015[,Gr:=as.character(as.numeric(Gr))]
Hawaii_Data_LONG_2015[Gr %in% c("1", "2", "9", "10", "91"), VALID_CASE:="INVALID_CASE"]
Hawaii_Data_LONG_2015[,DOE_Ethnic:=as.character(DOE_Ethnic)]
Hawaii_Data_LONG_2015[,Fed7_Ethnic:=as.factor(Fed7_Ethnic)]
Hawaii_Data_LONG_2015[,Fed5_Ethnic:=as.factor(Fed5_Ethnic)]
Hawaii_Data_LONG_2015[,Disadv:=as.factor(Disadv)]
Hawaii_Data_LONG_2015[,ELL:=as.factor(ELL)]
Hawaii_Data_LONG_2015[,SpEd:=as.factor(SpEd)]
Hawaii_Data_LONG_2015[,Migrant:=as.factor(Hawaii_Data_LONG_2015$Migrant)]
Hawaii_Data_LONG_2015[,Scale_Score:=as.numeric(Scale_Score)]
Hawaii_Data_LONG_2015[,FSY:=as.factor(FSY)]
Hawaii_Data_LONG_2015[,ETHNICITY:=as.character(Fed7_Ethnic)]
Hawaii_Data_LONG_2015[DOE_Ethnic %in% c("Native Hawaiian", "Part-Hawaiian"), ETHNICITY:="Native Hawaiian"]
Hawaii_Data_LONG_2015[,ETHNICITY:=as.factor(Hawaii_Data_LONG_2015$ETHNICITY)]
levels(Hawaii_Data_LONG_2015$ETHNICITY)[c(3,4)] <- c("Black or African American", "Hispanic or Latino")
Hawaii_Data_LONG_2015[District=="Charter", District:="Charter Schools"]
Hawaii_Data_LONG_2015[,Complex:=as.factor(Hawaii_Data_LONG_2015$Complex)]
levels(Hawaii_Data_LONG_2015$Complex) <- as.vector(sapply(levels(Hawaii_Data_LONG_2015$Complex), capwords))
levels(Hawaii_Data_LONG_2015$Complex)[c(21,24,38)] <- paste(levels(Hawaii_Data_LONG_2015$Complex)[c(21,24,38)], "Complex")
levels(Hawaii_Data_LONG_2015$Complex) <- as.vector(sapply(sapply(strsplit(sapply(levels(Hawaii_Data_LONG_2015$Complex), capwords), " "), head, -1), paste, collapse=" "))
levels(Hawaii_Data_LONG_2015$Complex)[29] <- "McKinley"
Hawaii_Data_LONG_2015[,Complex.Area:=as.factor(Complex.Area)]
levels(Hawaii_Data_LONG_2015$Complex.Area)[8] <- "Hilo-Laupahoehoe-Waiakea"
Hawaii_Data_LONG_2015[,Sex:=as.factor(Sex)]
Hawaii_Data_LONG_2015[,ELL_STATUS_MULTILEVEL:=as.factor(ELL_STATUS_MULTILEVEL)]
Hawaii_Data_LONG_2015[,School_Admin_Rollup:=as.factor(School_Admin_Rollup)]
Hawaii_Data_LONG_2015[,District:=as.factor(District)]
Hawaii_Data_LONG_2015[,STATE_ENROLLMENT_STATUS:=as.factor(STATE_ENROLLMENT_STATUS)]
Hawaii_Data_LONG_2015[,SCHOOL_ENROLLMENT_STATUS:=as.factor(SCHOOL_ENROLLMENT_STATUS)]
Hawaii_Data_LONG_2015[,DISTRICT_ENROLLMENT_STATUS:=as.factor(DISTRICT_ENROLLMENT_STATUS)]
Hawaii_Data_LONG_2015[,COMPLEX_ENROLLMENT_STATUS:=as.factor(COMPLEX_ENROLLMENT_STATUS)]
Hawaii_Data_LONG_2015[,COMPLEX_AREA_ENROLLMENT_STATUS:=as.factor(COMPLEX_AREA_ENROLLMENT_STATUS)]
Hawaii_Data_LONG_2015[,FSY_SchCode:=as.integer(FSY_SchCode)]
Hawaii_Data_LONG_2015[,HIGH_NEED_STATUS_DEMOGRAPHIC:=
factor(2, levels=1:2, labels=c("High Need Status: ELL, Special Education, or Disadvantaged Student", "High Need Status: Non-ELL, Non-Special Education, and Non-Disadvantaged Student"))]
Hawaii_Data_LONG_2015$HIGH_NEED_STATUS_DEMOGRAPHIC[Hawaii_Data_LONG_2015$Disadv=="Disadvantaged: Yes" | Hawaii_Data_LONG_2015$ELL=="ELL Status: Yes" | Hawaii_Data_LONG_2015$SpEd=="Special Education: Yes"] <- "High Need Status: ELL, Special Education, or Disadvantaged Student"
Hawaii_Data_LONG_2015[,SCHOOL_FSY_ENROLLMENT_STATUS:=factor(2, levels=1:2, labels=c("Enrolled School: No", "Enrolled School: Yes"))]
Hawaii_Data_LONG_2015$SCHOOL_FSY_ENROLLMENT_STATUS[Hawaii_Data_LONG_2015$SCHOOL_ENROLLMENT_STATUS=="Enrolled School: No" | Hawaii_Data_LONG_2015$FSY=="Full School Year Status: No"] <- "Enrolled School: No"
### Reorder variables
my.variable.order <- c(
"VALID_CASE", "Domain", "Year", "Gr", "IDNO", "LName", "FName", "SCode_Admin_Rollup", "School_Admin_Rollup", "FSY_SchCode", "EMH.Level", "DCode", "District", "CCode", "Complex",
"CACode", "Complex.Area", "Sex", "ETHNICITY", "HIGH_NEED_STATUS_DEMOGRAPHIC", "DOE_Ethnic", "Fed7_Ethnic", "Fed5_Ethnic", "Disadv", "ELL", "ELL_STATUS_MULTILEVEL", "SpEd",
"Migrant", "Scale_Score", "Proficiency_Level", "FSY", "SCHOOL_ENROLLMENT_STATUS", "DISTRICT_ENROLLMENT_STATUS", "COMPLEX_ENROLLMENT_STATUS", "COMPLEX_AREA_ENROLLMENT_STATUS",
"STATE_ENROLLMENT_STATUS", "SCHOOL_FSY_ENROLLMENT_STATUS")
setcolorder(Hawaii_Data_LONG_2015, my.variable.order)
### Save results
save(Hawaii_Data_LONG_2015, file="Data/Hawaii_Data_LONG_2015.Rdata")
Note that the naming conventions associated with Hawaii data provided by the DOE and the naming conventions used by the SGP Package are different. The
SGP Package accommodates different state data naming conventions through the use of a meta-data lookup table
embedded within the SGPstateData Rdata object in the package. The variable name lookup table for
Hawaii is shown below with names.provided giving the state specific names, names.sgp the SGP Package specific names, names.type giving the type
associated with the variable which is used within summarizeSGP
to construct
group summaries, names.info providing meta-data associated with the variable, and names.output a Boolean indicator of whether variable should be used
with summarizeSGP
.
names.provided | names.sgp | names.type | names.info | names.output |
---|---|---|---|---|
Year | YEAR | time | Year (testing year) associated with record | |
IDNO | ID | individual | Unique student identifier | |
LName | LAST_NAME | label | Last name of student | |
FName | FIRST_NAME | label | First name of student | |
Gr | GRADE | institution_level | Grade level of test taken | |
Domain | CONTENT_AREA | content | Content area (Mathematics or Reading) | |
Scale_Score | SCALE_SCORE | measure | Student scale score | |
Proficiency_Level | ACHIEVEMENT_LEVEL | measure | Achievement level associated with student score | |
SCode_Admin_Rollup | SCHOOL_NUMBER | institution | School number rolled up for administration purposes possibly containing multiple SCHOOL_NUMBERs | |
School_Admin_Rollup | SCHOOL_NAME | label | School name rolled up for administration purposes possibly containing multiple SCHOOL_NUMBERs | |
FSY_SchCode | SCHOOL_NUMBER_FSY | institution | School number rolled up for FSY reporting | |
GradeRange | GRADE_RANGE | institution_type | Grade range of school | |
SchlType | SCHOOL_TYPE | institution_type | Type of School (Charter/DOE/Special) | |
EMH.Level | EMH_LEVEL | institution_type | Elementary/Middle/High School designation | |
Type | SCHOOL_LEVEL | institution_type | Elementary/Elementary-Middle/Middle/Middle-High/High School designation | |
DCode | DISTRICT_NUMBER | institution | District number | |
District | DISTRICT_NAME | label | District name | |
CCode | COMPLEX_NUMBER | institution | Complex number | |
Complex | COMPLEX_NAME | label | Complex name | |
CACode | COMPLEX_AREA_NUMBER | institution | Complex area number | |
Complex.Area | COMPLEX_AREA_NAME | label | Complex area name | |
ETHNICITY | ETHNICITY | demographic | Student ethnicity combining FED7 and DOE for Native Hawaiian | TRUE |
DOE_Ethnic | ETHNICITY_DOE | demographic | Student ethnicity using DOE categories | TRUE |
Fed5_Ethnic | ETHNICITY_FED5 | demographic | Student ethnicity using FED 5 categories | FALSE |
Fed7_Ethnic | ETHNICITY_FED7 | demographic | Student ethnicity using FED 7 categories | FALSE |
Disadv | DISADVANTAGED_STATUS | demographic | Disadvantaged status | TRUE |
ELL | ELL_STATUS | demographic | Student ELL status indicator | TRUE |
ELL_STATUS_MULTILEVEL | ELL_STATUS_MULTILEVEL | demographic | Student ELL multi-level status indicator | TRUE |
SpEd | SPECIAL_EDUCATION_STATUS | demographic | Student special education status indicator | TRUE |
Sex | GENDER | demographic | Student gender | TRUE |
Source | TEST_ADMINISTRATION | type of test | HSA/HSAA/HLIP/HAPA/Linapuni | |
Migrant | MIGRANT_STATUS | demographic | Student migrant status | TRUE |
HIGH_NEED_STATUS | HIGH_NEED_STATUS | demographic | High need status flag | TRUE |
HIGH_NEED_STATUS_DEMOGRAPHIC | HIGH_NEED_STATUS_DEMOGRAPHIC | demographic | High need status flag indicating ELL or SPED or DISADVANTAGED | TRUE |
FSY | FULL_SCHOOL_YEAR_STATUS | demographic | Full school year status indicator | TRUE |
SCHOOL_ENROLLMENT_STATUS | SCHOOL_ENROLLMENT_STATUS | institution_inclusion | School inclusion/accountability indicator | |
SCHOOL_FSY_ENROLLMENT_STATUS | SCHOOL_FSY_ENROLLMENT_STATUS | institution_inclusion | School inclusion/accountability indicator | |
DISTRICT_ENROLLMENT_STATUS | DISTRICT_ENROLLMENT_STATUS | institution_inclusion | District inclusion/accountability indicator | |
COMPLEX_ENROLLMENT_STATUS | COMPLEX_ENROLLMENT_STATUS | institution_inclusion | Complex inclusion/accountability indicator | |
COMPLEX_AREA_ENROLLMENT_STATUS | COMPLEX_AREA_ENROLLMENT_STATUS | institution_inclusion | Complex Area inclusion/accountability indicator | |
STATE_ENROLLMENT_STATUS | STATE_ENROLLMENT_STATUS | institution_inclusion | State inclusion/accountability indicator | |
VALID_CASE | VALID_CASE | individual inclusion | Valid case indicator |
In addition to preparation the student level file for analysis, Hawaii also utilizes the SGP package SGP Package for the summarization of instructor level data. Utilization of this requires a student-instructor lookup file so that appropriate summary level variables can be created. In the source code below, student-instructor data provided by the Hawaii Department of Education is read into R and cleaned for use in SGP analyses.
################################################################################
###
### R Syntax for for the construction of the 2015 student-instructor lookup table
###
################################################################################
### Load SGP Package
require(SGP)
require(data.table)
###########################################################################
###
### Create teacher linkage file
###
###########################################################################
### Load data
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER <- fread("Data/Base_Files/BFK_Cleaned_Spring_2015.txt", colClasses=rep("character", 13))
### Remove duplicates
setkeyv(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER, names(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER))
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER <- Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[!duplicated(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER)]
### Extract relevant variables
variables.to.use <- c("StaffUniqueID", "StaffLastName", "StaffFirstName", "SchoolName", "SchoolCode_[NEW]", "SubjectName", "StateStudentID", "rounded_terms")
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER <- Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,variables.to.use, with=FALSE]
### Tidy up data
setnames(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER,
names(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER),
c("INSTRUCTOR_NUMBER", "INSTRUCTOR_LAST_NAME", "INSTRUCTOR_FIRST_NAME", "SCHOOL_NAME_INSTRUCTOR", "SCHOOL_NUMBER_INSTRUCTOR", "CONTENT_AREA", "ID", "TERMS"))
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER$YEAR <- "2015"
setcolorder(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER,
c("ID","CONTENT_AREA", "YEAR", "INSTRUCTOR_NUMBER", "INSTRUCTOR_LAST_NAME", "INSTRUCTOR_FIRST_NAME", "SCHOOL_NUMBER_INSTRUCTOR", "SCHOOL_NAME_INSTRUCTOR", "TERMS"))
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[CONTENT_AREA=="Math", CONTENT_AREA := "MATHEMATICS"]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[CONTENT_AREA=="ELA", CONTENT_AREA := "READING"]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[, INSTRUCTOR_ENROLLMENT_STATUS := factor(1, levels=0:1, labels=c("Enrolled Instructor: No", "Enrolled Instructor: Yes"))]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,TERMS:=as.numeric(TERMS)]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,INSTRUCTOR_LAST_NAME:=as.factor(INSTRUCTOR_LAST_NAME)]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,INSTRUCTOR_FIRST_NAME:=as.factor(INSTRUCTOR_FIRST_NAME)]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,SCHOOL_NUMBER_INSTRUCTOR := as.integer(SCHOOL_NUMBER_INSTRUCTOR)]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,SCHOOL_NAME_INSTRUCTOR:=as.factor(SCHOOL_NAME_INSTRUCTOR)]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,VALID_CASE:="VALID_CASE"]
### Create TERMS variable from sum of TERMS
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER <- Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,TERMS:=sum(TERMS, na.rm=TRUE), keyby=list(ID, CONTENT_AREA, INSTRUCTOR_NUMBER)]
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER <- Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[!duplicated(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER)]
### Create Weight Variable
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,INSTRUCTOR_WEIGHT:=round(TERMS/sum(TERMS, na.rm=TRUE), digits=2), by=list(ID, CONTENT_AREA)]
### NULL out extraneous variables
Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER[,TERMS:=NULL]
### Set column order
tmp.column.order <- c('ID', 'CONTENT_AREA', 'YEAR', 'INSTRUCTOR_NUMBER', 'INSTRUCTOR_LAST_NAME', 'INSTRUCTOR_FIRST_NAME',
'SCHOOL_NUMBER_INSTRUCTOR', 'SCHOOL_NAME_INSTRUCTOR', 'INSTRUCTOR_WEIGHT', 'INSTRUCTOR_ENROLLMENT_STATUS', 'VALID_CASE')
setcolorder(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER, tmp.column.order)
setkeyv(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER, c("ID", "CONTENT_AREA", "YEAR"))
### Save results
save(Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER, file="Data/Hawaii_Data_LONG_2015_INSTRUCTOR_NUMBER.Rdata")
SGP - Student Growth Percentiles SGP Blog | SGP GitHub Repo | SGP on CRAN