-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ff7dfe9
commit bc20322
Showing
6 changed files
with
352 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.Rapp.history | ||
.DS_Store | ||
.RData | ||
.Rdata | ||
/Data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
############################################################################################################################ | ||
#### | ||
#### Code for preparation of New_Jersey data | ||
#### | ||
############################################################################################################################ | ||
|
||
library("SGP") | ||
library("doMC") | ||
#registerDoMC(4) # Can run sequentially if you prefer/can't parallelize | ||
#getDoParWorkers() | ||
|
||
### | ||
### Reading in .csv files as data.tables - pipe (|) separated files. | ||
### | ||
|
||
# Raw NJASK extracts (pipe-delimited), one file per content area.
# NOTE(review): the two file names use different separators ("2010_2011" vs "2010-2011") - confirm both match the files on disk.
my.files <- c("NJASK_2010_2011_Math.csv", "NJASK_2010-2011_ELA.csv")

# Read each file from the sibling Data directory via file.path() rather than moving the working directory with setwd().
# stringsAsFactors=TRUE is spelled out because the levels()/droplevels() calls further down need factor columns,
# and R >= 4.0.0 no longer creates them by default.
# The per-file data.tables are stacked with rbind; .inorder=FALSE means the row order across files is not guaranteed.
New_Jersey_Data <- list(Student = foreach (i = my.files, .combine = "rbind", .packages = "data.table", .inorder=FALSE) %dopar% {
	data.table(read.csv(file.path("..", "Data", i), sep="|", stringsAsFactors=TRUE))
})
|
||
|
||
### | ||
### Re-Name the data - substitute _ for . and capitalize names. Here's a function to do it: | ||
### | ||
|
||
# Turn a dotted column name (as produced by read.csv) into an upper-case,
# underscore-separated name, e.g. "Testing.Year" -> "TESTING_YEAR".
# Only the first element of x is used. Everything from the first double
# underscore on (i.e. after two consecutive dots in the input) is discarded,
# so "Special.Education..SE." becomes "SPECIAL_EDUCATION".
subSpecial <- function(x) {
	pieces <- strsplit(x, split=".", fixed=TRUE)[[1]]
	joined <- paste(toupper(pieces), collapse="_")
	strsplit(joined, split="__")[[1]][1]
}
|
||
|
||
# rename: run subSpecial() over every column name in one vectorised pass.
# This replaces the 1:ncol index loop, which re-assigned the names once per column
# and would iterate over c(1, 0) for a table without columns.
names(New_Jersey_Data[["Student"]]) <- vapply(names(New_Jersey_Data[["Student"]]), subSpecial, character(1), USE.NAMES=FALSE)

# Interactive check of the column classes after the import.
sapply(New_Jersey_Data[["Student"]], class)

# The first column is renamed by position rather than by its original name.
names(New_Jersey_Data[["Student"]])[1] <- "TESTING_YEAR"
New_Jersey_Data[["Student"]]$SCALED_SCORE <- as.numeric(New_Jersey_Data[["Student"]]$SCALED_SCORE) # For SGP function - doesn't always like integers...
New_Jersey_Data[["Student"]]$SUBJECT <- toupper(New_Jersey_Data[["Student"]]$SUBJECT) # Any field that gets 'keyed' on needs to be ALL CAPS
|
||
|
||
############################################################################################################################ | ||
### IDENTIFY VALID CASES | ||
### | ||
### Duplicate rows for individual students may be the only issue: | ||
### ALL scores are in range of 100 - 300, so NO SCALED_SCORE greater than LOSS and less than HOSS | ||
### All students in grades 3 - 8 (assume taking grade level examinations?) | ||
### | ||
############################################################################################################################ | ||
|
||
# Every record starts out as a VALID_CASE; duplicates are downgraded to INVALID_CASE below.
New_Jersey_Data[["Student"]][["VALID_CASE"]] <- factor(1, levels=1:2, labels=c("VALID_CASE", "INVALID_CASE"))

### Duplicated Records

# setkeyv() replaces the deprecated `key(x) <- value` form. Its return value is assigned back so the
# list element is guaranteed to hold the re-keyed table.
New_Jersey_Data[["Student"]] <- setkeyv(New_Jersey_Data[["Student"]], c("STUDENT_ID", "TESTING_YEAR", "SUBJECT"))

# Inspect the duplicates first to see what's going on.
# by=key(...) restricts the comparison to the key columns; without it, current data.table versions
# compare ALL columns, so records that differ only in score would not count as duplicates.
dup.ids <- New_Jersey_Data[["Student"]]$STUDENT_ID[which(duplicated(New_Jersey_Data[["Student"]], by=key(New_Jersey_Data[["Student"]])))]
dups <- New_Jersey_Data[["Student"]][New_Jersey_Data[["Student"]]$STUDENT_ID %in% dup.ids]
length(dup.ids) # only a handful, but we'll try to keep the best of the lot
dim(dups)
summary(dups)

# Invalidate lowest score for duplicates.
# Step 1: sort by score within VALID_CASE/student/year/subject.
# Step 2: drop SCALED_SCORE from the key (row order is kept), so duplicated() flags every record after the
#         first (lowest-scoring) one in each group.
# Step 3: shift the flagged positions back by one row, which marks every record except the highest-scoring one.
New_Jersey_Data[["Student"]] <- setkeyv(New_Jersey_Data[["Student"]], c("VALID_CASE", "STUDENT_ID", "TESTING_YEAR", "SUBJECT", "SCALED_SCORE"))
New_Jersey_Data[["Student"]] <- setkeyv(New_Jersey_Data[["Student"]], c("VALID_CASE", "STUDENT_ID", "TESTING_YEAR", "SUBJECT"))
New_Jersey_Data[["Student"]][["VALID_CASE"]][which(duplicated(New_Jersey_Data[["Student"]], by=key(New_Jersey_Data[["Student"]])) & New_Jersey_Data[["Student"]]$VALID_CASE=="VALID_CASE")-1] <- "INVALID_CASE"
|
||
############################################################################################################################ | ||
### | ||
### Create Additional Variables: | ||
### | ||
############################################################################################################################ | ||
|
||
### Prior Performance Levels
# Make the Performance Levels an ORDERED factor
New_Jersey_Data[["Student"]]$PERFORMANCE_LEVEL <- ordered(New_Jersey_Data[["Student"]]$PERFORMANCE_LEVEL, levels=c("Partially Proficient", "Proficient", "Advanced Proficient"))

# Use data.table to select each "Valid" student record from last year and tack on the scaled score and performance level from that record onto the current year record (as *_PRIOR).
# setkeyv() replaces the deprecated `key(x) <- value` form; the result is assigned back so the list element holds the re-keyed table.
New_Jersey_Data[["Student"]] <- setkeyv(New_Jersey_Data[["Student"]], c("STUDENT_ID", "SUBJECT", "TESTING_YEAR", "VALID_CASE"))
# Keyed self-join: look up (STUDENT_ID, SUBJECT, TESTING_YEAR-1, "VALID_CASE") and take the last match per lookup row.
# NOTE(review): the assignment relies on the join result coming back in the same row order as the keyed table - confirm.
New_Jersey_Data[["Student"]]$SCALED_SCORE_PRIOR <- New_Jersey_Data[["Student"]][SJ(STUDENT_ID, SUBJECT, TESTING_YEAR-1, "VALID_CASE"), mult="last"][,SCALED_SCORE]
New_Jersey_Data[["Student"]]$PERFORMANCE_LEVEL_PRIOR <- New_Jersey_Data[["Student"]][SJ(STUDENT_ID, SUBJECT, TESTING_YEAR-1, "VALID_CASE"), mult="last"][,PERFORMANCE_LEVEL]
|
||
### Unique School Identifier ?

# District codes seen for one school code, then the overall range of school codes.
summary(New_Jersey_Data[["Student"]][["DISTRICT_CODE"]][New_Jersey_Data[["Student"]][["SCHOOL_CODE"]] == 150]) # tried with 50, 100, 999 - definitely not a unique ID
summary(New_Jersey_Data[["Student"]][["SCHOOL_CODE"]]) # All 3 digit numbers

# Build the identifier by putting the district code in front of the three-digit school code.
New_Jersey_Data[["Student"]][["UNIQUE_SCHOOL_NUMBER"]] <-
	1000 * New_Jersey_Data[["Student"]][["DISTRICT_CODE"]] + New_Jersey_Data[["Student"]][["SCHOOL_CODE"]]

# Sanity checks on the new identifier.
summary(New_Jersey_Data[["Student"]][["UNIQUE_SCHOOL_NUMBER"]])
summary(New_Jersey_Data[["Student"]][["UNIQUE_SCHOOL_NUMBER"]] %% 1000) # Modulo 1000 returns SCHOOL_CODE
|
||
############################################################################################################################ | ||
### | ||
### SGP standard variable names: | ||
### | ||
############################################################################################################################ | ||
|
||
# Column names expected by the SGP package, in the column order of the table built above.
my.names <- c("YEAR", "Testing.Program", "CONTENT_AREA", "GRADE", "ID", "DISTRICT_NUMBER", "School.Code", "County.Name",
	"DISTRICT_NAME", "SCHOOL_NAME", "DFG", "Gender", "Race.Ethnicity.Combined", "Title.I.LAL", "Title.I.Math",
	"Special.Education..SE.", "General.ED", "Former.LEP", "Current.LEP", "Time.in.District.Less.Than.1.Year",
	"Economically.Disadvantaged", "Migrant", "SCALE_SCORE", "ACHIEVEMENT_LEVEL", "VALID_CASE", "SCALE_SCORE_PRIOR",
	"ACHIEVEMENT_LEVEL_PRIOR", "SCHOOL_NUMBER")

# The rename is purely positional, so fail loudly if the column count has drifted.
stopifnot(length(my.names) == ncol(New_Jersey_Data[["Student"]]))
names(New_Jersey_Data[["Student"]]) <- my.names
New_Jersey_Data[["Student"]]$SCALE_SCORE_PRIOR <- NULL
New_Jersey_Data[["Student"]]$ID <- factor(New_Jersey_Data[["Student"]]$ID)

# CONTENT_AREA holds the output of toupper(), i.e. a character vector, so assigning levels() to it
# would not recode any value. Recode explicitly instead, as the 2012 preparation script does.
# NOTE(review): assumes the upper-cased subject values are "ELA" and "MATH" - confirm against the raw files.
New_Jersey_Data[["Student"]]$CONTENT_AREA <- as.character(New_Jersey_Data[["Student"]]$CONTENT_AREA)
New_Jersey_Data[["Student"]]$CONTENT_AREA[New_Jersey_Data[["Student"]]$CONTENT_AREA=="MATH"] <- "MATHEMATICS"

# Blank gender codes become NA before the remaining levels are relabelled.
New_Jersey_Data[["Student"]]$Gender[New_Jersey_Data[["Student"]]$Gender==""] <- NA
New_Jersey_Data[["Student"]]$Gender <- droplevels(New_Jersey_Data[["Student"]]$Gender)
levels(New_Jersey_Data[["Student"]]$Gender) <- c("Female", "Male")

# The relabelling below is positional: each vector must line up with the existing level order of its column.
levels(New_Jersey_Data[["Student"]]$Race.Ethnicity.Combined) <- c("Asian", "Black", "Hispanic", "Native American", "Other", "Pacific Islander", "White")
levels(New_Jersey_Data[["Student"]]$Title.I.LAL) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$Title.I.Math) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$General.ED) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$Economically.Disadvantaged) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$Migrant) <- c("No", "Yes")

# Save the prepared long-format table for 2011

New_Jersey_Data_LONG_2011 <- New_Jersey_Data$Student
save(New_Jersey_Data_LONG_2011, file="../Data/New_Jersey_Data_LONG_2011.Rdata", compress=TRUE) #
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
################################################################################ | ||
### | ||
### Create New Jersey Data LONG for 2012 | ||
### | ||
################################################################################ | ||
|
||
### Load SGP Package

# library() stops right away if SGP is not installed; require() would only return FALSE and let the script fail later.
library(SGP)


### Load data

# Pipe-delimited NJASK extracts. stringsAsFactors=TRUE is spelled out because the levels() assignments
# further down need factor columns, and R >= 4.0.0 no longer creates them by default.
New_Jersey_Data_ELA <- read.csv("Data/Base_Files/NJASK_2011_2012_ELA.csv", sep="|", stringsAsFactors=TRUE)
New_Jersey_Data_MATH <- read.csv("Data/Base_Files/NJASK_2011_2012_MATH.csv", sep="|", stringsAsFactors=TRUE)


### Combine ELA and MATH

New_Jersey_Data_LONG_2012 <- rbind(New_Jersey_Data_ELA, New_Jersey_Data_MATH)
|
||
|
||
### Tidy up data

# Positional rename of all 26 columns to the names used by the SGP package.
names(New_Jersey_Data_LONG_2012) <- c(
	"YEAR", "Testing.Program", "CONTENT_AREA", "GRADE", "ID", "Student.ID..SSID.",
	"DISTRICT_NUMBER", "School.Code", "County.Name", "DISTRICT_NAME", "SCHOOL_NAME", "DFG",
	"Gender", "Race.Ethnicity.Combined", "Title.I.LAL", "Title.I.Math",
	"Special.Education..SE.", "General.ED", "Former.LEP", "Current.LEP",
	"Time.in.District.Less.Than.1.Year", "Economically.Disadvantaged", "Migrant", "Homeless",
	"SCALE_SCORE", "ACHIEVEMENT_LEVEL")

# Columns that are not carried forward.
New_Jersey_Data_LONG_2012[["Homeless"]] <- NULL
New_Jersey_Data_LONG_2012[["Former.LEP"]] <- NULL
New_Jersey_Data_LONG_2012[["Testing.Program"]] <- NULL

# Content area as character, with "Math" spelled out.
New_Jersey_Data_LONG_2012[["CONTENT_AREA"]] <- as.character(New_Jersey_Data_LONG_2012[["CONTENT_AREA"]])
New_Jersey_Data_LONG_2012[["CONTENT_AREA"]][New_Jersey_Data_LONG_2012[["CONTENT_AREA"]] == "Math"] <- "MATHEMATICS"

# Student ID as character; the literal string "NULL" stands for a missing ID.
New_Jersey_Data_LONG_2012[["ID"]] <- as.character(New_Jersey_Data_LONG_2012[["ID"]])
is.na(New_Jersey_Data_LONG_2012[["ID"]]) <- which(New_Jersey_Data_LONG_2012[["ID"]] == "NULL")

# Gender: blank -> NA, drop the now-empty level, then relabel the remaining two.
is.na(New_Jersey_Data_LONG_2012[["Gender"]]) <- which(New_Jersey_Data_LONG_2012[["Gender"]] == "")
New_Jersey_Data_LONG_2012[["Gender"]] <- factor(New_Jersey_Data_LONG_2012[["Gender"]])
levels(New_Jersey_Data_LONG_2012[["Gender"]]) <- c("Female", "Male")

# Positional relabelling of existing factor levels.
levels(New_Jersey_Data_LONG_2012[["Race.Ethnicity.Combined"]]) <- c("Asian", "Black", "Hispanic", "Native American", "Other", "Pacific Islander", "White")
levels(New_Jersey_Data_LONG_2012[["General.ED"]]) <- c("General Education: No", "General Education: Yes")

# Current LEP: blank -> NA, drop the now-empty level, then relabel the remaining five.
is.na(New_Jersey_Data_LONG_2012[["Current.LEP"]]) <- which(New_Jersey_Data_LONG_2012[["Current.LEP"]] == "")
New_Jersey_Data_LONG_2012[["Current.LEP"]] <- factor(New_Jersey_Data_LONG_2012[["Current.LEP"]])
levels(New_Jersey_Data_LONG_2012[["Current.LEP"]]) <- c("Less than 1 Year", "1 Year", "2 Years", "3 Years", "Yes")

levels(New_Jersey_Data_LONG_2012[["Time.in.District.Less.Than.1.Year"]]) <- c("Time in District Less than 1 Year: No", "Time in District Less than 1 Year: Yes")

# Three raw levels collapse to two: the first and third both become "Yes". Rebuilding the factor from
# its character values leaves the two labels as levels in alphabetical order.
levels(New_Jersey_Data_LONG_2012[["Economically.Disadvantaged"]]) <- c("Economically Disadvantaged: Yes", "Economically Disadvantaged: No", "Economically Disadvantaged: Yes")
New_Jersey_Data_LONG_2012[["Economically.Disadvantaged"]] <- factor(as.character(New_Jersey_Data_LONG_2012[["Economically.Disadvantaged"]]))

levels(New_Jersey_Data_LONG_2012[["Migrant"]]) <- c("Migrant: No", "Migrant: Yes")

# Both Title I columns are given a single "No" level.
levels(New_Jersey_Data_LONG_2012[["Title.I.LAL"]]) <- "Title I LAL: No"
levels(New_Jersey_Data_LONG_2012[["Title.I.Math"]]) <- "Title I Math: No"

# Achievement level as an ordered factor; any value outside the three listed levels (including blanks) becomes NA.
New_Jersey_Data_LONG_2012[["ACHIEVEMENT_LEVEL"]] <- ordered(New_Jersey_Data_LONG_2012[["ACHIEVEMENT_LEVEL"]], levels=c("Partially Proficient", "Proficient", "Advanced Proficient"))

# School identifier: district number in front of the school code.
New_Jersey_Data_LONG_2012[["SCHOOL_NUMBER"]] <- with(New_Jersey_Data_LONG_2012, DISTRICT_NUMBER*1000 + School.Code)
|
||
### Identify Valid Cases | ||
|
||
# Every record starts out valid; records without a student ID are invalid from the start.
New_Jersey_Data_LONG_2012$VALID_CASE <- "VALID_CASE"
New_Jersey_Data_LONG_2012$VALID_CASE[is.na(New_Jersey_Data_LONG_2012$ID)] <- "INVALID_CASE"

New_Jersey_Data_LONG_2012 <- as.data.table(New_Jersey_Data_LONG_2012)

setkeyv(New_Jersey_Data_LONG_2012, c("VALID_CASE", "ID", "YEAR", "CONTENT_AREA"))

# Inspect the duplicates first to see what's going on.
# by=key(...) restricts the comparison to the key columns; without it, current data.table versions
# compare ALL columns, so records that differ only in score would not count as duplicates.
dup.ids <- New_Jersey_Data_LONG_2012$ID[which(duplicated(New_Jersey_Data_LONG_2012, by=key(New_Jersey_Data_LONG_2012)))]
dups <- New_Jersey_Data_LONG_2012[New_Jersey_Data_LONG_2012$ID %in% dup.ids]
length(dup.ids) # only a handful, but we'll try to keep the best of the lot
dim(dups)
summary(dups)

# Invalidate lowest score for duplicates.
# Step 1: sort by score within VALID_CASE/student/year/content area.
# Step 2: drop SCALE_SCORE from the key (row order is kept), so duplicated() flags every record after the
#         first (lowest-scoring) one in each group.
# Step 3: shift the flagged positions back by one row, which marks every record except the highest-scoring one.

setkeyv(New_Jersey_Data_LONG_2012, c("VALID_CASE", "ID", "YEAR", "CONTENT_AREA", "SCALE_SCORE"))
setkeyv(New_Jersey_Data_LONG_2012, c("VALID_CASE", "ID", "YEAR", "CONTENT_AREA"))
New_Jersey_Data_LONG_2012[["VALID_CASE"]][which(duplicated(New_Jersey_Data_LONG_2012, by=key(New_Jersey_Data_LONG_2012)) & New_Jersey_Data_LONG_2012$VALID_CASE=="VALID_CASE")-1] <- "INVALID_CASE"
|
||
|
||
# ENROLLMENT_STATUS

# Every record is flagged as enrolled at state, district and school level.
# The value 1 maps to the SECOND label when levels=0:1, so "No" has to come first.
# With the labels the other way round, every student was marked "Enrolled ...: No".
New_Jersey_Data_LONG_2012$STATE_ENROLLMENT_STATUS <- factor(1, levels=0:1, labels=c("Enrolled State: No", "Enrolled State: Yes"))
New_Jersey_Data_LONG_2012$DISTRICT_ENROLLMENT_STATUS <- factor(1, levels=0:1, labels=c("Enrolled District: No", "Enrolled District: Yes"))
New_Jersey_Data_LONG_2012$SCHOOL_ENROLLMENT_STATUS <- factor(1, levels=0:1, labels=c("Enrolled School: No", "Enrolled School: Yes"))

# Save the results

save(New_Jersey_Data_LONG_2012, file="Data/New_Jersey_Data_LONG_2012.Rdata")
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#################################################################### | ||
### | ||
### Code to update SGP analyses for New Jersey | ||
### | ||
#################################################################### | ||
|
||
### Load SGP Package

# library() stops right away if SGP is not installed; require() would only return FALSE and let the script fail later.
library(SGP)


### Load data

load("../Data/New_Jersey_Data_LONG_2011.Rdata")
load("../Data/Base_Files/New_Jersey_SGP.Rdata")


### Merge files

# Stack the 2011 long data on top of the data already held in the SGP object. Both tables are passed to
# rbind.fill() as plain data.frames and the combined result is converted back to a data.table.
New_Jersey_SGP@Data <- as.data.table(rbind.fill(as.data.frame(New_Jersey_Data_LONG_2011), as.data.frame(New_Jersey_SGP@Data)))


### prepareSGP

New_Jersey_SGP <- prepareSGP(New_Jersey_SGP)

# The object is written to disk after each of the following steps, so a failed later step does not lose earlier results.
save(New_Jersey_SGP, file="../Data/New_Jersey_SGP.Rdata")

### analyzeSGP

New_Jersey_SGP <- analyzeSGP(New_Jersey_SGP,
	years=2011,
	simulate.sgps=FALSE)

save(New_Jersey_SGP, file="../Data/New_Jersey_SGP.Rdata")


### combineSGP

New_Jersey_SGP <- combineSGP(New_Jersey_SGP,
	years=2011)

save(New_Jersey_SGP, file="../Data/New_Jersey_SGP.Rdata")


### summarizeSGP

# NOTE(review): the summarized object is not saved before plotting - confirm that is intended.
New_Jersey_SGP <- summarizeSGP(New_Jersey_SGP)

### visualizeSGP

visualizeSGP(New_Jersey_SGP, sgPlot.demo.report=TRUE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#################################################################### | ||
### | ||
### Code to update SGP analyses for New Jersey | ||
### | ||
#################################################################### | ||
|
||
### Load SGP Package

# library() stops right away if SGP is not installed; require() would only return FALSE and let the script fail later.
library(SGP)
# NOTE(review): error=recover is a debugging setting - confirm it should stay for unattended runs.
options(error=recover)

### Load data

load("Data/New_Jersey_Data_LONG_2012.Rdata")
load("Data/Base_Files/New_Jersey_SGP.Rdata")


### Merge files

# Stack the 2012 long data on top of the data already held in the SGP object.
New_Jersey_SGP@Data <- as.data.table(rbind.fill(New_Jersey_Data_LONG_2012, New_Jersey_SGP@Data))


### prepareSGP

# The object is written to disk after every step, so a failed later step does not lose earlier results.
New_Jersey_SGP <- prepareSGP(New_Jersey_SGP)
save(New_Jersey_SGP, file="Data/New_Jersey_SGP.Rdata")


### analyzeSGP

# Cohort- and baseline-referenced percentiles, projections and lagged projections for 2012, without simulated SGPs.
# The worker counts are hard-coded; adjust them to the machine the script runs on.
New_Jersey_SGP <- analyzeSGP(
	New_Jersey_SGP,
	years=2012,
	sgp.percentiles=TRUE,
	sgp.projections=TRUE,
	sgp.projections.lagged=TRUE,
	sgp.percentiles.baseline=TRUE,
	sgp.projections.baseline=TRUE,
	sgp.projections.lagged.baseline=TRUE,
	simulate.sgps=FALSE,
	parallel.config=list(BACKEND="PARALLEL", WORKERS=list(PERCENTILES=15, BASELINE_PERCENTILES=30, PROJECTIONS=10, LAGGED_PROJECTIONS=8, SUMMARY=30, GA_PLOTS=10, SG_PLOTS=1)))

save(New_Jersey_SGP, file="Data/New_Jersey_SGP.Rdata")


### combineSGP

New_Jersey_SGP <- combineSGP(New_Jersey_SGP)

save(New_Jersey_SGP, file="Data/New_Jersey_SGP.Rdata")


### summarizeSGP

New_Jersey_SGP <- summarizeSGP(New_Jersey_SGP, parallel.config=list(BACKEND="PARALLEL", WORKERS=list(SUMMARY=10)))

save(New_Jersey_SGP, file="Data/New_Jersey_SGP.Rdata")

### visualizeSGP

visualizeSGP(New_Jersey_SGP, sgPlot.demo.report=TRUE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
New_Jersey | ||
New Jersey | ||
========== | ||
|
||
SGP source code and documentation associated with New Jersey SGP analyses | ||
SGP source code and documentation associated with New Jersey SGP analyses |