Skip to content

Commit

Permalink
First commit to repository
Browse files Browse the repository at this point in the history
  • Loading branch information
dbetebenner committed Mar 7, 2013
1 parent ff7dfe9 commit bc20322
Show file tree
Hide file tree
Showing 6 changed files with 352 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.Rapp.history
.DS_Store
.RData
.Rdata
/Data
128 changes: 128 additions & 0 deletions New_Jersey_Data_LONG_2011.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
############################################################################################################################
####
#### Code for preparation of New_Jersey data
####
############################################################################################################################

library("SGP")
library("doMC")
#registerDoMC(4) # Can run sequentially if you prefer/can't parallelize
#getDoParWorkers()

###
### Read the pipe (|) separated .csv files and stack them into one data.table.
###

# Bare file names of the two extracts; they are read from ../Data.
my.files <- c("NJASK_2010_2011_Math.csv", "NJASK_2010-2011_ELA.csv")

# Each file is read through its relative path rather than by switching the
# working directory with setwd() and switching back afterwards, so the working
# directory is never altered (and cannot be left pointing at ../Data when a
# read fails part-way through).
# stringsAsFactors = TRUE is spelled out because the read.csv() default became
# FALSE in R 4.0.0, while the levels()/droplevels() calls later in this script
# only work on factor columns.
New_Jersey_Data <- list(
  Student = foreach(
    csv.file = my.files,
    .combine = "rbind",
    .packages = "data.table",
    .inorder = FALSE
  ) %dopar% {
    data.table(read.csv(file.path("..", "Data", csv.file), sep = "|", stringsAsFactors = TRUE))
  }
)


###
### Rename the columns: replace "." with "_" and upper-case the names.
### Here's a function to do it:
###

# Convert one dotted column name into an upper-case, underscore-separated name.
#
#   x  a single character string, e.g. "Testing.Year".
#
# Returns one string: the pieces of `x` between literal dots, upper-cased and
# joined with "_". Consecutive dots in `x` produce a double underscore, and
# everything from the first double underscore onwards is discarded
# ("Special.Education..SE." becomes "SPECIAL_EDUCATION"). An empty string
# yields NA.
subSpecial <- function(x) {
  pieces <- strsplit(x, split = ".", fixed = TRUE)[[1]]
  # Upper-casing the whole piece is equivalent to upper-casing its first
  # character and its remainder separately and gluing them back together.
  joined <- paste(toupper(pieces), collapse = "_")
  strsplit(joined, split = "__")[[1]][1]
}


# rename:

# Rebuild every column name in a single pass. vapply() enforces exactly one
# string per column and, unlike a `1:ncol` index loop, does nothing when the
# table has no columns.
names(New_Jersey_Data[["Student"]]) <- vapply(names(New_Jersey_Data[["Student"]]), subSpecial, character(1), USE.NAMES = FALSE)

# Interactive check of the column classes.
sapply(New_Jersey_Data[["Student"]], class)

# Force the first column's name to TESTING_YEAR, whatever the renaming produced.
names(New_Jersey_Data[["Student"]])[1] <- "TESTING_YEAR"
New_Jersey_Data[["Student"]]$SCALED_SCORE <- as.numeric(New_Jersey_Data[["Student"]]$SCALED_SCORE) # For SGP function - doesn't always like integers...
New_Jersey_Data[["Student"]]$SUBJECT <- toupper(New_Jersey_Data[["Student"]]$SUBJECT) # Any field that gets 'keyed' on needs to be ALL CAPS (note: toupper() returns a character vector, not a factor)


############################################################################################################################
### IDENTIFY VALID CASES
###
### Duplicate rows for individual students may be the only issue:
### ALL scores are in range of 100 - 300, so NO SCALED_SCORE greater than LOSS and less than HOSS
### All students in grades 3 - 8 (assume taking grade level examinations?)
###
############################################################################################################################

# Start with every record flagged as valid (the value 1 maps to the first label).
New_Jersey_Data[["Student"]][["VALID_CASE"]] <- factor(1, levels=1:2, labels=c("VALID_CASE", "INVALID_CASE"))

### Duplicated Records

# setkeyv() sorts the table by reference. It replaces the deprecated
# `key(x) <- value` form and is what the 2012 script uses.
setkeyv(New_Jersey_Data[["Student"]], c("STUDENT_ID", "TESTING_YEAR", "SUBJECT"))

# Inspect the duplicates first to see what's going on.
# `by = key(...)` is passed explicitly: newer data.table releases compare ALL
# columns by default, which would miss records that share student/year/subject
# but differ in score.
dup.ids <- New_Jersey_Data[["Student"]]$STUDENT_ID[which(duplicated(New_Jersey_Data[["Student"]], by = key(New_Jersey_Data[["Student"]])))]
dups <- New_Jersey_Data[["Student"]][New_Jersey_Data[["Student"]]$STUDENT_ID %in% dup.ids]
length(dup.ids) # only a handful, but we'll try to keep the best of the lot
dim(dups)
summary(dups)

# Invalidate lowest score for duplicates.
# Sorting on five columns first puts the records of each student/year/subject in
# ascending score order; re-keying on the first four then leaves that order in
# place, so the row directly before each flagged duplicate is a lower-scoring
# copy. Subtracting 1 from the duplicate positions therefore invalidates every
# record of a group except the last (highest-scoring) one.
setkeyv(New_Jersey_Data[["Student"]], c("VALID_CASE", "STUDENT_ID", "TESTING_YEAR", "SUBJECT", "SCALED_SCORE"))
setkeyv(New_Jersey_Data[["Student"]], c("VALID_CASE", "STUDENT_ID", "TESTING_YEAR", "SUBJECT"))
New_Jersey_Data[["Student"]][["VALID_CASE"]][which(duplicated(New_Jersey_Data[["Student"]], by = key(New_Jersey_Data[["Student"]])) & New_Jersey_Data[["Student"]]$VALID_CASE=="VALID_CASE")-1] <- "INVALID_CASE"

############################################################################################################################
###
### Create Additional Variables:
###
############################################################################################################################

### Prior Performance Levels
# Make the Performance Levels an ORDERED factor (values outside the three listed levels become NA)
New_Jersey_Data[["Student"]]$PERFORMANCE_LEVEL <- ordered(New_Jersey_Data[["Student"]]$PERFORMANCE_LEVEL, levels=c("Partially Proficient", "Proficient", "Advanced Proficient"))

# Use data.table to select each "Valid" student record from last year and tack on the scaled score and performance level from that record onto the current year record (as *_PRIOR).
# setkeyv() replaces the deprecated `key(x) <- value` form; the keyed join below needs the table to be physically sorted on these columns.
setkeyv(New_Jersey_Data[["Student"]], c("STUDENT_ID", "SUBJECT", "TESTING_YEAR", "VALID_CASE"))
# Each lookup asks for the same student and subject, the previous testing year and a VALID_CASE record; mult="last" keeps the last match.
# NOTE(review): the result is assigned back by position, which assumes the join returns one row per current row in the table's key order - confirm.
New_Jersey_Data[["Student"]]$SCALED_SCORE_PRIOR <- New_Jersey_Data[["Student"]][SJ(STUDENT_ID, SUBJECT, TESTING_YEAR-1, "VALID_CASE"), mult="last"][,SCALED_SCORE]
New_Jersey_Data[["Student"]]$PERFORMANCE_LEVEL_PRIOR <- New_Jersey_Data[["Student"]][SJ(STUDENT_ID, SUBJECT, TESTING_YEAR-1, "VALID_CASE"), mult="last"][,PERFORMANCE_LEVEL]

### Unique School Identifier ?

# Districts that use one particular school code, followed by the overall
# distribution of school codes.
summary(with(New_Jersey_Data[["Student"]], DISTRICT_CODE[SCHOOL_CODE == 150])) # tried with 50, 100, 999 - definitely not a unique ID
summary(New_Jersey_Data[["Student"]][["SCHOOL_CODE"]]) # All 3 digit numbers

# Shift the district code left by three decimal digits and add the school code.
New_Jersey_Data[["Student"]]$UNIQUE_SCHOOL_NUMBER <- with(New_Jersey_Data[["Student"]], DISTRICT_CODE * 1000 + SCHOOL_CODE)

# Sanity checks on the combined identifier.
summary(New_Jersey_Data[["Student"]][["UNIQUE_SCHOOL_NUMBER"]])
summary(New_Jersey_Data[["Student"]][["UNIQUE_SCHOOL_NUMBER"]] %% 1000) # Modulo 1000 returns SCHOOL_CODE

############################################################################################################################
###
### SGP standard variable names:
###
############################################################################################################################

# SGP-style column names. They are applied strictly by position, so this vector
# must list one name per column, in the table's current column order.
my.names <- c("YEAR", "Testing.Program", "CONTENT_AREA", "GRADE", "ID", "DISTRICT_NUMBER", "School.Code", "County.Name",
"DISTRICT_NAME", "SCHOOL_NAME", "DFG", "Gender", "Race.Ethnicity.Combined", "Title.I.LAL", "Title.I.Math",
"Special.Education..SE.", "General.ED", "Former.LEP", "Current.LEP", "Time.in.District.Less.Than.1.Year",
"Economically.Disadvantaged", "Migrant", "SCALE_SCORE", "ACHIEVEMENT_LEVEL", "VALID_CASE", "SCALE_SCORE_PRIOR",
"ACHIEVEMENT_LEVEL_PRIOR", "SCHOOL_NUMBER")

names(New_Jersey_Data[["Student"]]) <- my.names
New_Jersey_Data[["Student"]]$SCALE_SCORE_PRIOR <- NULL
New_Jersey_Data[["Student"]]$ID <- factor(New_Jersey_Data[["Student"]]$ID)

# CONTENT_AREA (formerly SUBJECT) was turned into a plain character vector by
# toupper() earlier in this script. `levels(x) <- ...` on a character vector
# only attaches an unused "levels" attribute and leaves the values untouched,
# so the relabelling is done through factor(): the two distinct values, in
# sorted order, become "ELA" and "MATHEMATICS". factor() raises an error if the
# column does not hold exactly two distinct values. The result is stored as
# character, the type the column already had.
New_Jersey_Data[["Student"]]$CONTENT_AREA <- as.character(factor(New_Jersey_Data[["Student"]]$CONTENT_AREA, labels = c("ELA", "MATHEMATICS")))

# Blank gender codes become NA; the then-unused level is dropped before the
# remaining levels are relabelled by position.
New_Jersey_Data[["Student"]]$Gender[New_Jersey_Data[["Student"]]$Gender==""] <- NA
New_Jersey_Data[["Student"]]$Gender <- droplevels(New_Jersey_Data[["Student"]]$Gender)
levels(New_Jersey_Data[["Student"]]$Gender) <- c("Female", "Male")

# Positional relabelling of factor levels.
# NOTE(review): each line assumes the raw column is a factor whose sorted levels
# line up with the labels given here - confirm against the source files.
levels(New_Jersey_Data[["Student"]]$Race.Ethnicity.Combined) <- c("Asian", "Black", "Hispanic", "Native American", "Other", "Pacific Islander", "White")
levels(New_Jersey_Data[["Student"]]$Title.I.LAL) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$Title.I.Math) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$General.ED) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$Economically.Disadvantaged) <- c("No", "Yes")
levels(New_Jersey_Data[["Student"]]$Migrant) <- c("No", "Yes")

# Save the long-format 2011 data (columns carry the SGP names assigned above)

New_Jersey_Data_LONG_2011 <- New_Jersey_Data$Student
save(New_Jersey_Data_LONG_2011, file="../Data/New_Jersey_Data_LONG_2011.Rdata", compress=TRUE) #

104 changes: 104 additions & 0 deletions New_Jersey_Data_LONG_2012.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
################################################################################
###
### Create New Jersey Data LONG for 2012
###
################################################################################

### Load SGP Package

# library() stops with an error when SGP is not installed; require() would only
# return FALSE and let the script fail later with a less obvious message.
library(SGP)


### Load data

# Pipe-separated extracts. stringsAsFactors=TRUE is spelled out because the
# read.csv() default became FALSE in R 4.0.0, while the levels() relabelling in
# the tidy-up section only works on factor columns.
New_Jersey_Data_ELA <- read.csv("Data/Base_Files/NJASK_2011_2012_ELA.csv", sep="|", stringsAsFactors=TRUE)
New_Jersey_Data_MATH <- read.csv("Data/Base_Files/NJASK_2011_2012_MATH.csv", sep="|", stringsAsFactors=TRUE)


### Combine ELA and MATH

# Stack the two subject files into one long data.frame (ELA rows first).
New_Jersey_Data_LONG_2012 <- rbind(New_Jersey_Data_ELA, New_Jersey_Data_MATH)


### Tidy up data

# Replace all column names by position with the names used in the rest of this
# script; the vector must list one name per column, in file order.
names(New_Jersey_Data_LONG_2012) <- c("YEAR", "Testing.Program", "CONTENT_AREA", "GRADE", "ID", "Student.ID..SSID.", "DISTRICT_NUMBER", "School.Code", "County.Name",
"DISTRICT_NAME", "SCHOOL_NAME", "DFG", "Gender", "Race.Ethnicity.Combined","Title.I.LAL", "Title.I.Math", "Special.Education..SE.", "General.ED", "Former.LEP",
"Current.LEP", "Time.in.District.Less.Than.1.Year", "Economically.Disadvantaged", "Migrant", "Homeless", "SCALE_SCORE", "ACHIEVEMENT_LEVEL")

# Drop three columns that are not carried forward.
New_Jersey_Data_LONG_2012$Homeless <- NULL
New_Jersey_Data_LONG_2012$Former.LEP <- NULL
New_Jersey_Data_LONG_2012$Testing.Program <- NULL

# CONTENT_AREA is kept as character; the value "Math" is renamed to "MATHEMATICS".
New_Jersey_Data_LONG_2012$CONTENT_AREA <- as.character(New_Jersey_Data_LONG_2012$CONTENT_AREA)
New_Jersey_Data_LONG_2012$CONTENT_AREA[New_Jersey_Data_LONG_2012$CONTENT_AREA=="Math"] <- "MATHEMATICS"

# ID is handled as character from here on.
New_Jersey_Data_LONG_2012$ID <- as.character(New_Jersey_Data_LONG_2012$ID)

# Blank gender codes become NA; factor() then drops the unused level before the
# remaining levels are relabelled by position.
# NOTE(review): assumes exactly two gender codes remain, sorting as female then male - confirm.
New_Jersey_Data_LONG_2012$Gender[New_Jersey_Data_LONG_2012$Gender==""] <- NA
New_Jersey_Data_LONG_2012$Gender <- factor(New_Jersey_Data_LONG_2012$Gender)
levels(New_Jersey_Data_LONG_2012$Gender) <- c("Female", "Male")

# The levels() assignments below relabel factor levels by position.
# NOTE(review): each one assumes the column is a factor whose sorted raw levels
# line up with the labels given - confirm against the source files.
levels(New_Jersey_Data_LONG_2012$Race.Ethnicity.Combined) <- c("Asian", "Black", "Hispanic", "Native American", "Other", "Pacific Islander", "White")

levels(New_Jersey_Data_LONG_2012$General.ED) <- c("General Education: No", "General Education: Yes")

# Blank Current.LEP codes become NA, the unused level is dropped, and the
# remaining five levels are relabelled by position.
New_Jersey_Data_LONG_2012$Current.LEP[New_Jersey_Data_LONG_2012$Current.LEP==""] <- NA
New_Jersey_Data_LONG_2012$Current.LEP <- factor(New_Jersey_Data_LONG_2012$Current.LEP)
levels(New_Jersey_Data_LONG_2012$Current.LEP) <- c("Less than 1 Year", "1 Year", "2 Years", "3 Years", "Yes")

levels(New_Jersey_Data_LONG_2012$Time.in.District.Less.Than.1.Year) <- c("Time in District Less than 1 Year: No", "Time in District Less than 1 Year: Yes")

# Three raw levels are collapsed into two: the first and third both map to
# "Yes". The round trip through character rebuilds the factor from the merged
# labels, with its levels in sorted order.
levels(New_Jersey_Data_LONG_2012$Economically.Disadvantaged) <- c("Economically Disadvantaged: Yes", "Economically Disadvantaged: No", "Economically Disadvantaged: Yes")
New_Jersey_Data_LONG_2012$Economically.Disadvantaged <- as.character(New_Jersey_Data_LONG_2012$Economically.Disadvantaged)
New_Jersey_Data_LONG_2012$Economically.Disadvantaged <- factor(New_Jersey_Data_LONG_2012$Economically.Disadvantaged)

levels(New_Jersey_Data_LONG_2012$Migrant) <- c("Migrant: No", "Migrant: Yes")

# A single label is supplied for each Title I column.
# NOTE(review): presumably each column holds only one distinct value in these files - confirm.
levels(New_Jersey_Data_LONG_2012$Title.I.LAL) <- "Title I LAL: No"
levels(New_Jersey_Data_LONG_2012$Title.I.Math) <- "Title I Math: No"

# Blank achievement levels become NA; the column is then rebuilt as an ordered
# factor running from "Partially Proficient" up to "Advanced Proficient".
New_Jersey_Data_LONG_2012$ACHIEVEMENT_LEVEL[New_Jersey_Data_LONG_2012$ACHIEVEMENT_LEVEL==""] <- NA
New_Jersey_Data_LONG_2012$ACHIEVEMENT_LEVEL <- factor(New_Jersey_Data_LONG_2012$ACHIEVEMENT_LEVEL)
New_Jersey_Data_LONG_2012$ACHIEVEMENT_LEVEL <- factor(New_Jersey_Data_LONG_2012$ACHIEVEMENT_LEVEL, levels=c("Partially Proficient", "Proficient", "Advanced Proficient"), ordered=TRUE)

# IDs recorded as the literal string "NULL" are treated as missing.
New_Jersey_Data_LONG_2012$ID[New_Jersey_Data_LONG_2012$ID=="NULL"] <- NA

# Combined school identifier: district number shifted by three decimal digits plus the school code.
New_Jersey_Data_LONG_2012$SCHOOL_NUMBER <- New_Jersey_Data_LONG_2012$DISTRICT_NUMBER*1000 + New_Jersey_Data_LONG_2012$School.Code

# These two lines repeat the Title I relabelling already done earlier in this section.
levels(New_Jersey_Data_LONG_2012$Title.I.LAL) <- "Title I LAL: No"
levels(New_Jersey_Data_LONG_2012$Title.I.Math) <- "Title I Math: No"

### Identify Valid Cases

# Every record starts out valid; records without an ID are invalidated.
New_Jersey_Data_LONG_2012$VALID_CASE <- "VALID_CASE"
New_Jersey_Data_LONG_2012$VALID_CASE[is.na(New_Jersey_Data_LONG_2012$ID)] <- "INVALID_CASE"

New_Jersey_Data_LONG_2012 <- as.data.table(New_Jersey_Data_LONG_2012)

setkeyv(New_Jersey_Data_LONG_2012, c("VALID_CASE", "ID", "YEAR", "CONTENT_AREA"))

# Inspect the duplicates first to see what's going on.
# `by=key(...)` is passed explicitly: newer data.table releases compare ALL
# columns by default, which would miss records that share ID/year/content area
# but differ in score.
dup.ids<-New_Jersey_Data_LONG_2012$ID[which(duplicated(New_Jersey_Data_LONG_2012, by=key(New_Jersey_Data_LONG_2012)))]
dups<-New_Jersey_Data_LONG_2012[New_Jersey_Data_LONG_2012$ID %in% dup.ids]
length(dup.ids) # only a handful, but we'll try to keep the best of the lot
dim(dups)
summary(dups)

# Invalidate lowest score for duplicates.
# Sorting on five columns first puts the records of each ID/year/content area in
# ascending score order; re-keying on the first four then leaves that order in
# place, so the row directly before each flagged duplicate is a lower-scoring
# copy. Subtracting 1 from the duplicate positions therefore invalidates every
# record of a group except the last (highest-scoring) one.

setkeyv(New_Jersey_Data_LONG_2012, c("VALID_CASE", "ID", "YEAR", "CONTENT_AREA", "SCALE_SCORE"))
setkeyv(New_Jersey_Data_LONG_2012, c("VALID_CASE", "ID", "YEAR", "CONTENT_AREA"))
New_Jersey_Data_LONG_2012[["VALID_CASE"]][which(duplicated(New_Jersey_Data_LONG_2012, by=key(New_Jersey_Data_LONG_2012)) & New_Jersey_Data_LONG_2012$VALID_CASE=="VALID_CASE")-1] <- "INVALID_CASE"


# ENROLLMENT_STATUS

# The files carry no enrollment information, so (presumably) every record is to
# be treated as enrolled at state, district and school level.
# factor(1, levels=0:1, labels=...) selects the SECOND label, so the "Yes" label
# has to come second; with "Yes" listed first every student was being marked
# "Enrolled ...: No".
New_Jersey_Data_LONG_2012$STATE_ENROLLMENT_STATUS <- factor(1, levels=0:1, labels=c("Enrolled State: No", "Enrolled State: Yes"))
New_Jersey_Data_LONG_2012$DISTRICT_ENROLLMENT_STATUS <- factor(1, levels=0:1, labels=c("Enrolled District: No", "Enrolled District: Yes"))
New_Jersey_Data_LONG_2012$SCHOOL_ENROLLMENT_STATUS <- factor(1, levels=0:1, labels=c("Enrolled School: No", "Enrolled School: Yes"))

# Save the results

save(New_Jersey_Data_LONG_2012, file="Data/New_Jersey_Data_LONG_2012.Rdata")

52 changes: 52 additions & 0 deletions New_Jersey_SGP_2011.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
####################################################################
###
### Code to update SGP analyses for New Jersey
###
####################################################################

### Load SGP Package

# library() stops with an error when SGP is not installed; require() would only
# return FALSE and let the script fail later with a less obvious message.
library(SGP)


### Load data

# Loads New_Jersey_Data_LONG_2011 (written by the 2011 data-preparation script)
# and the existing New_Jersey_SGP object.
load("../Data/New_Jersey_Data_LONG_2011.Rdata")
load("../Data/Base_Files/New_Jersey_SGP.Rdata")


### Merge files

# Stack the 2011 records on top of the data already held in the SGP object and
# store the result back as a data.table.
# NOTE(review): rbind.fill is not defined or loaded in this script; presumably it
# is plyr's and is made available by loading SGP - confirm.
New_Jersey_SGP@Data <- as.data.table(rbind.fill(as.data.frame(New_Jersey_Data_LONG_2011), as.data.frame(New_Jersey_SGP@Data)))


### prepareSGP

New_Jersey_SGP <- prepareSGP(New_Jersey_SGP)

# Checkpoint: the object is written to disk after each of the first three stages.
save(New_Jersey_SGP, file = "../Data/New_Jersey_SGP.Rdata")


### analyzeSGP

# Restricted to 2011, with simulated SGPs switched off.
New_Jersey_SGP <- analyzeSGP(New_Jersey_SGP, simulate.sgps = FALSE, years = 2011)

save(New_Jersey_SGP, file = "../Data/New_Jersey_SGP.Rdata")


### combineSGP

New_Jersey_SGP <- combineSGP(New_Jersey_SGP, years = 2011)

save(New_Jersey_SGP, file = "../Data/New_Jersey_SGP.Rdata")


### summarizeSGP

# Unlike the earlier stages, the summarized object is not saved in this script.
New_Jersey_SGP <- summarizeSGP(New_Jersey_SGP)


### visualizeSGP

visualizeSGP(New_Jersey_SGP, sgPlot.demo.report = TRUE)
61 changes: 61 additions & 0 deletions New_Jersey_SGP_2012.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
####################################################################
###
### Code to update SGP analyses for New Jersey
###
####################################################################

### Load SGP Package

# library() stops with an error when SGP is not installed; require() would only
# return FALSE and let the script fail later with a less obvious message.
library(SGP)
# Debugging aid: on error, hand control to recover().
# NOTE(review): this changes a session-wide option - confirm it is wanted for unattended runs.
options(error=recover)

### Load data

# Loads New_Jersey_Data_LONG_2012 (written by the 2012 data-preparation script)
# and the existing New_Jersey_SGP object.
load("Data/New_Jersey_Data_LONG_2012.Rdata")
load("Data/Base_Files/New_Jersey_SGP.Rdata")


### Merge files

# Stack the 2012 records on top of the data already held in the SGP object.
# Both inputs are converted to plain data frames before rbind.fill(), exactly as
# the 2011 update script does, and the result is stored back as a data.table.
# NOTE(review): rbind.fill is not defined or loaded in this script; presumably it
# is plyr's and is made available by loading SGP - confirm.
New_Jersey_SGP@Data <- as.data.table(rbind.fill(as.data.frame(New_Jersey_Data_LONG_2012), as.data.frame(New_Jersey_SGP@Data)))


### prepareSGP

New_Jersey_SGP <- prepareSGP(New_Jersey_SGP)

# Checkpoint: the object is written to disk after every stage up to summarizeSGP.
save(New_Jersey_SGP, file = "Data/New_Jersey_SGP.Rdata")


### analyzeSGP

# 2012 only: percentiles, projections and lagged projections are requested in
# both their regular and baseline forms; simulated SGPs are switched off.
New_Jersey_SGP <- analyzeSGP(
  New_Jersey_SGP,
  years = 2012,
  sgp.percentiles = TRUE,
  sgp.projections = TRUE,
  sgp.projections.lagged = TRUE,
  sgp.percentiles.baseline = TRUE,
  sgp.projections.baseline = TRUE,
  sgp.projections.lagged.baseline = TRUE,
  simulate.sgps = FALSE,
  parallel.config = list(
    BACKEND = "PARALLEL",
    WORKERS = list(
      PERCENTILES = 15,
      BASELINE_PERCENTILES = 30,
      PROJECTIONS = 10,
      LAGGED_PROJECTIONS = 8,
      SUMMARY = 30,
      GA_PLOTS = 10,
      SG_PLOTS = 1
    )
  )
)

save(New_Jersey_SGP, file = "Data/New_Jersey_SGP.Rdata")


### combineSGP

New_Jersey_SGP <- combineSGP(New_Jersey_SGP)

save(New_Jersey_SGP, file = "Data/New_Jersey_SGP.Rdata")


### summarizeSGP

New_Jersey_SGP <- summarizeSGP(
  New_Jersey_SGP,
  parallel.config = list(BACKEND = "PARALLEL", WORKERS = list(SUMMARY = 10))
)

save(New_Jersey_SGP, file = "Data/New_Jersey_SGP.Rdata")


### visualizeSGP

visualizeSGP(New_Jersey_SGP, sgPlot.demo.report = TRUE)
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
New_Jersey
New Jersey
==========

SGP source code and documentation associated with New Jersey SGP analyses
SGP source code and documentation associated with New Jersey SGP analyses

0 comments on commit bc20322

Please sign in to comment.