-
Notifications
You must be signed in to change notification settings - Fork 1
/
Indiana_Data_LONG_2024.R
50 lines (38 loc) · 2.5 KB
/
Indiana_Data_LONG_2024.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
###########################################################################################
###
### Script for creating Indiana LONG data set for 2024
###
###########################################################################################
### Load data.table (provides fread, setnames, setkey, rbindlist and := used below).
### library() is used instead of require() so that a missing package stops the
### script here rather than surfacing later as a "could not find function" error.
library(data.table)
### Load base data files
### Every column is read as character: the unnamed colClasses vectors supply one
### entry per column expected in each file (7 assessment columns, 6 demographic columns).
Indiana_Data_LONG_2024 <- fread("Data/Base_Files/ILEARN_2024_Damian_Export_062824.csv", colClasses=rep("character", 7))
Indiana_Demographics_2024 <- fread("Data/Base_Files/ILEARN_2024_demographics.csv", colClasses=rep("character", 6))
### Prepare Data
### Name the seven columns of the raw export, then drop STN.
setnames(
    Indiana_Data_LONG_2024,
    c("IDOE_CORPORATION_ID", "IDOE_SCHOOL_ID", "STN", "STUDENT_ID", "GRADE_ID", "ELA_SCALE_SCORE", "MATH_SCALE_SCORE")
)
Indiana_Data_LONG_2024[, STN := NULL]
### Wide to long: stack the ELA records on top of the MATHEMATICS records.
### use.names=FALSE binds by position, so the stacked score column keeps the
### name of the first table (ELA_SCALE_SCORE) until it is renamed below.
Indiana_Data_LONG_2024 <- rbindlist(
    list(
        Indiana_Data_LONG_2024[, c("IDOE_CORPORATION_ID", "IDOE_SCHOOL_ID", "STUDENT_ID", "GRADE_ID", "ELA_SCALE_SCORE"), with=FALSE],
        Indiana_Data_LONG_2024[, c("IDOE_CORPORATION_ID", "IDOE_SCHOOL_ID", "STUDENT_ID", "GRADE_ID", "MATH_SCALE_SCORE"), with=FALSE]
    ),
    use.names=FALSE
)
setnames(Indiana_Data_LONG_2024, "ELA_SCALE_SCORE", "SCALE_SCORE")
### The first half of the stacked rows came from ELA, the second half from MATHEMATICS.
Indiana_Data_LONG_2024[, CONTENT_AREA := rep(c("ELA", "MATHEMATICS"), each=nrow(Indiana_Data_LONG_2024)/2)]
### Constant columns, added in this order: VALID_CASE, then SCHOOL_YEAR.
Indiana_Data_LONG_2024[, c("VALID_CASE", "SCHOOL_YEAR") := list("VALID_CASE", "2024")]
### Scores become numeric; grades make a round trip through numeric so that
### representations such as "03" are normalized to "3".
Indiana_Data_LONG_2024[, SCALE_SCORE := as.numeric(SCALE_SCORE)]
Indiana_Data_LONG_2024[, GRADE_ID := as.character(as.numeric(GRADE_ID))]
### Prepare Indiana_Demographics_2024
### Name the six columns of the demographics file.
setnames(
    Indiana_Demographics_2024,
    c("STUDENT_ID", "ETHNICITY", "SPECIAL_EDUCATION_STATUS", "SOCIO_ECONOMIC_STATUS", "ENGLISH_LANGUAGE_LEARNER_STATUS", "GENDER")
)
### Add the constant SCHOOL_YEAR and VALID_CASE columns (in that order) so that
### the demographics table carries the same three key columns as the assessment table.
Indiana_Demographics_2024[, c("SCHOOL_YEAR", "VALID_CASE") := list("2024", "VALID_CASE")]
### Key both tables identically; the keyed join that follows matches on these columns.
setkeyv(Indiana_Demographics_2024, c("VALID_CASE", "SCHOOL_YEAR", "STUDENT_ID"))
setkeyv(Indiana_Data_LONG_2024, c("VALID_CASE", "SCHOOL_YEAR", "STUDENT_ID"))
### Merge in demographics
### Keyed X[Y] join: every assessment row is kept and the matching demographic
### record is attached; assessment rows without a match get NA demographics.
Indiana_Data_LONG_2024 <- Indiana_Demographics_2024[Indiana_Data_LONG_2024]
Indiana_Data_LONG_2024[, ETHNICITY := as.factor(ETHNICITY)]
### Tidy up column order
### Columns are listed by name rather than by position, so the resulting order
### does not depend on where each column happens to land after the join.
setcolorder(
    Indiana_Data_LONG_2024,
    c(
        "VALID_CASE", "CONTENT_AREA", "SCHOOL_YEAR", "GRADE_ID", "STUDENT_ID", "SCALE_SCORE",
        "ETHNICITY", "SPECIAL_EDUCATION_STATUS", "SOCIO_ECONOMIC_STATUS", "ENGLISH_LANGUAGE_LEARNER_STATUS", "GENDER",
        "IDOE_CORPORATION_ID", "IDOE_SCHOOL_ID"
    )
)
### Take highest score for duplicates
### First sort with SCALE_SCORE as the last key column, so that records within each
### VALID_CASE/SCHOOL_YEAR/CONTENT_AREA/GRADE_ID/STUDENT_ID group ascend by score.
setkeyv(Indiana_Data_LONG_2024, c("VALID_CASE", "SCHOOL_YEAR", "CONTENT_AREA", "GRADE_ID", "STUDENT_ID", "SCALE_SCORE"))
### Re-key without SCALE_SCORE so that duplicates are defined by the five id columns only
### (relies on setkey leaving the existing within-group row order in place).
setkeyv(Indiana_Data_LONG_2024, c("VALID_CASE", "SCHOOL_YEAR", "CONTENT_AREA", "GRADE_ID", "STUDENT_ID"))
### Scanning from the last row flags every record except the final (highest-scoring)
### one in each group; those flagged records are marked INVALID_CASE.
Indiana_Data_LONG_2024[
    duplicated(Indiana_Data_LONG_2024, by=key(Indiana_Data_LONG_2024), fromLast=TRUE),
    VALID_CASE := "INVALID_CASE"
]
### Setkey final time
### VALID_CASE (a key column) was reassigned for duplicate records above,
### so the table is keyed once more before it is written out.
setkeyv(Indiana_Data_LONG_2024, c("VALID_CASE", "SCHOOL_YEAR", "CONTENT_AREA", "GRADE_ID", "STUDENT_ID"))
### Save results
### The object is stored under its own name, Indiana_Data_LONG_2024.
save(list="Indiana_Data_LONG_2024", file="Data/Indiana_Data_LONG_2024.Rdata")