-
Notifications
You must be signed in to change notification settings - Fork 3
/
pref.R
94 lines (65 loc) · 3.48 KB
/
pref.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#Prefecture level analysis
library(foreign)
edat<- read.csv("~/Desktop/edsproject/data_cleaned.csv")
dat<- read.dta("~/Desktop/edsproject/china_univ_by_municipality.dta")
#merge two data frames by year and cityid
total<- merge(edat, dat, by=c("cityid", "year"), all.x=TRUE, all.y=TRUE)
#Create subsets
pref<- total[which(total$urban==0),]
urb<-total[which(total$urban==1),]
#coplot(ncampus ~ year|provcat, typle ="l", data=pref)
#View(prefecture[,c("year","provcat")])
#generate log values of variables(rescale)
l.poptota<- log(pref$poptota)
l.fditota<- log((pref$fditota+1)/pref$gdpdef)
l.gdptota<- log(pref$gdptota/pref$gdpdef)
l.empavea<- log(pref$empavea/pref$gdpdef)
#generate quadtic terms of variables
psqua<- l.poptota*l.poptota
fsqua<- l.fditota*l.fditota
gsqua<- l.gdptota*l.gdptota
esqua<-pref$empavea*pref$empavea
pref<- cbind(pref, l.poptota, l.fditota, l.gdptota,l.empavea, psqua, fsqua,gsqua,esqua)
#look at the scatterplot for variables.
#1999
plot(pref$ncampus[pref$year==1998]~pref$poptota[pref$year==1998], xlab="population in the prefecture level", ylab = "number of campuses in the prefecture level")
plot(pref$ncampus[pref$year==2000]~pref$poptota[pref$year==2000], xlab="population in the prefecture level", ylab = "number of campuses in the prefecture level")
#not run #lines(lowess(pref$l.poptota,pref$ncampus),col="red")
plot(pref$ncampus~pref$l.gdptota, xlab="log of real GDP per capita in the prefecture level", ylab = "number of campuses in the prefecture level")
#not run #temp<-loess(pref$ncampus~pref$l.gdptota)
#not run#j<-order(pref$l.gdptota)
#not run# lines(pref$l.gdptota[j],temp$fitted[j],col="red")
plot(pref$ncampus~pref$empavea, xlab="Average wage in the prefecture level", ylab = "number of campuses in the prefecture level")
plot(pref$ncampus~pref$fditota, xlab="Foreign direct investment per capita in the prefecture level", ylab = "number of campuses in the prefecture level")
##OLS fixed effect model
library(plm)
M1<- plm(ncampus ~ l.poptota + l.fditota+l.gdptota+l.empavea+gsqua+fsqua+esqua+psqua,data = pref, index=c("cityid", "year"), effect= "individual", model="within")
print(summary(M1))
G2<- glm(ncampus ~ l.poptota+log(fditota+1)+l.gdptota+l.empavea, data=pref,family =poisson(link=log), weights = pref$phat)
summary(M2)
#####This one did not work###
#library(glmmML)
#M1<- glmmML(ncampus~l.poptota, data=prefecture, cluster=cityid)
#error: yvalues must between 0 and 1;
#using lfe package;
#library(lfe)
#MM1<- felm(ncampus ~ l.poptota + l.gdptota+ fditota, data = pref,weights = pref$phat, na.action = TRUE)
#print(summary(MM1))
#using lmer package
#library(lme4)
#MMM1<- lmer(ncampus ~ l.poptota + (l.poptota|cityid),data = pref,na.action = TRUE)
#print(summary(MMM1))
#using GLM to apply poisson;
#library(MASS)
#MMMM1<- glm(ncampus ~ l.poptota+ l.fditota+ l.gdptota + l.empavea, family=poisson(link="log"), data = pref,na.action = TRUE)
##Logit model for panel data
#library(pglm)
#M2<- pglm(dcampus ~ lpoptota+fditota+l.gdptota+empavea, data=pref, effect="individual",family= binomial('probit'), model="within")
#print(summary(M2))
#using lme4 package, treat this model as mixed structure model.
#library(dplyr)
#pref2 <- pref[!is.na(pref$dcampus) & !is.na(pref$l.poptota) & !is.na(pref$l.fditota) & !is.na(pref$l.gdptota) & !is.na(pref$l.empavea),]
#View(pref2)
M2<- glm(dcampus ~ l.poptota+log(fditota+1)+l.gdptota+l.empavea, data=pref,family = "binomial")
summary(M2)
#MM2<- nlmer(dcampus ~ l.poptota|cityid+ log(fditota+1)+l.gdptota+l.empavea, data=pref,family = binomial)