-
Notifications
You must be signed in to change notification settings - Fork 260
/
chapter_9.html
1069 lines (755 loc) · 32.5 KB
/
chapter_9.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="" xml:lang="">
<head>
<title>chapter_9.knit</title>
<meta charset="utf-8" />
<meta name="author" content="" />
<script src="libs/header-attrs-2.20/header-attrs.js"></script>
<link href="libs/remark-css-0.0.1/default.css" rel="stylesheet" />
<link href="libs/panelset-0.2.6/panelset.css" rel="stylesheet" />
<script src="libs/panelset-0.2.6/panelset.js"></script>
<script src="libs/htmlwidgets-1.5.4/htmlwidgets.js"></script>
<link href="libs/datatables-css-0.0.0/datatables-crosstalk.css" rel="stylesheet" />
<script src="libs/datatables-binding-0.27/datatables.js"></script>
<script src="libs/jquery-3.6.0/jquery-3.6.0.min.js"></script>
<link href="libs/dt-core-1.12.1/css/jquery.dataTables.min.css" rel="stylesheet" />
<link href="libs/dt-core-1.12.1/css/jquery.dataTables.extra.css" rel="stylesheet" />
<script src="libs/dt-core-1.12.1/js/jquery.dataTables.min.js"></script>
<link href="libs/crosstalk-1.2.0/css/crosstalk.min.css" rel="stylesheet" />
<script src="libs/crosstalk-1.2.0/js/crosstalk.min.js"></script>
<link rel="stylesheet" href="css/zh-CN.css" type="text/css" />
<link rel="stylesheet" href="css/Custumed_Style.css" type="text/css" />
</head>
<body>
<textarea id="source">
class: center, middle
<span style="font-size: 50px;">**第九章**</span> <br>
<span style="font-size: 50px;">回归模型(二):分层线性模型</span> <br>
<span style="font-size: 30px;">胡传鹏</span> <br>
<span style="font-size: 20px;"> </span> <br>
<span style="font-size: 30px;">2024-04-29</span> <br>
<span style="font-size: 20px;"> Made with Rmarkdown</span> <br>
---
<style type="text/css">
.bigfont {
font-size: 30px;
}
.size5{
font-size: 24px;
}
.tit_font{
font-size: 60px;
}
</style>
## 准备工作
```r
# Packages
if (!requireNamespace('pacman', quietly = TRUE)) {
install.packages('pacman')
}
pacman::p_load(
# 本节课需要用到的 packages
here, tidyverse,
# ANOVA & HLM
bruceR, lmerTest, lme4, broom, afex, interactions,
# 生成课件
xaringan, xaringanthemer, xaringanExtra, knitr)
options(scipen=99999,digits = 5)
```
---
class: inverse, middle ,center
# 一般线性模型回顾
---
# 0.1 线性模型与方差分析等价性
<br>
| | R自带函数 | 线性模型 | 解释 |
|-------|-------|-------|-------|
| 单样本*t* | t.test(y, mu = 0) | lm(y ~ 1)| 仅有截距的回归模型 |
| 独立样本*t* | t.test( `\(y_1\)`, `\(y_2\)`) | lm(y ~ 1 + `\(G_2\)`)| 自变量为二分变量的回归模型 |
| 配对样本*t* | t.test( `\(y_1\)`, `\(y_2\)`, paired=T) | lm( `\(y_1\)` - `\(y_2\)` ~ 1)| 仅有截距的回归模型)|
| 单因素ANOVA | aov(y ~ G) | lm(y ~ 1 + `\(G_1\)` + `\(G_2\)` + ...)| 一个离散自变量的回归模型 |
| 多因素ANOVA | aov(y ~ G * S) | lm(y ~ `\(G_1\)` + `\(G_2\)` + ... + `\(S_1\)` + `\(S_2\)` + ...)| 多个离散自变量的回归模型 |
<br>
<br>
---
# 0.2 虚拟编码
.panelset[
.panel[.panel-name[预处理]
```r
df.penguin <- bruceR::import(here::here('data', 'penguin', 'penguin_rawdata.csv')) %>%
dplyr::mutate(subjID = row_number()) %>%
dplyr::select(subjID,Temperature_t1, Temperature_t2, socialdiversity,
Site, DEQ, romantic, ALEX1:ALEX16) %>%
dplyr::filter(!is.na(Temperature_t1) & !is.na(Temperature_t2) & !is.na(DEQ)) %>%
dplyr::mutate(romantic = factor(romantic, levels = c(1,2),
labels = c("恋爱", "单身")), # 转化为因子
Temperature = rowMeans(select(., starts_with("Temperature"))))
# 设定相应的标签
breaks <- c(0, 35, 50, 66.5)
labels <- c('热带', '温带', '寒温带')
# 创建新的变量
df.penguin$climate <- cut(df.penguin$DEQ,
breaks = breaks,
labels = labels)
```
.panel[.panel-name[虚拟编码]
```r
# 比较不同气候条件下个体的体温是否存在差异:
## 虚拟编码
contrasts(df.penguin$climate) <- stats::contr.treatment(unique(df.penguin$climate))
### contr.treatment本质上创建了一个矩阵
### 由于3个分组,所以矩阵为2列
## 建立回归模型
lm_temp <- stats::lm(Temperature ~ climate,data = df.penguin)
```
.panel[.panel-name[结果]
.pull-left[
```r
## 输出回归系数
lm_temp %>%
tidy() %>%
select(1:3) %>%
mutate(across(where(is.numeric),
~round(., 3)))
```
```
## # A tibble: 3 × 3
## term estimate std.error
## <chr> <dbl> <dbl>
## 1 (Intercept) 36.6 0.022
## 2 climate寒温带 -0.178 0.028
## 3 climate温带 -0.299 0.03
```
]
.pull-right[
```r
## 可以看到回归的结果以热带为基准,系数则为均值之差
df.penguin %>%
group_by(climate) %>%
summarise(mean = mean(Temperature)) %>%
as.data.frame()
```
```
## climate mean
## 1 热带 36.555
## 2 温带 36.377
## 3 寒温带 36.255
```
]
]]]]
.footnote[
----------
虚拟编码方式很多,可参考[这里](https://stats.oarc.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/#ORTHOGONAL)
]
---
class: center, middle, inverse
<span style="font-size: 55px;">重复测量方差分析与回归模型 </span>
---
# 1 重复测量方差分析
## 1.1 数据与问题介绍
.size5[
以match_raw.csv为例,一个2x2的被试内实验设计(Identity: Self vs. Other) x Valence: Moral vs. Immoral)),我们希望知道这两种条件之下被试的反应时是否存在显著差异]
.panelset[
.panel[.panel-name[预处理]
```r
mt_raw <- bruceR::import(here::here('data','match','match_raw.csv'))
mt_raw <- mt_raw %>%
tidyr::extract(Shape,
into = c("Valence", "Identity"),
regex = "(moral|immoral)(Self|Other)",
remove = FALSE)
```
.panel[.panel-name[数据展示]
<div id="htmlwidget-d783bfb79768b27a6bab" style="width:100%;height:auto;" class="datatables html-widget"></div>
<script type="application/json" data-for="htmlwidget-d783bfb79768b27a6bab">{"x":{"filter":"none","vertical":false,"data":[["1","2","3","4","5"],[7302,7302,7302,7302,7302],[22,22,22,22,22],["female","female","female","female","female"],["R","R","R","R","R"],[1,1,1,1,1],[1,1,1,1,1],[1,2,3,4,5],["immoralSelf","moralOther","immoralOther","moralSelf","immoralSelf"],["immoral","moral","immoral","moral","immoral"],["Self","Other","Other","Self","Self"]],"container":"<table class=\"display\">\n <thead>\n <tr>\n <th> <\/th>\n <th>Sub<\/th>\n <th>Age<\/th>\n <th>Sex<\/th>\n <th>Hand<\/th>\n <th>Block<\/th>\n <th>Bin<\/th>\n <th>Trial<\/th>\n <th>Shape<\/th>\n <th>Valence<\/th>\n <th>Identity<\/th>\n <\/tr>\n <\/thead>\n<\/table>","options":{"columnDefs":[{"className":"dt-right","targets":[1,2,5,6,7]},{"orderable":false,"targets":0}],"order":[],"autoWidth":false,"orderClasses":false}},"evals":[],"jsHooks":[]}</script>
]]]
---
## 1.2 数据结构
<img src="picture/chp9/data.png" width="70%" style="display: block; margin: auto;" />
<br>
.size5[
- 将总变异分解为组间与组内
- 组内变异是主要关注对象,组间变异则要被控制
]
---
## 1.3 重复测量方差分析的实现
.panelset[
.panel[.panel-name[ANOVA-bruceR]
```r
mt_mean <- mt_raw %>%
dplyr::filter(!is.na(RT) & Match == "match" & ACC == 1) %>%
dplyr::group_by(Sub,Identity,Valence) %>%
dplyr::summarise(RT = mean(RT)) %>%
dplyr::ungroup()
## 本例为长数据
## RUN IN CONSOLE!
## 球形检验输出:
bruceR::MANOVA(data = mt_mean,
subID = 'Sub', # 被试编号
dv= 'RT', # dependent variable
within = c('Identity', 'Valence')) %>% capture.output() %>% .[c(33:37)]
```
```
## [1] "Levene’s Test for Homogeneity of Variance:"
## [2] "No between-subjects factors. No need to do the Levene’s test."
## [3] ""
## [4] "Mauchly’s Test of Sphericity:"
## [5] "The repeated measures have only two levels. The assumption of sphericity is always met."
```
.panel[.panel-name[ANOVA-bruceR(输出)]
```r
bruceR::MANOVA(data = mt_mean,
subID = 'Sub',
dv = 'RT',
within = c('Identity', 'Valence')) %>%
capture.output() %>% .[c(15:31)]
```
```
## [1] "ANOVA Table:"
## [2] "Dependent variable(s): RT"
## [3] "Between-subjects factor(s): –"
## [4] "Within-subjects factor(s): Identity, Valence"
## [5] "Covariate(s): –"
## [6] "─────────────────────────────────────────────────────────────────────────────────"
## [7] " MS MSE df1 df2 F p η²p [90% CI of η²p] η²G"
## [8] "─────────────────────────────────────────────────────────────────────────────────"
## [9] "Identity 0.008 0.003 1 43 2.554 .117 .056 [.000, .198] .009"
## [10] "Valence 0.128 0.002 1 43 64.374 <.001 *** .600 [.440, .706] .131"
## [11] "Identity * Valence 0.033 0.002 1 43 14.364 <.001 *** .250 [.085, .417] .037"
## [12] "─────────────────────────────────────────────────────────────────────────────────"
## [13] "MSE = mean square error (the residual variance of the linear model)"
## [14] "η²p = partial eta-squared = SS / (SS + SSE) = F * df1 / (F * df1 + df2)"
## [15] "ω²p = partial omega-squared = (F - 1) * df1 / (F * df1 + df2 + 1)"
## [16] "η²G = generalized eta-squared (see Olejnik & Algina, 2003)"
## [17] "Cohen’s f² = η²p / (1 - η²p)"
```
.panel[.panel-name[ANOVA-afex]
```r
## bruceR::MANOVA 是对afex的封装
m_aov <- afex::aov_ez(
data = mt_mean,
id = 'Sub',
dv = 'RT',
within = c('Identity', 'Valence'))
m_aov
```
```
## Anova Table (Type 3 tests)
##
## Response: RT
## Effect df MSE F ges p.value
## 1 Identity 1, 43 0.00 2.55 .009 .117
## 2 Valence 1, 43 0.00 64.37 *** .131 <.001
## 3 Identity:Valence 1, 43 0.00 14.36 *** .037 <.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
```
]]]]
---
# 1.4 重复测量方差分析的不足
.bigfont[
重复测量方差分析有没有局限性?
]
--
<br>
.bigfont[
- 个体间差异同样无法估计;
]
.bigfont[
- 处理缺失值只能将整行观测删除,会导致标准误增加、功效降低;
]
.bigfont[
- 对因变量(连续)和自变量(分类)的类型有要求;
]
.bigfont[
- 对每一个试次中数据利用率低,造成试次的浪费
]
<br>
---
# 1.4 重复测量方差分析的不足
.bigfont[
重复测量方差分析有没有局限性?
]
<img src="picture/chp9/Neuron_LMM.png" width="45%" style="display: block; margin: auto;" />
.bigfont[
*因此,现在越来越多的期刊推荐使用多层线性回归(Hierarchical Linear Model) ,如[Neuron](https://www.sciencedirect.com/science/article/pii/S089662732100845X)。*
]
---
# 2 多层线性模型(HLM)简介
.size5[
分层线性模型/多层线性模型(HLM): 用于处理"多层嵌套数据",在一个以上层次上变化参数的线性模型。但多层线性模型的名字非常多,不同学科称呼不同,有许多“近义词”:]
- 层级/分层模型(Hierarchical Model,HM)
- 多水平模型(Multilevel Model,MLM)
- 线性混合模型(Linear Mixed Model)
- 混合效应模型(Mixed Effects Model)
- 随机效应模型(Random Effects Model)
- 随机系数模型(Random Coefficients Model).....
.size5[
但在注意与多元回归(multiple regression)进行区分,即逐步引入自变量到回归模型中,以检验每个自变量对因变量的影响是否独立于其他自变量
]
---
layout: true
# 2.1 多层线性模型(HLM)的关键概念:固定效应与随机效应
---
.size5[
在回归模型中一般会在截距和斜率上分别讨论**固定效应**和**随机效应**。
例如,关于研究教师的工龄(Experience)与薪水(Salary)之间是否存在关系。在某校随机抽取了5个学院的教师信息,具体数据如下:]
<br>
<div id="htmlwidget-1ef060a1855194719671" style="width:100%;height:auto;" class="datatables html-widget"></div>
<script type="application/json" data-for="htmlwidget-1ef060a1855194719671">{"x":{"filter":"none","vertical":false,"data":[["1","2","3","4","5","6"],[1,2,3,4,5,6],["sociology","biology","english","informatics","statistics","sociology"],[39628.505,54578.738,57113.633,75941.271,87351.301,39802.471],[3,5,0,8,7,1],[1819.75,488.662,518.375,1770.109,471.033,2112.536],[45087.753,57022.049,57113.633,90102.143,90648.531,41915.007]],"container":"<table class=\"display\">\n <thead>\n <tr>\n <th> <\/th>\n <th>ids<\/th>\n <th>department<\/th>\n <th>bases<\/th>\n <th>experience<\/th>\n <th>raises<\/th>\n <th>salary<\/th>\n <\/tr>\n <\/thead>\n<\/table>","options":{"columnDefs":[{"className":"dt-right","targets":[1,3,4,5,6]},{"orderable":false,"targets":0}],"order":[],"autoWidth":false,"orderClasses":false}},"evals":[],"jsHooks":[]}</script>
.footnote[
----------
数据来源见[这里](https://github.com/mkfreeman/hierarchical-models/blob/master/generate-data.R)
]
---
layout: true
## 2.2 固定效应与随机效应
---
.size5[问题:是否可用工龄预测某个学校员工的工资?]
.panelset[
.panel[.panel-name[数据结构]
<img src="picture/chp9/nest.png" width="80%" style="display: block; margin: auto;" />
.panel[.panel-name[OLS]
<img src="chapter_9_files/figure-html/unnamed-chunk-16-1.png" width="540" style="display: block; margin: auto;" />
]]]
---
.size[
似乎可以。但有两个问题:
- 不同学院的底薪有可能存在差异(存在/不存在)
- 不同学院间间,工资与工龄的关系存在差异(存在/不存在)
这意味有可能会出现四种情况
]
--
<br>
.size[
对应在图中,则会在截距与斜率之间出现差异:
1. 不同学院的底薪相同,工资涨幅也相同;(固定截距,固定斜率)
2. 不同学院间底薪不同,但工资涨幅相同;(随机截距,固定斜率)
3. 不同学院间底薪相同,但工资涨幅不同;(固定截距,随机斜率)
4. 不同学院间底薪和工资涨幅都不相同。(随机截距,随机斜率)
画图看看.
]
---
.panelset[
.panel[.panel-name[fixI-fixS]
<img src="chapter_9_files/figure-html/unnamed-chunk-17-1.png" width="540" style="display: block; margin: auto;" />
.panel[.panel-name[ranI-fixS]
<img src="chapter_9_files/figure-html/unnamed-chunk-18-1.png" width="540" style="display: block; margin: auto;" />
.panel[.panel-name[fixI-ranS]
<img src="chapter_9_files/figure-html/unnamed-chunk-19-1.png" width="540" style="display: block; margin: auto;" />
.panel[.panel-name[ranI-ranS]
<img src="chapter_9_files/figure-html/unnamed-chunk-20-1.png" width="540" style="display: block; margin: auto;" />
]]]]]
---
layout: false
## 2.3 两种数据结构的比较
.size5[
无论在我们的数据中,还是刚才的数据中,其实都出现了层级或嵌套关系;只是对于match数据,每个变量都嵌套在一个被试中,而每个被试都可视为一条回归线。
]
.panelset[
.panel[.panel-name[两种效应]
.size5[
- 固定效应代表了实验中稳定的、总体水平上的效应,即它们在不同个体、群体或条件之间的影响是一致的,如match数据中Identity和Valence的效应
- 随机效应则表示了数据中的随机变异或个体间的差异的程度,以及这种变异程度如何随着特定分组因素的变化而变化。
]
.panel[.panel-name[match_data]
<img src="picture/chp9/data.png" width="70%" style="display: block; margin: auto;" />
.panel[.panel-name[shcool]
<img src="picture/chp9/nest.png" width="80%" style="display: block; margin: auto;" />
]]]]
---
.size5[
对于match数据,类比与学校员工薪水数据,我们也可以设想:
- 不同被试总体上会不会存在反应时上的差异:有些个体普遍反应速度更快,而有些反应速度普遍更慢(随机截距)
- 在自我条件下,两种Valence的差异是否完全相同?还是会有个体差异?(随机斜率)
画图尝试一下,计算被试平均反应时后进行排序,选取首尾的几名被试:
]
<img src="chapter_9_files/figure-html/unnamed-chunk-23-1.png" width="540" style="display: block; margin: auto;" />
---
# 3 多层线性模型的应用
## 3.1 基本形式
.size5[
我们使用lme4包对多层线性模型建模,具体语句形式如下:
]
<br>
```r
fit <- lme4::lmer(
data = ,
* formula = DV ~ Fixed_Factor + (Random_intercept + Random_Slope | Random_Factor))
```
<br>
--
.size5[
注:
- **但在建立模型之前,需要考虑好在我们的数据中,随机效应和固定效应分别是什么**。一般都会添加随机截距,而随机斜率的加入应当考虑是否有充足理由;
- 另外,由于随机效应是从某些总体中抽样的离散单位,因而本质上是分类变量
]
---
## 3.2 建立模型
.panelset[
.panel[.panel-name[ranI]
```r
## 随机截距 固定斜率
model <- lme4::lmer(data = mt_raw,
RT ~ Identity * Valence + (1|Sub))
```
.size5[
- Identity\*Valence: \*表示两变量间所有可能的形式,等同与Identity + Valence + Identity:Valence
- (1|Sub): 1表示随机截距(0则表示固定截距); 管道(|)右侧Sub为随机因子
]
.panel[.panel-name[ranI ranS]
```r
## 随机截距 随机斜率
model_full <- lme4::lmer(data = mt_raw,
RT ~ Identity * Valence + (1 + Identity * Valence|Sub))
```
.size5[
- Identity \* Valence: \*表示两变量间所有可能的形式,等同与Identity + Valence + Identity:Valence
- (1|Sub): 1表示随机截距(0则表示固定截距); 管道(|)右侧Sub为随机因子
]
.panel[.panel-name[模型比较]
```r
## 模型比较
stats::anova(model, model_full) %>% capture.output()
```
```
## [1] "Data: mt_raw"
## [2] "Models:"
## [3] "model: RT ~ Identity * Valence + (1 | Sub)"
## [4] "model_full: RT ~ Identity * Valence + (1 + Identity * Valence | Sub)"
## [5] " npar AIC BIC logLik deviance Chisq Df Pr(>Chisq) "
## [6] "model 6 -29173 -29124 14592 -29185 "
## [7] "model_full 15 -30024 -29902 15027 -30054 869 9 <0.0000000000000002 ***"
## [8] "---"
## [9] "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"
```
<br>
<br>
<br>
------
注:模型比较的指标、计算方法及其优劣请参考[《认知建模中模型比较的方法》](https://chinaxiv.org/abs/202308.00658)
]]]]
---
## 3.3 似然比检验(Likelihood-ratio tests)
.panelset[
.panel[.panel-name[问题]
.size5[
在建立模型后,我们希望知道固定效应大小是否显著,但由于多层线性模型中对于自由度的估计有多种方法,比较复杂,所以lme4::lmer()中没有提供显著性。
]
.panel[.panel-name[anova]
```r
# 建立没有固定效应的“空模型”
model_null <- lme4::lmer(data = mt_raw,
RT ~ (1 + Identity*Valence|Sub))
## 根据似然比进行模型比较
stats::anova(model_full, model_null)
```
```
## Data: mt_raw
## Models:
## model_null: RT ~ (1 + Identity * Valence | Sub)
## model_full: RT ~ Identity * Valence + (1 + Identity * Valence | Sub)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model_null 12 -29992 -29894 15008 -30016
## model_full 15 -30024 -29902 15027 -30054 38.1 3 0.000000026 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```
.panel[.panel-name[mixed]
在模型非常复杂时(多层嵌套),如果仅仅只想对固定效应进行检验,可以使用afex::mixed(),设置method 参数为 LRT(Likelihood-ratio tests)
```r
afex::mixed(data = mt_raw,
RT ~ Identity * Valence + (1 + Identity*Valence|Sub),
* method = 'LRT')
```
```
## Mixed Model Anova Table (Type 3 tests, LRT-method)
##
## Model: RT ~ Identity * Valence + (1 + Identity * Valence | Sub)
## Data: mt_raw
## Df full model: 15
## Effect df Chisq p.value
## 1 Identity 1 1.73 .188
## 2 Valence 1 30.27 *** <.001
## 3 Identity:Valence 1 12.03 *** <.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
```
.panel[.panel-name[lmerTest]
lmerTest是一个与lme4包兼容的包,主要用于对混合效应模型进行假设检验;其中也包含了lmerTest::lmer()函数,与lme4::lmer()不同的是,其结果报告了显著性(使用Satterthwaite分母自由)
```r
lmer_model <- lmerTest::lmer(data = mt_raw,
RT ~ Identity * Valence + (1 + Identity * Valence|Sub))
# summary(lmer_model)
# 如果使用lmerTest包进行建模,可以使用bruceR::HLM_summary()进行输出
## RUN IN CONSOLE
# HLM_summary(lmer_model)
```
]]]]]
---
## 3.4 模型解读
.panelset[
.panel[.panel-name[随机效应]
注意:
- 相关矩阵会体现自变量效应在个体上的差异,尤其是第一列(截距与斜率的相关),而具体的解释也应考虑对应固定效应系数本身的正负;
- 有可能会提供天花板与地板效应的相关信息,如任务过于简单,数据变化较小,有可能出现截距与斜率为负相关。
```
## [1] "Random effects:"
## [2] " Groups Name Variance Std.Dev. Corr "
## [3] " Sub (Intercept) 0.00425 0.0652 "
## [4] " IdentitySelf 0.00147 0.0383 -0.24 "
## [5] " Valencemoral 0.00219 0.0468 -0.22 0.49 "
## [6] " IdentitySelf:Valencemoral 0.00510 0.0714 -0.02 -0.53 -0.80"
## [7] " Residual 0.01801 0.1342 "
## [8] "Number of obs: 25920, groups: Sub, 44"
```
.panel[.panel-name[固定效应]
```
## [1] "Fixed effects:"
## [2] " Estimate Std. Error t value"
## [3] "(Intercept) 0.72328 0.00997 72.51"
## [4] "IdentitySelf 0.01329 0.00624 2.13"
## [5] "Valencemoral -0.00913 0.00744 -1.23"
## [6] "IdentitySelf:Valencemoral -0.04150 0.01128 -3.68"
```
.panel[.panel-name[交互效应的可视化]
```r
## 一种快捷的方法
interactions::cat_plot(model = model_full,
pred = Identity,
modx = Valence)
```
<img src="chapter_9_files/figure-html/unnamed-chunk-33-1.png" width="432" style="display: block; margin: auto;" />
]]]]
---
## 3.5 模型等价性
.size5[
如果我们观察一下使用lme4::lmer()的结果与重复测量结果,可以发现存在差异:
]
.panelset[
.panel[.panel-name[anova]
```r
m_aov
```
```
## Anova Table (Type 3 tests)
##
## Response: RT
## Effect df MSE F ges p.value
## 1 Identity 1, 43 0.00 2.55 .009 .117
## 2 Valence 1, 43 0.00 64.37 *** .131 <.001
## 3 Identity:Valence 1, 43 0.00 14.36 *** .037 <.001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
```
.panel[.panel-name[lme4]
```r
## 使用lme4建立的模型
model_full %>% anova()
```
```
## Analysis of Variance Table
## npar Sum Sq Mean Sq F value
## Identity 1 0.013 0.013 0.74
## Valence 1 0.811 0.811 45.00
## Identity:Valence 1 0.244 0.244 13.54
```
.panel[.panel-name[lmerTest]
```r
## 使用lmerTest建立的模型
lmer_model %>% anova()
```
```
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Identity 0.031 0.031 1 43.0 1.73 0.19578
## Valence 0.765 0.765 1 43.3 42.47 0.000000063 ***
## Identity:Valence 0.244 0.244 1 43.0 13.54 0.00065 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```
]]]]
---
```r
# http://www.dwoll.de/rexrepos/posts/anovaMixed.html#two-way-repeated-measures-anova-rbf-pq-design
model_aov <- mt_mean %>%
# dplyr::filter(!is.na(RT) & Match == "match" & ACC == 1) %>%
lmerTest::lmer(
data = .,
RT ~ Identity * Valence + (1|Sub) + (1|Identity:Sub) + (1|Valence:Sub)
)
model_aov %>% anova()
```
```
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Identity 0.0055 0.0055 1 43 2.55 0.11735
## Valence 0.1282 0.1282 1 86 59.89 0.000000000018 ***
## Identity:Valence 0.0329 0.0329 1 86 15.36 0.00018 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
```
---
# 4 HLM的应用
.size5[
HLM 非常复杂,本节课仅以重复测量为例,下面提供了一些其他的应用场景:
]
.panelset[
.panel[.panel-name[嵌套数据]
<img src="picture/chp9/nestdesign.jpg" width="700" style="display: block; margin: auto;" />
.panel[.panel-name[元分析1]
通过综合一组研究来评估效应量的大小。由于不同研究的结果之间往往涉及到不同质的群体或不同的实验条件。
<img src="picture/chp9/meta1.jpg" width="85%" style="display: block; margin: auto;" />
.panel[.panel-name[元分析2]
<img src="picture/chp9/meta2.jpg" width="60%" style="display: block; margin: auto;" />
]]]]
---
# 5 可能遇到的问题
.size5[
- 模型不收敛:[混合线性模型的实现](https://zhuanlan.zhihu.com/p/63092231)
- 自由度问题:[多层线性模型(HLM)及其自由度问题](https://zhuanlan.zhihu.com/p/50048784)
- [统计学中的固定效应 vs. 随机效应](https://zhuanlan.zhihu.com/p/60528092)
- [「固定效应、主效应、简单效应」概念辨析](https://zhuanlan.zhihu.com/p/513227882)
]
---
class: center, middle
.tit_font[
思考
]
<br>
<span style="font-size: 50px;">当因变量不服从正态分布(如ACC)时如何处理?</span> <br>
</textarea>
<style data-target="print-only">@media screen {.remark-slide-container{display:block;}.remark-slide-scaler{box-shadow:none;}}</style>
<script src="https://remarkjs.com/downloads/remark-latest.min.js"></script>
<script>var slideshow = remark.create({
"highlightLines": true,
"highlightStyle": "github",
"countIncrementalSlides": false,
"seal": true,
"ratio": "16:9"
});
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {
window.dispatchEvent(new Event('resize'));
});
(function(d) {
var s = d.createElement("style"), r = d.querySelector(".remark-slide-scaler");
if (!r) return;
s.type = "text/css"; s.innerHTML = "@page {size: " + r.style.width + " " + r.style.height +"; }";
d.head.appendChild(s);
})(document);
(function(d) {
var el = d.getElementsByClassName("remark-slides-area");
if (!el) return;
var slide, slides = slideshow.getSlides(), els = el[0].children;
for (var i = 1; i < slides.length; i++) {
slide = slides[i];
if (slide.properties.continued === "true" || slide.properties.count === "false") {
els[i - 1].className += ' has-continuation';
}
}
var s = d.createElement("style");
s.type = "text/css"; s.innerHTML = "@media print { .has-continuation { display: none; } }";
d.head.appendChild(s);
})(document);
// delete the temporary CSS (for displaying all slides initially) when the user
// starts to view slides
(function() {
var deleted = false;
slideshow.on('beforeShowSlide', function(slide) {
if (deleted) return;
var sheets = document.styleSheets, node;
for (var i = 0; i < sheets.length; i++) {
node = sheets[i].ownerNode;
if (node.dataset["target"] !== "print-only") continue;
node.parentNode.removeChild(node);
}
deleted = true;
});
})();
// add `data-at-shortcutkeys` attribute to <body> to resolve conflicts with JAWS
// screen reader (see PR #262)
(function(d) {
let res = {};
d.querySelectorAll('.remark-help-content table tr').forEach(tr => {
const t = tr.querySelector('td:nth-child(2)').innerText;
tr.querySelectorAll('td:first-child .key').forEach(key => {
const k = key.innerText;
if (/^[a-z]$/.test(k)) res[k] = t; // must be a single letter (key)
});
});
d.body.setAttribute('data-at-shortcutkeys', JSON.stringify(res));
})(document);
(function() {
"use strict"
// Replace <script> tags in slides area to make them executable
var scripts = document.querySelectorAll(
'.remark-slides-area .remark-slide-container script'
);
if (!scripts.length) return;