-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.Rmd
1184 lines (968 loc) · 76.2 KB
/
test.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: Study on association of air polluants with epidemic trend of hemorrhagic fever
with renal syndrome in Zhejiang province
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
html_document:
df_print: paged
word_document: default
pdf_document: default
always_allow_html: true
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE,
message = F,
warning = F,
ft.keepnext = T,
results = 'asis',
ft.split = F)
# PACKAGES
library(dplyr)
library(haven)
library(vroom)
library(tidyr)
library(gtsummary)
library(tidyverse)
library(ggplot2)
library(ggsci)
library(officer)
library(flextable)
library(openxlsx)
library(readr)
library(readxl)
library(sf)
library(ggthemes)
library(survminer)
library(trend)
library(spdep)
library(knitr)
hemorrhagic_fever <- read_excel("hemorrhagic_fever.xlsx")
airdata <- read_excel("airdata.xlsx")
dat <- hemorrhagic_fever %>%
mutate(Age = as.numeric(Age),
Age.cat = case_when(Age <20 ~ "<20",
Age >= 20 & Age <= 29 ~ "20-29",
Age >= 30 & Age <= 39 ~ "30-39",
Age >= 40 & Age <= 49 ~ "40-49",
Age >= 50 & Age <= 59 ~ "50-59",
Age >= 60 & Age <= 69 ~ "60-69",
Age >= 70 & Age <= 79 ~ "70-79",
Age >= 80 ~ ">80"),
Diagonse_Date.new = str_sub(as.character(Diagonse_Date), 1, 10),
Diagnose.year = str_sub(Diagonse_Date.new, 1, 4),
Diagnose.year = as.character(Diagnose.year),
Diagonse_Date.new = as.Date(Diagonse_Date.new),
Diagonse.month = str_sub(Diagonse_Date.new, 6, 7),
Diagonse.month = as.numeric(Diagonse.month),
Diagonse.week = week(Diagonse_Date.new),
Diagonse.week = as.numeric(Diagonse.week),
Illness_Date.new = as.Date(Illness_Date),
Illness.year = str_sub(Illness_Date.new, 1, 4),
Illness.month = str_sub(Illness_Date.new, 6, 7),
Illness.month = as.character(Illness.month),
Illness.month = as.numeric(Illness.month),
Illness.week = week(Illness_Date.new),
Illness.week = as.character(Illness.week),
difftime = as.numeric(Diagonse_Date.new - Illness_Date.new),
difftime.year = as.numeric(difftime/365.25),
difftime.day_7 = ifelse(difftime>7, 1, 0),
difftime.day_14 = ifelse(difftime>14, 1, 0),
difftime.day_30 = ifelse(difftime>30, 1, 0),
Illness.season = case_when( Illness.month %in% c(2:4) ~ "Spring",
Illness.month %in% c(5:7) ~ "Summer",
Illness.month %in% c(8:10) ~ "Autumn",
Illness.month %in% c(1,11:12) ~ "Winter"
),
#difftime.month_6 = ifelse(difftime.year>(6/12), 1, 0),
#difftime.month_9 = ifelse(difftime.year>(9/12), 1, 0),
#difftime.month_12 = ifelse(difftime.year>1, 1, 0),
city = `Prefecture-level City` ,
Diagonse.week.f = factor(Diagonse.week, levels = c(1:53)),
Illness.week.f = factor(Illness.week, levels = c(1:53)),
Age.cat = fct_relevel(Age.cat, "<20", "20-29", "30-39", "40-49",
"50-59", "60-69", "70-79", ">80"),
occupation = ifelse(!Classifications %in% c("Worker" , "Unemployment", "Farmer"), "Others", Classifications),
Death = ifelse(Death_Date == ".", 0, 1),
log.difftime = log(difftime),
year = as.numeric(Illness.year),
month = as.numeric(Illness.month),
city = `Prefecture-level City`)
dat$Illness.season <- factor(dat$Illness.season)
dat$Illness.season <- fct_relevel(dat$Illness.season, "Spring", "Summer", "Autumn", "Winter" )
names(airdata)[c(1,3)] <- c("city", "time")
airdata <- airdata %>%
mutate(year = substr(time, 1, 4),
month = substr(time, 6, 7),
city = case_when(#city == "临安" ~ ,
city == "丽水" ~ "Lishui",
#city == "义乌" ~ ,
city == "台州" ~ "Taizhou",
city == "嘉兴" ~ "Jiaxing",
city == "宁波" ~ "Ningbo",
#city == "富阳" ~ ,
city == "杭州" ~ "Hangzhou",
city == "温州" ~ "Wenzhou",
city == "湖州" ~ "Huzhou",
city == "绍兴" ~ "Shaoxing",
city == "舟山" ~ "Zhoushan",
city == "衢州" ~ "Quzhou",
#city == "诸暨" ~ ,
city == "金华" ~ "Jinhua"),
aqi = as.numeric(aqi),
pm2_5 = as.numeric(pm2_5),
pm10 = as.numeric(pm10),
so2 = as.numeric(so2),
no2 = as.numeric(no2),
co = as.numeric(co),
o3 = as.numeric(o3),
month = as.numeric(month),
year = as.numeric(year),
season = case_when(month %in% c(3:5) ~ "Spring",
month %in% c(6:8) ~ "Summer",
month %in% c(9:11) ~ "Autumn",
month %in% c(12,1:2) ~ "Winter"))
final.data <- dat %>%
left_join(airdata, by = c("year", "month", "city")) %>%
filter(year != 2004)
temp <- read.xlsx("temp.xlsx")
names(temp) <- c("city", "x", "year", "month", "temperature", "Humidity")
str(temp)
temp.x <- temp %>%
mutate(year = as.numeric(year),
month = as.numeric(month),
temperature = as.numeric(temperature),
Humidity = as.numeric(Humidity),
city = case_when(#city == "临安" ~ ,
city == "丽水" ~ "Lishui",
#city == "义乌" ~ ,
city == "台州" ~ "Taizhou",
city == "嘉兴" ~ "Jiaxing",
city == "宁波" ~ "Ningbo",
#city == "富阳" ~ ,
city == "杭州" ~ "Hangzhou",
city == "温州" ~ "Wenzhou",
city == "湖州" ~ "Huzhou",
city == "绍兴" ~ "Shaoxing",
city == "舟山" ~ "Zhoushan",
city == "衢州" ~ "Quzhou",
#city == "诸暨" ~ ,
city == "金华" ~ "Jinhua"),
x = case_when(#city == "临安" ~ ,
x == "丽水" ~ "Lishui",
#city == "义乌" ~ ,
x == "台州" ~ "Taizhou",
x == "嘉兴" ~ "Jiaxing",
x == "宁波" ~ "Ningbo",
#city == "富阳" ~ ,
x == "杭州" ~ "Hangzhou",
x == "温州" ~ "Wenzhou",
x == "湖州" ~ "Huzhou",
x == "绍兴" ~ "Shaoxing",
x == "舟山" ~ "Zhoushan",
x == "衢州" ~ "Quzhou",
#city == "诸暨" ~ ,
x == "金华" ~ "Jinhua"))
temp.x <- temp.x %>% filter(year >= 2005 & year <= 2020)
temp.x$month <- rep(1:12, 368)
temp.x.agg <- temp.x %>% group_by(city, year, month) %>%
summarise( Temperature = mean(temperature, na.rm = T),
Humidity = mean(Humidity, na.rm = T)) %>%
ungroup()
final.data.x <- final.data %>%
left_join(temp.x.agg, by = c("year", "month", "city"))
# 下载中国区县级行政地图
# raster::getData('ISO3') 各个国家或地区的 ISO3 代码
china_map <- raster::getData(
name = "GADM",
country = "CHN", # 中国的 ISO3 代码
level = 2, # 国家=0 省=1 市=2 县=3
type = "sf", # 返回数据类型为 sf 类型
path = "mapdata/" # 保存到本地目录,以便复用
)
zj_map <- china_map[china_map$NAME_1 == "Zhejiang", ]
airdata$season <- fct_relevel(airdata$season, "Spring","Summer","Autumn","Winter")
```
# 1. Abstract
A total of 7,724 hemorrhagic fever with renal syndrome (HFRS) cases were reported in Zhejiang Province from 2005 to 2020, resulting in 25 deaths. There were two incidence peaks each year, one in late spring and early summer (May-June) and another in winter (November-January). The top three areas with the highest cumulative cases were Ningbo (1,875, 24.27%), Taizhou (1,642, 21.25%), and Shaoxing (1,123, 14.54%). Among the reported cases, the male-to-female ratio was 2.73∶1 (5,656∶2,068). The majority of HFRS cases affected middle-aged and elderly individuals, with cases aged 41-70 years accounting for 60.95%. Most HFRS cases were farmers, making up 69.89% (5,398 out of 7,724). The spatial distribution of HFRS cases was correlated in most years.
Hemorrhagic Fever with Renal Syndrome (HFRS) has experienced a resurgence in China since 1963, with environmental factors being identified as potential contributors. In our analysis, data encompassing the years 2013 to 2020 were scrutinized, focusing on six air pollutants and meteorological variables, including temperature and humidity. Notably, a low to moderate correlation was observed between HFRS incidence and monthly Air Quality Index (AQI) (r = 0.27), Nitrogen Dioxide (NO2) (r = 0.42), and Sulfur Dioxide (SO2) (r = 0.25). Additionally, a negative correlation was noted with temperature (r = -0.22). Spatial autocorrelation was further assessed using the Moran test, revealing patterns in the geographical distribution of HFRS cases. It is essential to emphasize that while correlations were identified, a direct causative relationship has not been established through our analysis.
# 2. Introduction
HFRS is found throughout the world, which is a rodent-borne illness caused by hantaviruses including Hantaan (HTNV), Seoul (SEOV), Dobrava-Belgrade virus, Saaremma, and Puumala. Most HFRS patients are infected through direct exposure to the aerosolized droppings or body fluids of infected rodents, but the human-to-human transmission is rare. Main clinical manifestations include fever, vomiting, abdominal pains, hypotension, kidney injury, thrombocytopenia, and shockHaantan virus is widely distributed in eastern Asia, particularly in China, Russia, and Korea. Puumala virus is found in Scandinavia, western Europe, and western Russia. Dobrava virus is found primarily in the Balkans, and Seoul virus is found worldwide. Saaremaa is found in central Europe and Scandinavia. In the Americas, hantaviruses cause a different disease known as hantavirus pulmonary syndrome.
The analyzed data on HFRS cases in Zhejiang province during 2005-2020 were collected from Zhejiang Provincial Center for Disease Control and Prevention (CDC). For a descriptive analysis, The study consisted of `r dim(dat)[1]` HFRS in China from 2004 to 2020. The data contains the basic characteristic like age, gender living addresses, occupations and the year of diagnostics and illness. Since in year 2004 we only get few data, we think it is measurement problem. So, we exclude that from our analysis later. Zhejiang has an population of 64,567,588 in 2020. The geographical scope of our study encompasses the main cities, specifically Hangzhou, Huzhou, Jiaxing, Jinhua, Lishui, Ningbo, Quzhou, Shaoxing, Taizhou, Wenzhou, and Zhoushan. These cities might eventually be further subdivided into county levels for more detailed analysis.
This research also investigates the impact of diverse factors, including environmental, meteorological factors, on the incidence of infectious diseases. Monthly air quality data, including the Air Quality Index (AQI) and concentrations of pollutants such as PM2.5, PM10, SO2, NO2, CO, and O3, were obtained from National Air Quality Monitoring Stations in China. The study specifically examines the spatio temporal clustering distribution characteristics and trends in HFRS outbreaks in Zhejiang Province. The findings contribute valuable data for a comprehensive exploration of the epidemiological characteristics and influencing factors of HFRS. Furthermore, the results inform the development of predictive warning models and strategies for precise control of HFRS. The multifaceted analysis enhances our understanding of the intricate dynamics influencing the occurrence of infectious diseases. The heat map shows seasonal patterns, which mean the HFRS showed semiannual peaks of activity, including a peak in May and June followed by a peak in November and December. HFRS predominantly locally circulated in the north, northeast, and
northwest of Zhejiang.
During 2013–2018, the national monthly mean concentrations were 51.28 μg/m3 for PM2.5, 90.75 μg/m3 for PM10, 24.35 μg/m3 for SO2, 33.63 μg/m3 for NO2, and 1.08 mg/m3 for CO, with a daytime 8-hour mean concentration for O3 at 86 mg/m3. The monthly concentrations of PM2.5 and PM10 exceeded the 2018 China guidelines II level. Boxplots depicting monthly variation in air pollution concentrations revealed a distinct seasonal pattern. We observe a noteworthy correlation among various air pollutants, indicating a level of interdependence. This suggests that the presence or concentration of one air pollutant is associated with the behavior or characteristics of others. However, temporal variations were evident in their trends. The average monthly concentrations of PM2.5, PM10, and CO experienced a significant annual decrease. In contrast, O3 values demonstrated a notable increase over the 6-year period (2013–2018), while NO2 exhibited a volatile upward trend from 2016 onward, following a declining pattern during 2013–2016. Besides, concentrations of NO2 and O3 were positively correlated with HFRS incidences in quantile groups. Meteorological data, including temperature and humidity, were also collected for analysis.
In the realm of public health, understanding the spread of infectious diseases is crucial for effective prevention and control measures. Recent studies have delved into the intricate dynamics of epidemics by employing sophisticated spatiotemporal modeling techniques. These approaches not only provide insights into the geographical patterns of disease transmission but also unravel the temporal aspects that influence the course of an outbreak. There are also various studies focusing on the epidemiology of infectious diseases with spatio-temporal modeling. Spatio temporal modeling in epidemiology involves the integration of space (geographical location) and time into the analysis of disease spread. This multidimensional approach enables researchers to discern patterns, identify hotspots, and predict the trajectory of infectious diseases more accurately. By incorporating geographical information systems (GIS). Creating models that capture the complex interplay between space and time in the context of disease dynamics. One key aspect of spatio temporal modeling is the exploration of geographical patterns in disease transmission. can By mapping the spatial distribution of cases, revealing clusters or areas with higher vulnerability. Understanding how diseases propagate across different regions allows for targeted interventions, resource allocation, and the development of region-specific public health strategies. The integration of spatio temporal modeling in epidemiology also empowers to develop predictive models. These models can forecast the spread of diseases based on current trends, environmental factors, and population dynamics. Such foresight enables public health authorities to implement preventive measures in advance, potentially mitigating the severity of an outbreak and reducing its impact on communities. Nazia N et al [14] analyzed the The spread of the COVID-19 pandemic by reviewing 154 published peer-reviewed articles on COVID-19 that applied various Bayesian and Frequentist spatial methods to identify spatial variations of the disease risk and associated socioeconomic, demographic, and climatic factors for such spatial variations of the risk.
Epidemiology has a rich tradition of investigating factors that influence the fluctuation in the occurrence or fatality rates of infectious and chronic diseases. Among these factors, geographical or spatial variances in health outcomes play a pivotal role in assessing the distribution and effectiveness of healthcare. These spatial variations not only provide crucial insights into patterns of dependence and noise levels in the data but also serve as a foundation for appraising healthcare performance.
Liu et al. (2019) utilized the Autoregressive Integrated Moving Average (ARIMA) model to evaluate and forecast HFRS incidence in China, employing historical time series data. Although the ARIMA model proves beneficial for estimating continuous time series data, it tends to overlook correlations among different spatial locations.
Recognizing this limitation, Santosha Rathod (2018)[13] introducedAn improved Space-Time Autoregressive Moving Average (STARMA) model for Modelling and Forecasting of Spatio-Temporal time-series data. However, it is crucial to note that the STARMA model treats the spatial influences of data in distinct locations as individual factors. To improve the precision of spatial epidemic analysis, particularly in the context of HFRS, a more comprehensive strategy involves considering both spatial and temporal characteristics, incorporating interactive influences. This integrative approach holds the potential to contribute to a more nuanced comprehension of the dynamics underlying HFRS outbreaks.
# 3. Data collection and method
All statistical analyses were performed using R (version 4.3.1, R Foundation for Statistical Computing, Vienna, Austria). A p-value of <0.05 was considered statistically significant. The
geographic information are downloaded by using the raster package (Hijmans 2022a), which allows free downloading of national administrative boundary information from the GADM website, which can be used for academic and non-commercial purposes. It provides administrative boundary data at the national, provincial, municipal, and county levels, which can be directly downloaded and imported into the R environment.
(1) Descriptive analyses
We utilized measures such as mean, standard deviation, quartiles (P25, median, P75), to characterize the distribution of HFRS incidence, air pollutants, and meteorological variables. The epidemiological characteristics of HFRS cases, including geographic distribution, seasonal pattern, gender, age, and occupation, were analyzed using descriptive methods.
(2) Mann-Kendall Testing
The Mann-Kendall test is a non-parametric, rank-based test that is commonly used in environmental sciences to determine whether or not there is a monotonic trend in a timeseries. The assumption that you need to consider before using Mann-Kendall is that there is no seasonality in the dataset (there is a seasonal Kendall test that you can use for timeseries collected over mutliple seasons).
![](mkfor.png)
(3) Anova Test
The Analysis of Variance (ANOVA) test is a statistical method used to assess whether the means of two or more groups are significantly different from each other. It is particularly valuable when comparing means across multiple levels of a categorical variable. The ANOVA test achieves this by partitioning the total variance in the data into different components associated with each group. The approach aimed to examine the time differences between diagnostics and illness onset.
![](anova.png)
(4) Correlation plot
The Pearson correlation coefficient is a metric quantifying the linear correlation between two sets of data. Calculated as the ratio of the covariance of two variables to the product of their standard deviations, it provides a normalized measure of covariance.
![](corr.png)
We also used correlation plot to display the association between different factors. Correlation plot is also well known as correlation matrix or heatmap, is a visual representation of the correlation coefficients between variables in a dataset. The correlation plot is particularly useful for identifying patterns, relationships, and dependencies among different variables. The correlation coefficient quantifies the strength and direction of the linear relationship between two variables. It ranges from -1 to 1, where -1 indicates a perfect negative correlation, 1 indicates a perfect positive correlation, and 0 indicates no correlation. A high positive correlation indicates that as one variable increases, the other variable tends to increase as well. A high negative correlation indicates that as one variable increases, the other variable tends to decrease.
(5) The global spatial autocorrelation analysis
To quantify the presence of spatial autocorrelation in the residuals from this model we can
compute Moran’s I statistic (Moran 1950) and conduct a permutation test. The permutation
test has the null hypothesis of no spatial autocorrelation and an alternative hypothesis of
positive spatial autocorrelation, and is conducted using the moran.mc() function from the
spdep package in R. Moran's I is a statistical measure employed to evaluate spatial autocorrelation in data, specifically the correlation observed among neighboring locations in spatial domains. Unlike one-dimensional autocorrelation, spatial autocorrelation operates within multi-dimensional spatial contexts, typically 2 or 3 dimensions, considering various directions within the spatial framework. This metric is widely used in spatial statistics and geographical analysis to identify patterns of clustering, dispersion, or randomness in spatial datasets.
![](moran.png)
Commonly employed in various statistical analyses, fixed effects models are a staple in data modeling, including applications involving spatial data. However, the suitability of a fixed effects model for spatial data hinges on the intrinsic nature of the data and the associated assumptions. Spatial data analysis often grapples with challenges related to spatial autocorrelation, signifying the interdependence of observations in space. The presence of spatial autocorrelation can contravene the assumption of independence of observations, a fundamental premise in many standard statistical models, notably fixed effects models.
In instances where concerns about spatial dependence arise, researchers may turn to spatial econometric models or spatial random effects models. These alternatives explicitly accommodate spatial autocorrelation by integrating spatial structures like spatial lag or spatial error terms. This nuanced approach enhances the model's ability to capture intricate spatial relationships within the data. While fixed effects models can be adapted to spatial data, a critical consideration involves assessing their appropriateness for the specific data characteristics and their effectiveness in addressing spatial dependencies. Depending on the spatial structure of the data and the research objectives, alternative spatial modeling approaches may prove more fitting. It is imperative to conduct diagnostic tests for spatial autocorrelation and thoroughly evaluate the assumptions of the selected model to ensure the reliability and validity of the results.
# 4. Result
The descriptive statistics provide a general summary of the occurrence of HFS over the years. Continuous variables are presented with mean, standard deviation, as well as median and interquartile range (IQR). Categorical variables are displayed in terms of frequency and percentage.
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
dat %>%
select(Gender, Age, Age.cat, Illness.season,
city, District, occupation, Death, Illness.year
)%>%
tbl_summary(
label = list(),
missing = "ifany",
statistic = list(
all_continuous() ~ "{mean}({sd})\n{median}[{p25},{p75}]",
all_categorical() ~ "{n}\n({p}%)"
)
) %>%
# modify_table_body(
# ~.x %>%
# mutate(
# across(all_stat_cols(), ~gsub("^0.*", "-", .))
# )
#
as_flex_table() %>%
set_table_properties(layout = "autofit") %>% autofit()
```
epidemiological overview: During the period spanning 2005 to 2020, Zhejiang Province documented a cumulative total of 7,724 cases of HFRS, resulting in an average annual incidence rate of 0.9065 per 100,000 individuals. The yearly incidence rates (/100,000) for the aforementioned time frame were as follows: 1.52, 1.53, 1.49, 1.09, 0.84, 0.89, 0.99, 0.91, 0.95, 0.70, 0.66, 0.62, 0.63, 0.59, 0.63, and 0.46, accompanied by 25 reported fatalities. The epidemiological landscape in Zhejiang Province exhibited a declining trend commencing in 2007, maintaining relative stability from 2008 to 2013, entering a plateau phase from 2014, and manifesting a substantial reduction in 2020. Monthly distribution patterns of HFRS cases disclosed two peaks annually, one occurring in May-June (late spring to early summer) and the other spanning from November to January of the subsequent year (winter). Notably, the summer peaks in 2009, 2013, and 2014 surpassed the corresponding winter peaks. Conversely, in 2019 and 2020, the summer and winter peaks exhibited comparable magnitudes. In the remaining years, the winter peaks outpaced their summer counterparts.
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(final.data.x, aes(x = Illness.year)) +
geom_bar(fill="skyblue") +
ylab("")+
xlab("")+
theme_pubclean()
t1 <- final.data.x %>% group_by(Illness.year) %>% summarise(count =n()) %>% ungroup()
trend::mk.test(t1$count)
```
The output of Mann-Kendall trend test is -0.8, which indicates a strong, monotonic decrease in annual incidence over the 16 years observed time period. This degree of negative monotonicity is significant with the p-value of 1.893e-05. The limitations of this test in the trend analysis sense is that it does not provide any insight into the magnitude of the trend.
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(dat, aes(x = Illness.season)) +
geom_bar(fill="skyblue", width = 0.5) +
ylab("")+
xlab("")+
theme_pubclean()
t2 <- final.data.x %>% group_by(Illness.season) %>% summarise(count =n()) %>% ungroup()
trend::mk.test(t2$count)
```
We observe a seasonal variation in the occurrence of HFRS, with a higher incidence during winter (December, January, February), followed by summer (June, July, August). There is no noticeable difference in HFRS occurrence between spring (March, April, May) and autumn (September, October, November).
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(dat, aes(x = city)) +
geom_bar(fill="skyblue", width = 0.5) +
ylab("")+
xlab("")+
theme_pubclean()
```
Spatial distribution: HFRS cases have been reported in all 11 cities, with the top three cities in terms of cumulative cases and their respective proportions being Ningbo (1,875 cases, 24.27%), Taizhou (1,642 cases, 21.25%), and Shaoxing (1,123 cases, 14.54%). High-incidence counties (cities, districts) for annual incidence rates are mainly distributed in the eastern, western, central, and southwestern regions of Zhejiang Province.
The provided chart illustrates the distribution of disease incidence categorized by occupation (farmer, unemployment, worker, and others) over the specified years. It is evident that the most prevalent group is comprised of farmers, constituting 26% of the total cases. The subsequent largest category is labeled as "others." Furthermore, notable trends emerge, particularly from 2016 to 2020, where the unemployment category exhibits a discernible escalation in risk.
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(dat, aes(x = Illness.year, fill = occupation)) +
geom_bar(position="fill", width = 0.5) +
ylab("")+
xlab("")+
theme_pubclean()
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(dat, aes(x = Age.cat, fill = Gender)) +
geom_bar(position="fill", width = 0.5) +
ylab("")+
xlab("")+
theme_pubclean()
```
Population Distribution: among the 7,724 cases, the male-to-female ratio is 2.73:1 (5,656:2,068). The distribution by age groups is as follows: ≤20 years old, 261 cases (3.38%); 21-30 years old, 797 cases (10.32%); 31-40 years old, 1,492 cases (19.32%); 41-50 years old, 1,954 cases (25.30%); 51-60 years old, 1,806 cases (23.38%); 61-70 years old, 948 cases (12.27%); and >70 years old, 466 cases (6.03%), with the 41-70 age group accounting for 60.95%. The majority of cases are reported in individuals with an occupation as farmers, accounting for 69.89% (5,398/7,724). Upon scrutinizing age groups and gender, it becomes evident that approximately 75% of cases can be attributed to a particular age group. Moreover, a conspicuous trend emerges, indicating that as women age, they exhibit a higher likelihood of contracting HFRS in comparison to men.
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
final.data %>% group_by(year, month) %>% summarise(count=n()) %>% ungroup() %>%
ggplot(,mapping = aes(x = month, y = year, fill = count)) +
geom_tile() +
scale_fill_gradient(name = "",
low = "#FFFFFF",
high = "#012345") +
theme(strip.placement = "outside") +
scale_x_continuous(breaks = seq(from = 1, to = 12, by = 1))+
theme_pubclean()
```
The heat map illustrates elevated incidence rates in May, June, November, and December compared to other months throughout the year. Notably, from 2005 to 2020, there is a discernible decrease in overall incidences.
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
breaks = c(50, 100, 200, 500, 100)
mycolours = c("white","grey70", "grey50", "orange", "red")
zj <- china_map[china_map$NAME_1 == "Zhejiang", ]
zj$city <- zj$NAME_2
merge.year <- final.data.x %>% filter(year == 2005) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2005")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2006) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2006")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2007) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2007")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2008) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2008")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2009) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2009")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2010) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2010")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2011) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2011")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2012) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2012")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2013) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2013")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2014) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2014")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2015) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2015")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2016) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2016")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2017) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
labs(title=("2017")) +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2018) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2019) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
merge.year <- final.data.x %>% filter(year == 2020) %>% group_by(city) %>% summarise(count =n()) %>% ungroup()
zj.x <- zj %>% left_join(merge.year)
ggplot(zj.x) +
geom_sf(aes(fill = count), color = NA) +
scale_fill_gradientn(colors = mycolours, breaks=breaks, labels=format(breaks))+
coord_sf(datum = NA) +
geom_sf_label(aes(label = city)) +
theme_map() +
theme(legend.position="right",
plot.title = element_text(hjust = 0.5,
color = "Gray40",
size = 16,
face = "bold"),
plot.subtitle = element_text(color = "blue"),
plot.caption = element_text(color = "Gray60")) +
guides(fill = guide_colorbar(title = "",
title.position = "bottom",
title.theme = element_text(size = 10,
face = "bold",
colour = "gray70",
angle = 0)))
s1 <- zj.x %>% filter(NAME_2 != "Zhoushan")%>% drop_na(count) %>%select(count, geometry)
nb <- poly2nb(s1, queen=TRUE)
lw <- nb2listw(nb, style="W", zero.policy=TRUE)
moran.test(s1$count,lw)
```
Spatial Distribution: HFRS cases have been reported in 11 cities, with the top three cities in terms of cumulative cases and composition being Ningbo City (1,875 cases, 24.27%), Taizhou City (1,642 cases, 21.25%), and Shaoxing City (1,123 cases, 14.54%). The top five counties (cities, districts) in terms of cumulative cases are Tiantai County (606 cases), Longquan City (490 cases), Yinzhou District (447 cases), Zhuji City (407 cases), and Kaihua County (402 cases), while Haiyan County and Shengsi County have not reported any cases. We observed dynamic variations on the spatial changes of color indicators from 2005 to 2020. Descriptive statistics suggest that counties (cities, districts) with high incidence rates over the years are predominantly distributed in the eastern, western, central, and southwestern regions of Zhejiang Province. We also conduct a Moran test for each year, obtaining similar results indicating spatial correlation. However, we exclude Zhoushan City from the analysis due to its island status, signifying isolation from other cities.
```{r}
#prop_trend_test(xtab, score = NULL)
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(dat, aes( x = Illness.season, fill = Gender)) +
geom_bar(width = 0.5) +
xlab("") +
ylab("") +
#facet_grid(occupation ~.) +
theme_pubclean()
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(dat, aes(y = log(difftime), x = Illness.season, fill = Gender)) +
geom_boxplot() +
xlab("") +
ylab("") +
#facet_grid(occupation ~.) +
theme_pubclean()
dat$log.difftime <- ifelse(dat$log.difftime <0, 0, dat$log.difftime)
res.aov2 <- aov(log.difftime ~ Illness.season + Gender, data = dat)
summary(res.aov2)
```
We also analyzed the logarithmic transformation of the difference between diagnosis time and the onset of illness. Boxplots were created to visualize it. A two-way ANOVA was performed to test for the the effect. From the ANOVA table, we can conclude that only the season of illness is statistically significant. There is no significant difference in this time difference based on gender.
Spatial Autocorrelation: The global Moran's I coefficient is consistently greater than 0 for all years, with most years having a p-value less than 0.05. This indicates a significant positive spatial autocorrelation of HFRS incidence at the county level in Zhejiang Province for most years.
In summary, from 2004 to 2020, HFRS primarily affected middle-aged and elderly individuals, males, and farmers in Zhejiang Province. Outbreaks were more common in the eastern regions during late spring, early summer, and winter. It is recommended to implement precision control measures for key populations in high-risk areas before the epidemic season arrives. In these key areas, a combination of health education and public hygiene campaigns should be carried out as comprehensive preventive measures. Continuous monitoring of inter-species epidemics among animals is essential for effectively safeguarding the health of high-risk populations.
```{r}
final.data.x %>%
select(aqi, pm2_5, pm10, so2, no2, co ,o3, Temperature, Humidity
)%>%
tbl_summary(
label = list(),
missing = "no",
statistic = list(
all_continuous() ~ "{mean}({sd})\n{median}[{p25},{p75}]",
all_categorical() ~ "{n}\n({p}%)"
)
) %>%
# modify_table_body(
# ~.x %>%
# mutate(
# across(all_stat_cols(), ~gsub("^0.*", "-", .))
# )
#
as_flex_table() %>%
set_table_properties(layout = "autofit")
```
The table shows the descriptive statistics about air pollution characteristics and meteorological factor temperature and humidity in Zhejiang province, during 2013-2020, the year average was 41 μg/m3 for PM2.5, 65 μg/m3 for PM10, 13 μg/m3 for SO2, 35 μg/m3 for NO2, and 0.81 mg/m3 for CO, and the daytime 8-h mean concentration for O3 was 89 mg/m3. The monthly concentrations of PM2.5 and PM10 were much higher than the China guidelines II level issued in 2018. The boxplots of monthly variation of air pollution concentrations show an obvious seasonal pattern. The peaks of PM2.5, PM10, and NO2 concentration mostly appeared in December and January, while the peaks of O3 appeared in late spring to late summer, from May to August. The mean temperature is 17 Celsius, standard deviation 8. The average humidity is 72 g/m3 with standard deviation 6. The average temperature, which reminds that in the areas where temperature is suitable, personal protection should be taken when going out as to avoid contact with rodents.
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(airdata, aes(x = season, y = aqi, fill = season)) +
geom_boxplot() +
xlab("") +
theme(legend.position="none")+
theme_pubclean()
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(airdata, aes(x = season, y = aqi, fill = season)) +
geom_boxplot() +
xlab("") +
theme(legend.position="none")+
theme_pubclean()
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(airdata, aes(x = season, y = pm2_5, fill = season)) +
geom_boxplot() +
xlab("") +
theme(legend.position="none")+
theme_pubclean()
summary(aov(airdata$pm2_5 ~ airdata$season))
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(airdata, aes(x = season, y = pm10, fill = season)) +
geom_boxplot() +
xlab("") +
theme(legend.position="none")+
theme_pubclean()
summary(aov(airdata$pm10 ~ airdata$season))
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(airdata, aes(x = season, y = so2, fill = season)) +
geom_boxplot() +
xlab("") +
theme(legend.position="none")+
theme_pubclean()
summary(aov(airdata$pm2_5 ~ airdata$season))
```
```{r fig.height=5, fig.width=9,fig.fullwidth=TRUE, fig.margin=TRUE}
ggplot(airdata, aes(x = season, y = no2, fill = season)) +