-
Notifications
You must be signed in to change notification settings - Fork 4
/
Data Visualization Project.R
93 lines (52 loc) · 3.13 KB
/
Data Visualization Project.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
### Data Visualization Project
## Locating the data
# The project files live in a nested course directory. Instead of calling
# setwd() -- which changes the working directory for the whole R session and
# makes a second run fail, because the relative path is then resolved against
# the already-changed directory -- the directory is kept in a variable and
# joined onto the file name when reading.
data_dir <- file.path(
  ".", "R Bootcamp", "R-Course-HTML-Notes",
  "R-for-Data-Science-and-Machine-Learning", "Training Exercises",
  "Capstone and Data Viz Projects", "Data Visualization Project"
)
## Loading Data
# Load the plotting libraries and data.table (for fread), then read the
# Economist data set used to recreate The Economist's graph.
library(ggplot2)
library(data.table)
library(ggthemes)
# drop = 1 discards the first column of the CSV
# (presumably a row-index column -- TODO confirm against the file).
df <- fread(file.path(data_dir, "Economist_Assignment_Data.csv"), drop = 1)
# Quick inspection of the loaded data.
summary(df)
head(df)
tail(df)
## Creating a Scatterplot
# Plot HDI against CPI as a scatterplot, with the points colored by Region.
# NOTE(review): the layer-level color = factor(Region) repeats the plot-level
# color = Region mapping; it is retained unchanged here.
plot <- ggplot(data = df, mapping = aes(x = CPI, y = HDI, color = Region)) +
  geom_point(mapping = aes(color = factor(Region)))
print(plot)
## Linear Models
# Overlay different trend lines to examine the relationship between CPI and
# HDI.
# Base scatterplot: large open circles (shape = 1), colored by Region.
plot <- ggplot(df, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 4, shape = 1)
print(plot)
# group = 1 fits a single smoother across all points instead of one per Region.
plot2 <- plot + geom_smooth(aes(group = 1))
print(plot2)
# Replace the default smoother with a linear model of HDI on log(CPI), drawn in
# red without the confidence band. FALSE is spelled out because F is an
# ordinary variable that can be reassigned.
plot2 <- plot +
  geom_smooth(
    aes(group = 1),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    color = "red"
  )
print(plot2)
## Adding Text
# Add a text label for every country in the data. With all countries labelled,
# the labels overlap one another in the resulting graph.
plot <- ggplot(df, aes(x = CPI, y = HDI, color = Region)) +
  geom_point(size = 4, shape = 1)
print(plot)
# Single red log-linear trend line across all points, no confidence band.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned.
plot2 <- plot +
  geom_smooth(
    aes(group = 1),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    color = "red"
  )
# Label each point with the value of its Country column.
plot3 <- plot2 + geom_text(aes(label = Country))
print(plot3)
## Subsetting the Labels
# Rather than labelling every country, label only a hand-picked selection on
# top of the trend-line plot (plot2) built in the previous section.
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan", "Afghanistan", "Congo",
  "Greece", "Argentina", "Brazil", "India", "Italy", "China", "South Africa",
  "Spain", "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados", "Norway", "Japan",
  "New Zealand", "Singapore"
)
# The text layer gets its own data: only the rows whose Country is in the
# selection. Labels are drawn in dark gray, and check_overlap = TRUE is passed
# to geom_text.
plot3 <- plot2 +
  geom_text(
    mapping = aes(label = Country),
    data = subset(df, subset = Country %in% pointsToLabel),
    color = "gray20",
    check_overlap = TRUE
  )
print(plot3 + theme_economist_white())
# Set the x axis limits to 0.9-10.5 and place a break at each integer from 1
# to 10.
plot4 <- plot3 +
  scale_x_continuous(limits = c(.9, 10.5), breaks = 1:10)
print(plot4 + theme_economist_white())