# anime_data_analysis.R
# Exploratory data analysis of the anime list CSV: load the data, inspect it,
# drop incomplete rows, then produce univariate and bivariate summaries/plots.

# Setup ----

# Install only the packages that are missing, so re-running the script does
# not reinstall (and re-download) everything each time.
required_pkgs <- c("tidyverse", "skimr", "DataExplorer", "GGally")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

# Load the libraries
library(tidyverse)
library(skimr)
library(DataExplorer)
library(GGally)

# Read the CSV file ----

# The input location defaults to the original hard-coded path; set the
# ANIME_CSV environment variable to run the script against another copy.
csv_path <- Sys.getenv(
  "ANIME_CSV",
  unset = "C:\\Users\\ACER\\OneDrive\\Documents\\animelist22.csv"
)
if (!file.exists(csv_path)) {
  stop("Input CSV not found: ", csv_path, call. = FALSE)
}
# NOTE(review): `data` shadows utils::data(); the name is kept because later
# code may rely on it.
data <- read.csv(csv_path)

# Display the structure of the data
str(data)
# Summary statistics
summary(data)
# View the first few rows of the data set
head(data)
# View the last few rows of the data set
tail(data)

# Data cleaning ----

# Total number of missing values, then the count per column
sum(is.na(data))
colSums(is.na(data))
# Visualize missing data
plot_missing(data)
# Locate missing values as (row, col) pairs; without `arr.ind = TRUE` the
# result is a linear index into the logical matrix, which is hard to map back
# to a cell.
which(is.na(data), arr.ind = TRUE)
# Handle missing values by dropping every row that contains at least one NA
data_clean <- na.omit(data)
message(
  nrow(data) - nrow(data_clean), " of ", nrow(data),
  " rows removed because they contain missing values"
)
if (nrow(data_clean) == 0) {
  stop("No complete rows remain after removing missing values", call. = FALSE)
}

# Univariate analysis ----
# Examine the distribution of individual variables.

# Summary statistics using skimr
skim(data_clean)
# Visualize numerical variables
plot_histogram(data_clean)
# Visualize categorical variables
plot_bar(data_clean)

# Bivariate analysis ----
# Explore relationships between pairs of variables.

numeric_cols <- names(data_clean)[vapply(data_clean, is.numeric, logical(1))]
categorical_cols <- names(data_clean)[
  vapply(
    data_clean,
    function(col) is.character(col) || is.factor(col),
    logical(1)
  )
]

# Correlation matrix for numerical variables
plot_correlation(data_clean, type = "continuous")

# Scatter plot matrix for numerical variables. Only numeric columns are passed:
# ggpairs() refuses discrete columns with more levels than its cardinality
# threshold, which free-text columns easily exceed.
if (length(numeric_cols) > 0) {
  print(ggpairs(data_clean[, numeric_cols, drop = FALSE]))
} else {
  message("Skipping scatter plot matrix: no numeric columns found")
}

# Box plot of a numeric variable split by a categorical variable. The column
# names are picked from the data instead of the non-existent placeholder
# columns: the categorical column with the fewest distinct values (most
# readable plot) and the first numeric column. Assign other column names to
# `factor_variable` / `numerical_variable` to plot a different pair.
if (length(categorical_cols) > 0 && length(numeric_cols) > 0) {
  n_distinct_values <- vapply(
    data_clean[categorical_cols],
    function(col) length(unique(col)),
    integer(1)
  )
  factor_variable <- categorical_cols[which.min(n_distinct_values)]
  numerical_variable <- numeric_cols[1]
  print(
    ggplot(
      data_clean,
      aes(x = .data[[factor_variable]], y = .data[[numerical_variable]])
    ) +
      geom_boxplot() +
      labs(x = factor_variable, y = numerical_variable)
  )
} else {
  message("Skipping box plot: need one categorical and one numeric column")
}