-
Notifications
You must be signed in to change notification settings - Fork 0
/
eda.py
85 lines (67 loc) · 3.1 KB
/
eda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Exploratory data-analysis of the V-Dem dataset
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
# Load the V-Dem dataset (the path is resolved relative to the working directory)
df = pd.read_csv('V-Dem-CY-Core-v14.csv')
# Display the first 5 rows of the dataframe
print(df.head())
# Display the basic information about the dataframe.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()
# Display the summary statistics of the dataframe
print(df.describe())
# Display the number of unique values for each column
print(df.nunique())
# Draw a graph of 'v2x_polyarchy' progress over 'year' where 'country_name' is "Nicaragua"
import seaborn as sns
import matplotlib.pyplot as plt
# Keep only the rows whose 'country_name' is Nicaragua
is_nicaragua = df['country_name'] == 'Nicaragua'
nicaragua_df = df[is_nicaragua]
# Line chart of the 'v2x_polyarchy' score against 'year'
sns.lineplot(x='year', y='v2x_polyarchy', data=nicaragua_df)
# Axis labels and title are applied to the axes seaborn just drew on
plt.xlabel('Year')
plt.ylabel('v2x_polyarchy')
plt.title('Progress of v2x_polyarchy in Nicaragua over Years')
plt.show()
# Keep only the rows whose 'country_name' is India
is_india = df['country_name'] == 'India'
india_df = df[is_india]
# (column, legend label) for each democracy index, in drawing order
india_index_labels = (
    ('v2x_polyarchy', 'Electoral'),
    ('v2x_libdem', 'Liberal'),
    ('v2x_partipdem', 'Participatory'),
    ('v2x_delibdem', 'Deliberative'),
    ('v2x_egaldem', 'Egalitarian'),
)
# Draw one line per index on a shared figure, each plotted against 'year'
plt.figure(figsize=(10, 6))
for india_column, india_label in india_index_labels:
    sns.lineplot(data=india_df, x='year', y=india_column, label=india_label)
plt.xlabel('Year')
plt.ylabel('Index Score')
plt.title('Progress of Democracy Indices in India over Years')
plt.legend()
plt.show()
# (column, legend label) for each of the five democracy indices, in drawing order
average_index_labels = (
    ('v2x_polyarchy', 'Electoral'),
    ('v2x_libdem', 'Liberal'),
    ('v2x_partipdem', 'Participatory'),
    ('v2x_delibdem', 'Deliberative'),
    ('v2x_egaldem', 'Egalitarian'),
)
# Mean of every index over all rows that share a 'year'; reset_index() turns
# the 'year' group key back into an ordinary column so it can be used as x
average_columns = [avg_column for avg_column, _ in average_index_labels]
yearly_means = df.groupby('year')[average_columns].mean()
average_indices_df = yearly_means.reset_index()
# Draw one line per averaged index on a shared figure
plt.figure(figsize=(10, 6))
for avg_column, avg_label in average_index_labels:
    sns.lineplot(data=average_indices_df, x='year', y=avg_column, label=avg_label)
plt.xlabel('Year')
plt.ylabel('Average Index Score')
plt.title('Average Progress of Democracy Indices over Years')
plt.legend()
plt.show()
# Load the world map.
# NOTE(review): geopandas.datasets was deprecated in geopandas 0.14 and removed
# in 1.0, so this line needs geopandas < 1.0 -- confirm the installed version.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# Attach the 2015 'v2x_libdem' score to each map row by country name. The left
# join keeps every map row; rows without a matching 'country_name' get NaN.
# NOTE(review): the match is on exact name strings -- presumably some countries
# are spelled differently in the two sources; verify the join coverage.
libdem_2015 = df[df['year'] == 2015][['country_name', 'v2x_libdem']]
world = world.merge(libdem_2015, how="left", left_on="name", right_on="country_name")
# Plot the choropleth map for 'v2x_libdem' in 2015
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
ax.axis('off')
# Country outlines first, so countries without a score still appear on the map
world.boundary.plot(ax=ax)
# legend=True draws the colour bar; with legend=False the legend_kwds below
# were ignored and the map had no scale to read the colours against.
world.plot(column='v2x_libdem', ax=ax, legend=True,
           legend_kwds={'label': "Liberal Democracy Index in 2015",
                        'orientation': "horizontal"})
# Title the map axes explicitly rather than relying on the "current" axes,
# since the colour bar adds a second axes to the figure.
ax.set_title('Global Liberal Democracy Index in 2015')
plt.show()