-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataexploration
154 lines (129 loc) · 5.65 KB
/
dataexploration
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/*
Covid 19 Data Exploration
Skills used: Joins, CTE's, Temp Tables, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types
Note: It's possible to run the queries into "mode.com" to see the results.
All the tables are online accesseble after the free registration.
*/
-- I: Preparation
-- Using: SELECT, FROM, LIMIT, ORDER BY
-- Getting familiar with the data by showing all columns.
SELECT *
FROM itsnoraexplora.covid_deaths
LIMIT 100
-- Filter relevant columns
-- Deciding for a usefull order.
SELECT location, date, total_cases, new_cases, total_deaths, population
FROM itsnoraexplora.covid_deaths
ORDER BY 1,2
LIMIT 100
/* Both countries and continents are listed under the "Location" column. Unlike continents,
the "Continents" column for countries is not empty, so it is possible to filter it with the "WHERE" function. */
Select Location, date, total_cases, new_cases, total_deaths, population
From itsnoraexplora.covid_deaths
Where continent is not null
order by 3,4
-- II: General insights at country level
-- USING: ilike, arithmetic operators, aggregate functions, GROUP BY
-- Insight 1: Total Cases vs Population; Shows what percentage of population infected with Covid
Select Location, date, Population, total_cases, (total_cases/population)*100 as PercentPopulationInfected
From itsnoraexplora.covid_deaths
order by 1,2
-- Insight 2: Total Cases vs Total Deaths; Shows probability of dying if you contract covid for example in Germany
Select Location, date, total_cases,total_deaths, (total_deaths/total_cases)*100 as DeathPercentage
From itsnoraexplora.covid_deaths
Where location ilike '%germany%'
and continent is not null
order by 1,2
-- Insight 3: Countries with Highest Infection Rate compared to Population
Select Location, Population, MAX(total_cases) as HighestInfectionCount, Max((total_cases/population))*100 as PercentPopulationInfected
From itsnoraexplora.covid_deaths
Where continent is not null
Group by Location, Population
order by PercentPopulationInfected desc
-- Insight 4: Countries with Highest Death Count per Population
Select Location, MAX(total_deaths) as TotalDeathCount
From itsnoraexplora.covid_deaths
Where continent is not null
Group by Location
order by TotalDeathCount desc
-- II: General insights at continent level
-- USING: PARTITION BY, CTE, JOINs
-- Insight 1: Showing contintents with the highest death count per population
Select continent, MAX(Total_deaths) as TotalDeathCount
From itsnoraexplora.covid_deaths
Where continent is not null
Group by continent
order by TotalDeathCount desc
-- III: General insights at global level
-- Insight 1: Showing all total cases, total deaths and the mortality rate
Select SUM(new_cases) as total_cases, SUM(new_deaths) as total_deaths, SUM(new_deaths)/SUM(New_Cases)*100 as DeathPercentage
From itsnoraexplora.covid_deaths
--Opt: Where location like '%states%'
where continent is not null
--Opt: Group By date
order by 1,2
-- Note for myself: No group although aggregation function. Reason: Global view, so no group nessasary
-- Insight 2: Adding a rolling count, which sums all the vaccinations so far
-- Note: First vaccines for Afghanistan starting at row 459
Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations,
SUM(vac.new_vaccinations) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated
--Opt: , (RollingPeopleVaccinated/population)*100
From itsnoraexplora.covid_deaths dea
Join itsnoraexplora.covid_vaccinations vac
On dea.location = vac.location
and dea.date = vac.date
where dea.continent is not null
order by 2,3
-- Insight 3: Adding a vaccine percantage
-- Using: CTE
With PopvsVac
as
(
Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations
, SUM(vac.new_vaccinations) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated
--Opt: , (RollingPeopleVaccinated/population)*100
From itsnoraexplora.covid_deaths dea
Join itsnoraexplora.covid_vaccinations vac
On dea.location = vac.location
and dea.date = vac.date
where dea.continent is not null
--Opt: order by 2,3
)
Select *, (RollingPeopleVaccinated/Population)*100 as RollingPercantageVaccinated
From PopvsVac
-- IV: Table and views
-- USING: CREATE TABEL, DROP IF EXISTS, CREATE VIEW
-- Note: This is not working at mode.com, but is still used to prepare the visualisation in Tableau.
-- Create table
DROP Table if exists #PercentPopulationVaccinated
Create Table #PercentPopulationVaccinated
(
Continent nvarchar(255),
Location nvarchar(255),
Date datetime,
Population numeric,
New_vaccinations numeric,
RollingPeopleVaccinated numeric
)
Insert into #PercentPopulationVaccinated
Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations
, SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated
--Opt: , (RollingPeopleVaccinated/population)*100
From itsnoraexplora.covid_deaths dea
Join itsnoraexplora.covid_vaccinations vac
On dea.location = vac.location
and dea.date = vac.date
--Opt: where dea.continent is not null
--Opt: order by 2,3
Select *, (RollingPeopleVaccinated/Population)*100
From #PercentPopulationVaccinated
-- Create view
Create View PercentPopulationVaccinated as
Select dea.continent, dea.location, dea.date, dea.population, vac.new_vaccinations
, SUM(CONVERT(int,vac.new_vaccinations)) OVER (Partition by dea.Location Order by dea.location, dea.Date) as RollingPeopleVaccinated
--Opt: , (RollingPeopleVaccinated/population)*100
From itsnoraexplora.covid_deaths dea
Join itsnoraexplora.covid_vaccinations vac
On dea.location = vac.location
and dea.date = vac.date
where dea.continent is not null