-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata exploration in SQL
142 lines (102 loc) · 5.68 KB
/
data exploration in SQL
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
--Covid 19 Data Exploration as at may 2021, using the filtered dataset gotten from https://ourworldindata.org/covid-deaths
--Skills used: Joins, CTE's, Temp Tables, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types
Select *
From PortfolioProject1..coviddeaths
Where continent is not null
order by 3,4
-- Select columns to work with
Select date, Location, population, total_cases, new_cases, total_deaths
From portfolioproject1..coviddeaths
Where continent is not null
order by 1,2
-- Relationship between new Cases and Total Deaths
-- dispaly the chances of dying if you contract covid in your country based on the availabe data
Select date, Location, total_cases, total_deaths, (total_deaths/total_cases)*100 as DeathPercentage
From portfolioproject1..coviddeaths
Where continent is not null and location like '%kingdom%'
order by 1,2
-- comparing Total Cases per Population
-- Shows what percentage of population got infected with Covid
Select date, Location, total_cases, total_deaths, (total_cases/population)*100 as PopulationInfected_percentage
From portfolioproject1..coviddeaths
Where continent is not null and location like '%kingdom%'
order by 1,2
-- Countries(location) with Highest Infection Rate per Population
Select Location, Population, MAX(total_cases) as Highest_case_per_location, Max((total_cases/population))*100 as PopulationInfected_percentage_per_country
From PortfolioProject1..Coviddeaths
Group by Location, Population
order by PopulationInfected_percentage_per_country desc
-- Countries(loacation) with Highest Death Count per Population
Select Location, MAX(cast(Total_deaths as int)) as TotalDeathCount_per_country
From portfolioproject1..coviddeaths
Where continent is not null
Group by Location
--order by TotalDeathCount desc
order by 2 desc
-- Exploring data based on CONTINENT
-- display contintents with the highest death count per population
Select continent, MAX(cast(Total_deaths as int)) as TotalDeathCount_per_continent
From portfolioproject1..coviddeaths
Where continent is not null
Group by continent
order by 2 desc
-- Global exploration (total cases, total death and total death percentage)
Select SUM(new_cases) as covid_total_cases, SUM(cast(new_deaths as int)) as covid_total_deaths, SUM(cast(new_deaths as int))/SUM(New_Cases)*100 as DeathPercentage
From PortfolioProject1..coviddeaths
where continent is not null
-- comparing the total population of different places to the number of COVID-19 vaccinations given using the covid death and covid vaccination table
-- Total Population vs Vaccinations
-- Shows Percentage of Population that has recieved at least one Covid Vaccine
Select death.continent, death.location, death.date, death.population, vaccination.new_vaccinations
, SUM(CONVERT(int,vaccination.new_vaccinations)) OVER (Partition by death.Location Order by death.location, death.Date) as People_vaccinated_continuously
From PortfolioProject1..coviddeaths death
Join portfolioproject1..covidvaccinations vaccination
On death.location = vaccination.location
and death.date = vaccination.date
where death.continent is not null
order by 2,3
--Using CTE(common table expression) to perform Calculation on Partition By in the above query and showing the percentage of people vaccinated base on population
With Population_vs_Vaccination (Continent, Location, Date, Population, New_Vaccinations, People_vaccinated_continuously)
as
(
Select death.continent, death.location, death.date, death.population, vaccination.new_vaccinations
, SUM(CONVERT(int,vaccination.new_vaccinations)) OVER (Partition by death.Location Order by death.location, death.Date) as People_vaccinated_continuously
From portfolioproject1..CovidDeaths death
Join portfolioproject1..CovidVaccinations vaccination
On death.location = vaccination.location
and death.date = vaccination.date
where death.continent is not null
)
Select *, (People_vaccinated_continuously/Population)*100 as vaccination_population_percentage
From Population_vs_Vaccination
-- Using Temp Table to perform Calculation on Partition By in previous query
DROP Table if exists #vaccination_population_percentage
Create Table #vaccination_population_percentage
(
Continent nvarchar(255),
Location nvarchar(255),
Date datetime,
Population numeric,
New_vaccinations numeric,
People_vaccinated_continuously numeric
)
Insert into #vaccination_population_percentage
Select death.continent, death.location, death.date, death.population, vaccination.new_vaccinations
, SUM(CONVERT(int,vaccination.new_vaccinations)) OVER (Partition by death.Location Order by death.location, death.Date) as People_vaccinated_continuously
From PortfolioProject1..CovidDeaths death
Join PortfolioProject1..CovidVaccinations vaccination
On death.location = vaccination.location
and death.date = vaccination.date
Select *, (People_vaccinated_continuously/Population)*100 as People_vaccinated_continuously_population_percentage
From #vaccination_population_percentage
-- Creating View to store data for later visualizations
Create View vaccination_population_percentage as
Select death.continent, death.location, death.date, death.population, vaccination.new_vaccinations
, SUM(CONVERT(int,vaccination.new_vaccinations)) OVER (Partition by death.Location Order by death.location, death.Date) as People_vaccinated_continuously
--, (RollingPeopleVaccinated/population)*100
From PortfolioProject1..CovidDeaths death
Join PortfolioProject1..CovidVaccinations vaccination
On death.location = vaccination.location
and death.date = vaccination.date
where death.continent is not null
select * from vaccination_population_percentage