-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData Exploration in SQL using Covid-19 Dataset.sql
132 lines (103 loc) · 4.21 KB
/
Data Exploration in SQL using Covid-19 Dataset.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/*
Covid 19 Data Exploration
Skills used: Joins, CTE's, Windows Functions, Aggregate Functions, Creating Views, Converting Data Types
*/
-- overview of the Covid-19 data
SELECT *
FROM `portfolio project`.`covid vaccinations`;
-- Covid-19 in Kenya
SELECT *
FROM `portfolio project`.`covid vaccinations`
WHERE
location ='Kenya' ;
-- Covid-19 in Africa
SELECT *
FROM `portfolio project`.`covid-19 deaths`
WHERE continent IS NOT NULL
AND location LIKE'%Africa%' ;
-- total cases vs population
SELECT location,population,MAX(total_cases)AS highest_infection_count,max(total_cases/population)*100
AS percent_pop_infected
FROM `portfolio project`.`covid-19 deaths`
-- where location like '%Africa%'
GROUP BY location,population
ORDER BY percent_pop_infected desc;
-- countries with the highest death count
SELECT location,MAX(CAST(total_deaths AS unsigned)) AS total_death_count
FROM `portfolio project`.`covid-19 deaths`
-- where location like '%Africa%'
WHERE location <> 'High income'
GROUP BY location
ORDER BY total_death_count desc ;
-- breakdown by continent
-- continent with the highest death count
SELECT continent, MAX(CAST(Total_deaths AS unsigned)) AS TotalDeathCount
FROM `portfolio project`.`covid-19 deaths`
-- Where location like '%states%'
WHERE continent IS NOT NULL
GROUP BY continent
ORDER BY TotalDeathCount desc;
-- Global Numbers
SELECT SUM(new_cases)AS total_cases,SUM(CAST(new_deaths AS unsigned )) AS total_deaths,
SUM(CAST(new_deaths AS unsigned))/SUM(new_cases) AS death_percentage
FROM `portfolio project`.`covid-19 deaths`
-- where location like '%Africa%'
WHERE continent IS NOT NULL
ORDER BY 1,2;
-- total cases vs population
-- Percentage of the population infected
SELECT location,date,population,total_cases,(total_cases/population)*100 AS percent_population_infected
FROM `portfolio project`.`covid-19 deaths`
WHERE location LIKE '%Africa%'
ORDER BY 1,2;
-- countries with highest infection rate compared to population
SELECT location, population,date,MAX(total_cases) AS highest_infection_count,
MAX(total_cases/population)*100 AS percent_population_infected
FROM `portfolio project`.`covid-19 deaths`
-- WHERE location like'% Africa%'
GROUP BY location,population,date
ORDER BY percent_population_infected DESC;
-- total vaccinations vs populations
SELECT dea.continent, dea.location,dea.date,dea.population,vac.new_vaccinations
FROM `portfolio project`.`covid-19 deaths`AS dea
JOIN `portfolio project`.`covid vaccinations` AS vac
ON dea.location=vac.location
AND dea.date=vac.date
WHERE dea.continent IS NOT NULL
ORDER BY 2,3;
-- cumulative people vaccinated using Window functions
SELECT dea.continent, dea.location,dea.date,dea.population,
SUM(CAST(vac.new_vaccinations AS unsigned)) OVER(PARTITION BY dea.location ORDER BY dea.location,dea.date
) AS rolling_people_vaccinated
FROM`portfolio project`.`covid-19 deaths`AS dea
JOIN `portfolio project`.`covid vaccinations` AS vac
ON dea.location=vac.location
AND dea.date=vac.date
-- WHERE continent IS NOT NULL
ORDER BY 2,3;
-- rolling people vaccinated vs population using CTE's
WITH popvsvacc (continent,location,date,population,new_vaccinations,rolling_people_vaccinated)
AS
(SELECT dea.continent, dea.location,dea.date,dea.population,vac.new_vaccinations,
SUM(CAST(vac.new_vaccinations AS unsigned)) OVER(PARTITION BY dea.location ORDER BY dea.location,dea.date
) AS rolling_people_vaccinated
FROM `portfolio project`.`covid-19 deaths`AS dea
JOIN`portfolio project`.`covid vaccinations` AS vac
ON dea.location=vac.location
AND dea.date=vac.date
-- WHERE continent IS NOT NULL
ORDER BY 2,3)
SELECT *,(rolling_people_vaccinated/population)*100
FROM popvsvacc;
-- creating view for later Vizualization
CREATE VIEW percentpopulationvaccinated AS
SELECT dea.continent,dea.location,dea.date,dea.population,vac.new_vaccinations,SUM(CAST(vac.new_vaccinations AS unsigned)) over(partition by dea.location order by dea.location,dea.date) as rolling_people_vaccinated
FROM `portfolio project`.`covid-19 deaths`AS dea
JOIN `portfolio project`.`covid vaccinations`AS vac
ON dea.location=vac.location
AND dea.date=vac.date
WHERE dea.continent IS NOT NULL
ORDER BY 2,3;
-- view
SELECT *
FROM `portfolio project`.percentpopulationvaccinated;