diff --git a/2022_02_22_world_freedom_index.Rmd b/2022_02_22_world_freedom_index.Rmd new file mode 100644 index 0000000..e4e86c3 --- /dev/null +++ b/2022_02_22_world_freedom_index.Rmd @@ -0,0 +1,268 @@ +--- +title: "TidyTemplate" +date: 2022-02-22 +output: html_output +editor_options: + chunk_output_type: console +--- + +# TidyTuesday + +```{r setup, include=FALSE} + +knitr::opts_chunk$set(echo = TRUE) + +library(tidyverse) +library(tidytuesdayR) +library(scales) +theme_set(theme_light()) + +``` + +# Load the weekly Data + +Dowload the weekly data and make available in the `tt` object. + +```{r Load} +freedom <- tt_load("2022-02-22")$freedom %>% + janitor::clean_names() %>% + rename(civil_liberties = cl, + political_rights = pr) %>% + mutate(country_code = countrycode::countrycode(country, "country.name", "iso2c")) + +freedom %>% + count(year, sort = TRUE) %>% + arrange(desc(year)) + +freedom %>% + distinct(country, region_name) +``` + +Look just at 2020 + +```{r} +summarize_freedom <- function(tbl) { + tbl %>% + summarize(n_countries = n(), + avg_civil_liberties = mean(civil_liberties), + avg_political_rights = mean(political_rights), + pct_free = mean(status == "F"), + .groups = "drop") %>% + arrange(desc(n_countries)) +} +``` + + +```{r} +by_region <- freedom %>% + filter(year == 2020) %>% + group_by(region_name) %>% + summarize_freedom() + +by_region %>% + ggplot(aes(avg_civil_liberties, avg_political_rights)) + + geom_abline(color = "red") + + geom_point(aes(size = n_countries)) + + geom_text(aes(label = region_name), vjust = 1, hjust = 1) + + expand_limits(x = 0, y = 0, size = 0) + +freedom %>% + ggplot(aes(avg_civil_liberties, avg_political_rights)) + + geom_point() +``` + +```{r} +freedom %>% + filter(year == 2020) %>% + ggplot(aes(civil_liberties, political_rights)) + + geom_abline(color = "red") + + geom_jitter(height = .2, width = .2) + + # geom_text(aes(label = region_name), vjust = 1, hjust = 1) + + expand_limits(x = 0, y = 0, size = 0) + +freedom %>% + summarize(sd(civil_liberties), + sd(political_rights)) + +freedom %>% + filter(year == 2020) %>% + gather(metric, value, civil_liberties, political_rights) %>% + mutate(metric = str_to_title(str_replace_all(metric, "_", " ")), + region_name = fct_reorder(region_name, value)) %>% + count(region_name, metric, value) %>% + ggplot(aes(value, n)) + + geom_col() + + facet_grid(region_name ~ metric) + + labs(x = "World Freedom Index rating", + y = "# of countries", + title = "Distribution of World Freedom Index by region in 2020") +``` + +civil_liberties is a scale of 1-7 + +```{r} +freedom_gathered <- freedom %>% + gather(metric, value, civil_liberties, political_rights) %>% + mutate(metric = str_to_title(str_replace_all(metric, "_", " ")), + region_name = fct_reorder(region_name, value)) + +overall <- freedom_gathered %>% + group_by(year, metric) %>% + summarize(avg_rating = mean(value)) + +freedom_gathered %>% + group_by(year, region_name, metric) %>% + summarize(avg_rating = mean(value)) %>% + ggplot(aes(year, avg_rating)) + + geom_line(aes(color = region_name)) + + geom_line(data = overall, size = 3) + + facet_wrap(~ metric) + + expand_limits(y = 1) + + scale_y_reverse(breaks = seq(1, 7)) + + scale_color_discrete(guide = guide_legend(reverse = TRUE)) + + labs(x = "Year", + y = "World Freedom Index rating", + title = "World Freedom Index rating over time by region", + color = "Region", + subtitle = "Black line shows overall trend") +``` + +```{r} +freedom %>% + ggplot(aes(civil_liberties)) + + geom_histogram() +``` + +### Worldbank data + +```{r} +library(WDI) +library(countrycode) + +gdp_percap <- WDI(indicator = "NY.GDP.PCAP.CD", + extra = TRUE, + start = 1995, + end = 2020) %>% + as_tibble() + +freedom_joined <- freedom_gathered %>% + inner_join(gdp_percap, by = c(country_code = "iso2c", "year"), + suffix = c("", "_wdi")) %>% + mutate(income = fct_relevel(income, c("Low income", "Lower middle income", "Upper middle income"))) + +freedom_joined %>% + filter(income != "Not classified") %>% + group_by(metric, income, year) %>% + summarize(avg_rating = mean(value)) %>% + ggplot(aes(year, avg_rating)) + + geom_line(aes(color = income)) + + geom_line(data = overall, size = 3) + + facet_wrap(~ metric) + + expand_limits(y = 1) + + scale_y_reverse(breaks = seq(1, 7)) + + scale_color_discrete(guide = guide_legend(reverse = TRUE)) + + labs(x = "Year", + y = "World Freedom Index rating", + title = "World Freedom Index rating over time by region", + color = "Worldbank Income", + subtitle = "Black line shows overall trend") + +freedom_joined %>% + filter(year == 2020) %>% + ggplot(aes(NY.GDP.PCAP.CD, value)) + + geom_point() + + geom_jitter(height = .2, width = 0) + + facet_wrap(~ metric) + + scale_x_log10() +``` + +```{r} +library(broom) + +civil_liberties_2020 <- freedom_joined %>% + filter(metric == "Civil Liberties", + year == 2020, + !is.na(NY.GDP.PCAP.CD)) + +lin_mod <- civil_liberties_2020 %>% + lm(value ~ region_name + log2(NY.GDP.PCAP.CD), data = .) + +library(ggrepel) +lin_mod %>% + augment(data = civil_liberties_2020) %>% + select(country, NY.GDP.PCAP.CD, region_name, income, value, .fitted, .resid) %>% + arrange(desc(abs(.resid))) %>% + head(20) %>% + ggplot(aes(.fitted, value)) + + geom_point() + + geom_text_repel(aes(label = country)) + + geom_abline(color = "red") + + labs(x = "Expected freedom index based on region + income", + y = "Actual freedom index", + title = "What are the largest outliers?") + + expand_limits(x = 1, y = 1) +``` + +```{r} +library(fuzzyjoin) + +freedom_2020 <- freedom_joined %>% + filter(year == 2020) + +world_map_freedom_2020 <- map_data("world") %>% + as_tibble() %>% + regex_left_join(maps::iso3166, c(region = "mapname")) %>% + left_join(freedom_2020 %>% select(-region), by = c(a2 = "country_code")) %>% + filter(region != "Antarctica") + +world_map_freedom_2020 %>% + filter(metric == "Civil Liberties") %>% + ggplot(aes(long, lat, group = group)) + + geom_polygon(aes(fill = value)) + + coord_map(xlim = c(-180, 180)) + + scale_fill_gradient2(low = "blue", + high = "red", + midpoint = 3.5, + guide = guide_legend(reverse = TRUE)) + + ggthemes::theme_map() + + labs(fill = "Civil Liberties Rating", + title = "World Freedom Index: Civil Liberties", + subtitle = "In 2020") +``` + +```{r} +library(gganimate) + +world_map_freedom <- map_data("world") %>% + as_tibble() %>% + regex_left_join(maps::iso3166, c(region = "mapname")) %>% + left_join(freedom_joined %>% select(-region), by = c(a2 = "country_code")) %>% + filter(region != "Antarctica") + +world_map_freedom %>% + filter(metric == "Civil Liberties") %>% + ggplot(aes(long, lat, group = group)) + + geom_polygon(aes(fill = value)) + + coord_map(xlim = c(-180, 180)) + + scale_fill_gradient2(low = "blue", + high = "red", + midpoint = 3.5, + guide = guide_legend(reverse = TRUE)) + + ggthemes::theme_map() + + transition_manual(year) + + labs(fill = "Civil Liberties Rating", + title = "World Freedom Index: Civil Liberties ({ current_frame })") +``` + + + + +```{r} +freedom %>% + distinct(country, country_code) %>% + View() + +gdp_percap %>% + filter(str_detect(country, "Iran")) +``` +