From 0fe71c1884ba57af178dbcee80f049a4995d29f8 Mon Sep 17 00:00:00 2001 From: Florin Barnea <56800631+fbarnea@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:03:49 +0000 Subject: [PATCH] add average FA, filter for FA and ctry group --- content/Amnesty-dashboard.ipynb | 314 +++++++++++++++++--------------- 1 file changed, 169 insertions(+), 145 deletions(-) diff --git a/content/Amnesty-dashboard.ipynb b/content/Amnesty-dashboard.ipynb index 671056b..c090e45 100644 --- a/content/Amnesty-dashboard.ipynb +++ b/content/Amnesty-dashboard.ipynb @@ -30,6 +30,7 @@ "outputs": [], "source": [ "import pandas as pd\n", + "import numpy as np\n", "import geopandas as gpd\n", "from bokeh.io import output_notebook, show\n", "from bokeh.models import GeoJSONDataSource, Slider, CustomJS, LinearColorMapper, Range1d\n", @@ -37,12 +38,12 @@ "from bokeh.plotting import figure\n", "from bokeh.layouts import column, row\n", "import json\n", - "from bokeh.models import ColumnDataSource\n", "from bokeh.layouts import gridplot\n", "from bokeh.models import LinearAxis\n", "%pip install ipywidgets\n", "%pip install colorcet\n", "import ipywidgets as widgets\n", + "import colorcet\n", "output_notebook()" ] }, @@ -75,7 +76,41 @@ " elif country in EG:\n", " return 'orange'\n", " else:\n", - " return 'lightgray'" + " return 'lightgray'\n", + " \n", + "def get_country_group(country):\n", + " if country in AFR:\n", + " return 'AFR'\n", + " elif country in APAC:\n", + " return 'APAC'\n", + " elif country in GRULAC:\n", + " return 'GRULAC'\n", + " elif country in WEOG:\n", + " return 'WEOG'\n", + " elif country in EG:\n", + " return 'EG'\n", + " \n", + "def calculate_alignment(row):\n", + " if row[\"China Vote\"] == \"Abstaining\":\n", + " return 0\n", + " if row[\"Vote\"] == row[\"China Vote\"]:\n", + " return 1\n", + " else:\n", + " return -1\n", + " \n", + "\n", + "# Global widgets\n", + "fa_type=widgets.Dropdown(\n", + " options=[\"FT_ODI\", \"AEI_FDI\", \"BU_ODA\", \"ODI_MOFCOM\", \"Average FA\"], description=\"FA Type:\"\n", + ")\n", + "\n", + "country_group=widgets.Dropdown(\n", + " options = [\"All\", \"AFR\", \"APAC\", \"GRULAC\", \"WEOG\", \"EG\"], description=\"Country Group:\"\n", + ")\n", + "\n", + "fa_colour = {\n", + " \"AFR\":\"blue\", \"APAC\":\"green\", \"GRULAC\":\"yellow\", \"WEOG\":\"orange\", \"EG\":\"brown\", \"All\": \"grey\"\n", + "}" ] }, { @@ -94,12 +129,13 @@ "metadata": {}, "outputs": [], "source": [ - "import colorcet\n", "\n", "# Load your data\n", - "df = pd.read_csv(\"data/oda.csv\")\n", + "oda = pd.read_csv(\"data/oda.csv\")\n", + "votes = pd.read_csv(\"data/vote.csv\")\n", + "df = pd.concat([oda, votes], ignore_index=True)\n", + "\n", "\n", - "# Create a new column 'vote_nr' based on the values in the 'Vote' column\n", "china_votes = df[df[\"Country\"] == \"China\"][\n", " [\"Session number\", \"Text title\", \"Vote\"]\n", "].rename(columns={\"Vote\": \"China Vote\"})\n", @@ -114,11 +150,7 @@ "df[\"mapped_vote\"] = df[\"Vote\"].map(vote_mapping)\n", "df[\"mapped_china_vote\"] = df[\"China Vote\"].map(vote_mapping)\n", "\n", - "def calculate_alignment(row):\n", - " if row[\"Vote\"] == row[\"China Vote\"]:\n", - " return 1\n", - " else:\n", - " return -1\n", + "\n", "\n", "df[\"alignment_score\"] = df.apply(calculate_alignment, axis=1)\n", "df_aggregated = (\n", @@ -178,7 +210,8 @@ " \"ODI_MOFCOM\",\n", " ]\n", "]\n", - "merged_initial = merged[(merged[\"Year\"] == 2006) | merged[\"Year\"].isna()]\n", + "merged[\"Average FA\"] = merged[[\"FT_ODI\",\"AEI_FDI\",\"BU_ODA\",\"ODI_MOFCOM\"]].mean(axis=1, skipna=True)\n", + "merged_initial = merged[(merged[\"Year\"] == 2006)]\n", "# Convert to GeoJSON\n", "geojson = json.dumps(merged_initial.__geo_interface__)\n", "geojson_original = json.dumps(merged.__geo_interface__)\n", @@ -190,7 +223,7 @@ "y_range = Range1d(start=-90, end=90)\n", "\n", "\n", - "def render_layout(fa_source):\n", + "def render_layout(fa_type):\n", " tools = \"wheel_zoom,pan,reset\"\n", " va_map = figure(\n", " title=\"Voting Alignment with China\",\n", @@ -261,15 +294,15 @@ " \"xs\",\n", " \"ys\",\n", " source=geosource,\n", - " fill_color={\"field\": fa_source, \"transform\": fa_color_mapper},\n", + " fill_color={\"field\": fa_type, \"transform\": fa_color_mapper},\n", " line_color=\"black\",\n", " line_width=0.5,\n", " )\n", " tooltips = [\n", " (\"Country\", \"@Country\"),\n", " (\"Alignment Percent\", \"@alignment\"),\n", - " (\"Financial Assistance\", f\"@{fa_source}\"),\n", - " (\"Financial Assistance type\", fa_source),\n", + " (\"Financial Assistance\", f\"@{fa_type}\"),\n", + " (\"Financial Assistance type\", fa_type),\n", " ]\n", " fa_map.add_tools(HoverTool(tooltips=tooltips))\n", " layout = column(slider, row(va_map, fa_map))\n", @@ -278,10 +311,8 @@ "\n", "widgets.interact(\n", " render_layout,\n", - " fa_source=widgets.Dropdown(\n", - " options=[\"FT_ODI\", \"AEI_FDI\", \"BU_ODA\", \"ODI_MOFCOM\"], description=\"FA Source:\", value=\"FT_ODI\"\n", - " ),\n", - ")\n" + " fa_type=fa_type,\n", + ")" ] }, { @@ -289,8 +320,8 @@ "id": "8cc580a3", "metadata": {}, "source": [ - "## Comparison of country voting alignment with China\n", - "over time for all countries, absolute number of votes cast" + "## Country voting alignment % vs FA by FA type\n", + "over time for all countries" ] }, { @@ -301,7 +332,9 @@ "outputs": [], "source": [ "# Load your data\n", - "df = pd.read_csv(\"data/oda.csv\")\n", + "oda = pd.read_csv(\"data/oda.csv\")\n", + "votes = pd.read_csv(\"data/vote.csv\")\n", + "df = pd.concat([oda, votes], ignore_index=True)\n", "\n", "# Create a new column 'vote_nr' based on the values in the 'Vote' column\n", "\n", @@ -314,31 +347,25 @@ " suffixes=(\"\", \"_china\"),\n", " how=\"left\",\n", ")\n", + "df[\"Country Group\"] = df[\"Country\"].apply(get_country_group)\n", "\n", "# Create a new column 'mapped_vote' that maps three values to -1, 0, and 1\n", "vote_mapping = {\"Against\": -1, \"Abstaining\": 0, \"In Favour\": 1}\n", "df[\"mapped_vote\"] = df[\"Vote\"].map(vote_mapping)\n", "df[\"mapped_china_vote\"] = df[\"China Vote\"].map(vote_mapping)\n", "\n", - "\n", - "def calculate_alignment(row):\n", - " if row[\"Vote\"] == row[\"China Vote\"]:\n", - " return 1\n", - " else:\n", - " return -1\n", - "\n", + "df_fa = df.groupby([\"Year\", \"Country\"]).agg({\"FT_ODI\": \"first\",\n", + " \"AEI_FDI\":\"first\",\n", + " \"BU_ODA\":\"first\",\n", + " \"ODI_MOFCOM\":\"first\"})\n", "\n", "df[\"alignment_score\"] = df.apply(calculate_alignment, axis=1)\n", "df_aggregated = (\n", - " df.groupby([\"Year\", \"Country\"])\n", + " df.groupby([\"Year\", \"Country\", \"Country Group\"])\n", " .agg(\n", " {\n", " \"alignment_score\": \"sum\",\n", " \"Text title\": \"count\",\n", - " \"ODI_MOFCOM\": \"first\",\n", - " \"BU_ODA\": \"first\",\n", - " \"AEI_FDI\": \"first\",\n", - " \"FT_ODI\": \"first\",\n", " }\n", " )\n", " .reset_index()\n", @@ -350,7 +377,7 @@ "df_aggregated[\"alignment_percentage\"] = (\n", " df_aggregated[\"alignment_score\"] / df_aggregated[\"Number of votes\"]\n", ") * 100\n", - "\n", + "df_aggregated = df_aggregated.merge(df_fa, on=[\"Year\", \"Country\"], how=\"left\")\n", "# Load world geometry data (you might need to adjust the file path)\n", "years = pd.DataFrame({\"Year\": df[\"Year\"].unique()})\n", "countries = pd.DataFrame({\"Country\": world[\"ADMIN\"].unique()})\n", @@ -362,6 +389,7 @@ " [\n", " \"Year\",\n", " \"Country\",\n", + " \"Country Group\",\n", " \"ODI_MOFCOM\",\n", " \"BU_ODA\",\n", " \"AEI_FDI\",\n", @@ -372,25 +400,31 @@ " \"alignment_percentage\",\n", " ]\n", "]\n", - "cya = cya.astype({'Year': 'string'})\n", + "cya[\"Average FA\"] = cya[[\"ODI_MOFCOM\",\"BU_ODA\",\"AEI_FDI\",\"FT_ODI\"]].mean(skipna=True,axis=1)\n", "cya.dropna()\n", "\n", - "\n", - "list_plots = []\n", - "\n", - "\n", - "def plot(fa_type, nr_plots):\n", - " for country in cya[\"Country\"].unique().tolist():\n", - " temp = cya[cya[\"Country\"] == country]\n", + "def plot(fa_type, country_group, nr_plots):\n", + " list_plots = []\n", + " if country_group==\"All\":\n", + " filtered_df = cya\n", + " else:\n", + " filtered_df = cya[cya[\"Country Group\"]==country_group]\n", + " for country in filtered_df[\"Country\"].unique().tolist():\n", + " temp = filtered_df[filtered_df[\"Country\"] == country]\n", " if temp[fa_type].any():\n", + " x=temp[\"Year\"].to_numpy()\n", " plot = figure(width=450, height=350, title=country)\n", " plot.y_range = Range1d(start=-100, end=+100)\n", " plot.extra_y_ranges = {\"FA\": Range1d(start=0, end=temp[fa_type].max())}\n", " plot.add_layout(LinearAxis(y_range_name=\"FA\", axis_label=\"Financial Assistance\"), \"right\")\n", - " plot.circle(x=temp[\"Year\"].unique().tolist(), y=temp[\"alignment_percentage\"].tolist())\n", - " plot.line(x=temp[\"Year\"].unique().tolist(), y=temp[\"alignment_percentage\"].tolist(), legend_label=\"Alignment percentage\")\n", - " plot.square(x=temp[\"Year\"].unique().tolist(), y=temp[fa_type], y_range_name=\"FA\", fill_color=\"red\")\n", - " plot.line(x=temp[\"Year\"].unique().tolist(), y=temp[fa_type], y_range_name=\"FA\", line_color=\"red\", legend_label=\"Financial Assitance\")\n", + " plot.circle(x=x, y=temp[\"alignment_percentage\"].tolist())\n", + " ap_par = np.polyfit(x, temp[\"alignment_percentage\"].to_numpy(), 1, full=True)\n", + " ap_slope=ap_par[0][0]\n", + " ap_intercept=ap_par[0][1]\n", + " ap_y_predicted = [ap_slope*i + ap_intercept for i in x]\n", + " plot.line(x, ap_y_predicted, color=\"blue\", legend_label='y='+str(round(ap_slope,2))+'x+'+str(round(ap_intercept,2))) \n", + " plot.square(x=x, y=temp[fa_type], y_range_name=\"FA\", fill_color=\"red\", color=\"red\")\n", + " plot.line(x=x, y=temp[fa_type], y_range_name=\"FA\", line_color=\"red\", legend_label=\"Financial Assitance\")\n", " list_plots.append(plot)\n", " x = nr_plots\n", " grid = [list_plots[i : i + x] for i in range(0, len(list_plots), x)]\n", @@ -400,9 +434,8 @@ "\n", "widgets.interact(\n", " plot,\n", - " fa_type=widgets.Dropdown(\n", - " options=[\"FT_ODI\", \"AEI_FDI\", \"BU_ODA\", \"ODI_MOFCOM\"], description=\"FA Type:\"\n", - " ),\n", + " fa_type=fa_type,\n", + " country_group=country_group,\n", " nr_plots=widgets.BoundedIntText(\n", " value=4,\n", " min=2,\n", @@ -411,121 +444,112 @@ " description=\"Plots (2 to 10):\",\n", " disabled=False,\n", " ),\n", - ")\n", - "plot(\"FT_ODI\", 4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b68aa3a8-2d09-4c58-9db2-e0db0ec3198f", - "metadata": {}, - "outputs": [], - "source": [ - "# Group by 'Year' and 'Country' and calculate the sum of 'Chinainfav'\n", - "df_summary = df.groupby(['Year', 'Country']).agg({\n", - " 'In favour': 'sum',\n", - " 'Against': 'sum',\n", - " 'Chinainfav': lambda x: (x == 1).sum(),\n", - " 'Chinagainst': lambda x: (x == 1).sum()\n", - "}).reset_index()\n", - "df_summary['Total'] = df_summary['Chinainfav'] + df_summary['Chinagainst']\n", - "# Display the table\n", - "print(df_summary)" + ")\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "5bcb4db1-98ff-42ca-8762-8d0fb1ea7412", + "id": "dd27dace", "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import geopandas as gpd\n", - "from itertools import product\n", - "from bokeh.models import ColumnDataSource, CustomJS, LogColorMapper, Slider\n", - "from bokeh.plotting import figure, show, output_file\n", - "from bokeh.layouts import column\n", - "\n", - "# Read the data\n", - "data = pd.read_csv(\"data/oda.csv\")\n", - "\n", - "# Calculate total votes in favor per country per year\n", - "total_votes_favor = data.groupby(['Country', 'Year'])['In favour'].sum().reset_index()\n", - "\n", - "# Normalize data - ensure each country has an entry for each year\n", - "all_countries = total_votes_favor['Country'].unique()\n", - "all_years = total_votes_favor['Year'].unique()\n", - "\n", - "all_combinations = pd.DataFrame(product(all_countries, all_years), columns=['Country', 'Year'])\n", - "total_votes_favor = pd.merge(all_combinations, total_votes_favor, on=['Country', 'Year'], how='left')\n", - "total_votes_favor['In favour'].fillna(0, inplace=True)\n", - "\n", - "# Load world geometry data\n", - "world = gpd.read_file('data/ne_110m_admin_0_countries.shp')\n", - "\n", - "# Reproject to a projected CRS (e.g., World Mercator)\n", - "world = world.to_crs(epsg=3395)\n", - "\n", - "# Calculate the centroid of each country's geometry\n", - "world['centroid'] = world['geometry'].centroid\n", - "\n", - "# Convert back to geographic CRS for latitude and longitude\n", - "world = world.to_crs(epsg=4326)\n", "\n", - "# Extract the longitude and latitude of the centroid\n", - "world['lon'] = world['centroid'].x\n", - "world['lat'] = world['centroid'].y\n", + "# Load your data\n", + "oda = pd.read_csv(\"data/oda.csv\")\n", + "votes = pd.read_csv(\"data/vote.csv\")\n", + "df = pd.concat([oda, votes], ignore_index=True)\n", "\n", - "# Create a dictionary with country names as keys and coordinates as values\n", - "country_coords = world.set_index('ADMIN')['centroid'].apply(lambda x: {'lon': x.x, 'lat': x.y}).to_dict()\n", + "# Create a new column 'vote_nr' based on the values in the 'Vote' column\n", "\n", - "# Prepare data dictionary for ColumnDataSource\n", - "data_dict = {'x': [], 'y': [], 'name': [], 'votes': []}\n", - "for year in all_years:\n", - " year_data = total_votes_favor[total_votes_favor['Year'] == year]\n", - " for country in all_countries:\n", - " if country in country_coords:\n", - " data_dict['x'].append(country_coords[country]['lon'])\n", - " data_dict['y'].append(country_coords[country]['lat'])\n", - " data_dict['name'].append(country)\n", - " data_dict['votes'].append(year_data[year_data['Country'] == country]['In favour'].iloc[0])\n", - " data_dict[str(year)] = data_dict['votes'].copy()\n", + "china_votes = df[df[\"Country\"] == \"China\"][\n", + " [\"Session number\", \"Text title\", \"Vote\"]\n", + "].rename(columns={\"Vote\": \"China Vote\"})\n", + "df = df.merge(\n", + " china_votes,\n", + " on=[\"Session number\", \"Text title\"],\n", + " suffixes=(\"\", \"_china\"),\n", + " how=\"left\",\n", + ")\n", + "# Create a new column 'mapped_vote' that maps three values to -1, 0, and 1\n", + "vote_mapping = {\"Against\": -1, \"Abstaining\": 0, \"In Favour\": 1}\n", + "df[\"mapped_vote\"] = df[\"Vote\"].map(vote_mapping)\n", + "df[\"mapped_china_vote\"] = df[\"China Vote\"].map(vote_mapping)\n", + "df[\"Country Group\"] = df[\"Country\"].apply(get_country_group)\n", "\n", - "# Create a ColumnDataSource\n", - "source = ColumnDataSource(data_dict)\n", "\n", - "print(source)\n", + "df_fa = df.groupby([\"Year\", \"Country\", \"Country Group\"]).agg(\n", + " {\"FT_ODI\": \"first\", \"AEI_FDI\": \"first\", \"BU_ODA\": \"first\", \"ODI_MOFCOM\": \"first\"}\n", + ")\n", + "df_fa = df_fa.groupby([\"Year\", \"Country Group\"]).agg(\n", + " {\"FT_ODI\": \"sum\", \"AEI_FDI\": \"sum\", \"BU_ODA\": \"sum\", \"ODI_MOFCOM\": \"sum\"}\n", + ")\n", + "df_fa[\"Average FA\"] = df_fa[[\"ODI_MOFCOM\",\"BU_ODA\",\"AEI_FDI\",\"FT_ODI\"]].replace(0, np.nan).mean(axis=1, skipna=True)\n", + "df[\"alignment_score\"] = df.apply(calculate_alignment, axis=1)\n", + "df_aggregated = (\n", + " df.groupby([\"Year\", \"Country Group\"])\n", + " .agg(\n", + " {\n", + " \"alignment_score\": \"sum\",\n", + " \"Text title\": \"count\",\n", + " }\n", + " ).reset_index()\n", + ")\n", + "df_aggregated.rename(columns={\"Text title\": \"Number of votes\"}, inplace=True)\n", + "df_aggregated[\"alignment\"] = (\n", + " df_aggregated[\"alignment_score\"] / df_aggregated[\"Number of votes\"]\n", + ")\n", + "df_aggregated[\"alignment_percentage\"] = (\n", + " df_aggregated[\"alignment_score\"] / df_aggregated[\"Number of votes\"]\n", + ") * 100\n", + "df_aggregated = df_aggregated.merge(df_fa, on=[\"Year\", \"Country Group\"], how=\"left\")\n", + "# Load world geometry data (you might need to adjust the file path)\n", "\n", - "# Define color mapper\n", - "custom_colors = ['#f2f2f2', '#fee5d9', '#fcbba1', '#fc9272', '#fb6a4a', '#de2d26']\n", - "color_mapper = LogColorMapper(palette=custom_colors)\n", "\n", - "# Create figure\n", - "TOOLS = \"pan,wheel_zoom,reset,hover,save\"\n", - "p = figure(title=\"Total Votes in Favor per Country per Year\", tools=TOOLS, x_axis_location=None, y_axis_location=None)\n", - "p.grid.grid_line_color = None\n", + "def plot(fa_type, country_group):\n", + " plot=None\n", + " if country_group==\"All\":\n", + " filtered_df = df_aggregated\n", + " else:\n", + " filtered_df = df_aggregated[df_aggregated[\"Country Group\"]==country_group]\n", + " plot = figure(width=950, height=600, title=\"Overall FA vs Alignment\")\n", + " plot.y_range = Range1d(start=-100, end=+100)\n", + " plot.extra_y_ranges = {\"FA\": Range1d(start=0, end=filtered_df[fa_type].max())}\n", + " plot.add_layout(\n", + " LinearAxis(y_range_name=\"FA\", axis_label=\"Financial Assistance\"),\n", + " \"right\",\n", + " )\n", + " x = filtered_df[\"Year\"].to_numpy()\n", + " plot.vbar(\n", + " x=x,\n", + " top=filtered_df[fa_type],\n", + " width=0.9,\n", + " y_range_name=\"FA\",\n", + " fill_color=\"red\",\n", + " legend_label=f\"Financial assistance:{fa_type}\"\n", + " )\n", + " ap_par = np.polyfit(x, filtered_df[\"alignment_percentage\"].to_numpy(), 1, full=True)\n", + " ap_slope=ap_par[0][0]\n", + " ap_intercept=ap_par[0][1]\n", + " ap_y_predicted = [ap_slope*i + ap_intercept for i in x]\n", + " plot.line(x, ap_y_predicted, color=\"blue\", legend_label='y='+str(round(ap_slope,2))+'x+'+str(round(ap_intercept,2)))\n", + " plot.circle(\n", + " x=x,\n", + " y=filtered_df[\"alignment_percentage\"].tolist(),\n", + " legend_label=\"Alignment %\"\n", + " )\n", "\n", - "# Plot the countries\n", - "p.patches('x', 'y', source=source, fill_color={'field': 'votes', 'transform': color_mapper}, fill_alpha=0.8, line_color=\"black\", line_width=0.5)\n", + " show(plot)\n", "\n", - "# Slider\n", - "slider = Slider(start=min(all_years), end=max(all_years), value=min(all_years), step=1, title=\"Year\")\n", "\n", - "# JavaScript callback for the slider\n", - "callback = CustomJS(args=dict(source=source, slider=slider), code=\"\"\"\n", - "var data = source.data;\n", - "var year = slider.value.toString();\n", - "data['votes'] = data[year];\n", - "source.change.emit();\n", - "\"\"\")\n", "\n", - "slider.js_on_change('value', callback)\n", "\n", - "# Show plot\n", - "output_notebook()\n", - "show(column(slider, p))" + "widgets.interact(\n", + " plot,\n", + " fa_type=fa_type,\n", + " country_group=country_group\n", + ")\n", + "df_aggregated.head()" ] }, {