Skip to content

Commit

Permalink
part 3 of ass 8
Browse files Browse the repository at this point in the history
  • Loading branch information
serghiarista committed Feb 15, 2024
1 parent b6d92db commit c9100dc
Showing 1 changed file with 136 additions and 7 deletions.
143 changes: 136 additions & 7 deletions Lecture_10/Assignment_8/group_1_ass_8_2024.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "10b9d7f6",
"metadata": {},
"outputs": [],
Expand All @@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "ab8d0d69",
"metadata": {},
"outputs": [],
Expand All @@ -42,7 +42,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "3ca29162",
"metadata": {},
"outputs": [],
Expand All @@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "9a8e5168",
"metadata": {},
"outputs": [],
Expand All @@ -76,7 +76,17 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "a1061972",
"metadata": {},
"outputs": [],
"source": [
"base_Distrito"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07d1243d",
"metadata": {},
"outputs": [],
Expand All @@ -87,7 +97,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"id": "e29feb17",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -116,13 +126,132 @@
"3. Use geopandas to plot the number of cases in 2021 by the district using a continuous legend. Do not forget to indicate the color of NA values. Use this shapefile."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "45ec5dbe",
"metadata": {},
"outputs": [],
"source": [
"# Keep only the 2021 rows of base_Distrito and bind them to a new name.\n",
"# .copy() makes the result an independent DataFrame, so assigning to its columns\n",
"# later cannot trigger SettingWithCopyWarning or depend on view-versus-copy semantics.\n",
"\n",
"base_Distrito_2021 = base_Distrito[base_Distrito[\"Año\"] == 2021].copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e34df96",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Check that Ubigeo_Distrito is unique in both tables before merging on it.\n",
"# Only the last expression of a cell is displayed, so both flags are collected in one\n",
"# labelled dict; as two bare expressions the first result would be discarded.\n",
"\n",
"{\n",
"    'maps_DIST': maps_DIST.Ubigeo_Distrito.is_unique,\n",
"    'base_Distrito_2021': base_Distrito_2021.Ubigeo_Distrito.is_unique,\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a3d4b9e",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Inspect the column dtypes of both tables, in particular Ubigeo_Distrito (the merge key).\n",
"# Only the last expression of a cell is displayed, so display() is used to show both\n",
"# listings; as two bare expressions the maps_DIST dtypes would never be shown.\n",
"\n",
"display(maps_DIST.dtypes, base_Distrito_2021.dtypes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38ce8fb0",
"metadata": {},
"outputs": [],
"source": [
"# Cast the Ubigeo_Distrito merge key of base_Distrito_2021 to string.\n",
"# The frame is rebound to the result of astype() instead of writing through .loc[:, ...]:\n",
"# an in-place write of strings into an existing column triggers pandas' incompatible-dtype\n",
"# setitem warning, and SettingWithCopyWarning when the frame is a slice of another one.\n",
"# Re-running this cell is harmless (a str column stays str).\n",
"# NOTE(review): a plain str cast does not restore leading zeros; confirm that the key\n",
"# format matches maps_DIST.Ubigeo_Distrito.\n",
"\n",
"base_Distrito_2021 = base_Distrito_2021.astype({'Ubigeo_Distrito': str})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "603ea155",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Left-join the 2021 table onto maps_DIST using Ubigeo_Distrito as the key.\n",
"# Every row of maps_DIST is kept; rows without a match get NaN in the columns that\n",
"# come from base_Distrito_2021.\n",
"\n",
"dataset = maps_DIST.merge(\n",
"    base_Distrito_2021,\n",
"    on='Ubigeo_Distrito',\n",
"    how='left',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aafaf7da",
"metadata": {},
"outputs": [],
"source": [
"# Frequency table of the distinct values found in the 'Casos' column.\n",
"# The counts are turned into a two-column frame first and sorted by value afterwards,\n",
"# so each row keeps the index it had in the count-ordered table.\n",
"\n",
"tabla_frecuencias_casos = (\n",
"    dataset['Casos']\n",
"    .value_counts()\n",
"    .rename_axis('Número de Casos')\n",
"    .reset_index(name='Frecuencia')\n",
"    .sort_values(by='Número de Casos')\n",
")\n",
"\n",
"tabla_frecuencias_casos"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1658b99",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Install/upgrade mapclassify, which the classified choropleth (scheme=...) depends on.\n",
"# %pip installs into the environment of the running kernel, whereas !pip runs whichever\n",
"# pip executable the shell finds first, which may belong to a different environment.\n",
"\n",
"%pip install --upgrade mapclassify"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9dd96f04",
"metadata": {},
"outputs": [],
"source": []
"source": [
"#Se hace el mapeo considerando un colo diferente para los valores NaN\n",
"\n",
"import matplotlib.pyplot as plt\n",
"\n",
"fig, ax = plt.subplots(figsize=(20, 20))\n",
"dataset.plot(ax = ax, \n",
" column='Casos', \n",
" cmap= 'Reds', \n",
" figsize=(20, 20), \n",
" linestyle='--',\n",
" edgecolor='black', \n",
" legend = True, \n",
" scheme = \"User_Defined\", \n",
" missing_kwds= dict(color = \"#DADADB\",), \n",
" classification_kwds = dict( bins = [ 100, 200, 300, 400, 500, 1000, 1500, 2000, 2500, 3000 ] ), \n",
" legend_kwds=dict( loc='upper left',\n",
" bbox_to_anchor=(1.01, 1),\n",
" fontsize='x-large',\n",
" title= \"Número de casos por Distrito\", \n",
" title_fontsize = 'x-large', \n",
" frameon= False )\n",
" )"
]
}
],
"metadata": {
Expand Down

0 comments on commit c9100dc

Please sign in to comment.