Skip to content

Commit

Permalink
Merge pull request #27 from heatherbaier/23-guatemala-gua-create-ids
Browse files Browse the repository at this point in the history
Creating Guatemala IDs (issue 23)
  • Loading branch information
heatherbaier authored Feb 21, 2024
2 parents 14d9359 + 1ab509a commit c81aa54
Show file tree
Hide file tree
Showing 8 changed files with 57,394 additions and 0 deletions.
57,311 changes: 57,311 additions & 0 deletions files_for_db/geo/gua_geo.csv

Large diffs are not rendered by default.

Binary file added files_for_db/shps/gua.zip
Binary file not shown.
1 change: 1 addition & 0 deletions files_for_db/shps/gua/gua.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ISO-8859-1
Binary file added files_for_db/shps/gua/gua.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions files_for_db/shps/gua/gua.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file added files_for_db/shps/gua/gua.shp
Binary file not shown.
Binary file added files_for_db/shps/gua/gua.shx
Binary file not shown.
81 changes: 81 additions & 0 deletions scripts/geo/gua_geo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import geopandas as gpd
import pandas as pd
import shutil
import os

from utils import *


# import all data
# One workbook per two-year reporting period, read in chronological order.
gua_1314, gua_1516, gua_1718, gua_1920, gua_2122 = map(
    pd.read_excel,
    [
        "../../data/GUA/establecimientos_2013-2014.xlsx",
        "../../data/GUA/establecimientos_2015-2016.xlsx",
        "../../data/GUA/establecimientos_2017-2018.xlsx",
        "../../data/GUA/establecimientos_2019-2020.xlsx",
        "../../data/GUA/establecimientos_2021-2022.xlsx",
    ],
)

# combine into one dataframe
yearly_frames = [gua_1314, gua_1516, gua_1718, gua_1920, gua_2122]
gua_all = pd.concat(yearly_frames)

# get rid of duplicates
# Sorting by latitude first matters: pandas places missing values last by
# default, so the first (kept) row for each CodigoEst is one that has a
# latitude whenever such a row exists. The frame is then ordered by
# CodigoEst and re-indexed so positions are stable for the geo_id below.
gua_all = (
    gua_all.sort_values(by="Latitud")
    .drop_duplicates(subset=["CodigoEst"])
    .sort_values(by="CodigoEst")
    .reset_index()
)

# select and rename relevant columns
# Keys are the source column names, values are the output names.
column_names = {
    "Departamento": "adm1_temp",
    "Municipio": "adm2_temp",
    "CodigoEst": "deped_id",
    "NombreEstablecimiento": "school_name",
    "direccion": "address",
    "Latitud": "latitude",
    "Longitud": "longitude",
}
gua_all = gua_all[list(column_names)].rename(columns=column_names)

# create geo_ids
# reset_index() exposes the 0-based row position as an "index" column; the
# geo_id is that position left-padded with zeros to six characters.
gua_all = gua_all.reset_index()
gua_all["geo_id"] = gua_all["index"].map('GUA-{0:0>6}'.format)

# add adm0
gua_all["adm0"] = "GUA"

# add other adms
# For each administrative level 1-3, fetch the "GTM" boundary file through
# the utils helpers, spatially join every school point to it, and store the
# matched polygon's shapeName as adm1 / adm2 / adm3.
longs = gua_all["longitude"].values
lats = gua_all["latitude"].values
cols = ["index", "adm1_temp", "adm2_temp", "deped_id", "school_name", "address", "latitude", "longitude", "geo_id", "adm0"]
for adm in range(1, 4):
    adm_col = "adm" + str(adm)
    try:
        cols += [adm_col]
        downloadGB("GTM", str(adm), ".")
        shp = gpd.read_file(getGBpath("GTM", f"ADM{adm}", "."))
        # Declare the points as WGS84 lon/lat (the same CRS used for the
        # shapefile export later in this script) so the join compares like
        # with like instead of joining CRS-less points.
        gua_all = gpd.GeoDataFrame(
            gua_all,
            geometry=gpd.points_from_xy(
                gua_all.longitude, gua_all.latitude, crs="EPSG:4326"
            ),
        )
        # Reproject the boundaries only if they declare a different CRS.
        if shp.crs is not None and shp.crs != gua_all.crs:
            shp = shp.to_crs(gua_all.crs)
        joined = gpd.sjoin(gua_all, shp, how="left")
        # A point that falls in more than one polygon (e.g. on a shared
        # border) produces one row per match. Keep the first match so the
        # row count still equals the number of schools; otherwise the
        # coordinate reassignment below raises and the whole level is lost.
        joined = joined[~joined.index.duplicated(keep="first")]
        gua_all = joined.rename(columns={"shapeName": adm_col})[cols]
        gua_all["longitude"] = longs
        gua_all["latitude"] = lats
        print(gua_all.head())
    except Exception as e:
        # Best effort: if the download or join fails for this level, leave
        # the column empty and carry on with the remaining levels.
        gua_all[adm_col] = None
        print(e)

# compare to adms in the dataset and use adms originally in dataset if no lat/long
# Wherever the spatial join left adm1/adm2 empty, fall back to the
# title-cased department / municipality name that came with the source data.
for adm_col, source_col in (("adm1", "adm1_temp"), ("adm2", "adm2_temp")):
    fallback_names = gua_all[source_col].str.title()
    gua_all[adm_col] = gua_all[adm_col].fillna(fallback_names)

# format other columns
for text_col in ("school_name", "address"):
    gua_all[text_col] = gua_all[text_col].str.title()

# reorder final columns
final_columns = [
    "geo_id",
    "deped_id",
    "school_name",
    "address",
    "adm0",
    "adm1",
    "adm2",
    "adm3",
    "longitude",
    "latitude",
]
gua_all = gua_all[final_columns]

# create csv
gua_all.to_csv("../../files_for_db/geo/gua_geo.csv", index=False)


# create shp files
# Turn the final table into WGS84 points, write it as a shapefile, then zip
# the shapefile directory.
gdf = gpd.GeoDataFrame(
    gua_all,
    geometry=gpd.points_from_xy(
        x=gua_all.longitude,
        y=gua_all.latitude,
        crs='EPSG:4326',  # or: crs = pyproj.CRS.from_user_input(4326)
    ),
)
# makedirs with exist_ok=True replaces the exists()/mkdir() pair: it has no
# check-then-create race and also creates a missing parent directory.
os.makedirs("../../files_for_db/shps/gua/", exist_ok=True)
# NOTE(review): the shapefile format limits field names to 10 characters, so
# "school_name" is expected to be shortened in the .dbf — confirm that
# downstream consumers use the shortened name.
gdf.to_file("../../files_for_db/shps/gua/gua.shp", index=False)
shutil.make_archive("../../files_for_db/shps/gua", 'zip', "../../files_for_db/shps/gua")

0 comments on commit c81aa54

Please sign in to comment.