Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Assign geo-ids for LBY (issue 35) #46

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4,531 changes: 4,531 additions & 0 deletions files_for_db/geo/lby_geo.csv

Large diffs are not rendered by default.

Binary file added files_for_db/shps/lby.zip
Binary file not shown.
1 change: 1 addition & 0 deletions files_for_db/shps/lby/lby.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ISO-8859-1
Binary file added files_for_db/shps/lby/lby.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions files_for_db/shps/lby/lby.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file added files_for_db/shps/lby/lby.shp
Binary file not shown.
Binary file added files_for_db/shps/lby/lby.shx
Binary file not shown.
66 changes: 66 additions & 0 deletions scripts/geo/lby_geo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import geopandas as gpd
import pandas as pd
import shutil
import os

from utils import *

# Load the REACH national schools assessment workbook.
survey = pd.read_excel("reach_lby_nationalschoolsassessment_complete_db_reliable__not_reliable_18oct2012.xlsx")

# Source column -> output column.
# NOTE: the coordinate headers are crossed on purpose: "QII_5Longitude" feeds
# "latitude" and "QII_6Latitude" feeds "longitude" (presumably the workbook's
# headers are mislabeled -- confirm against the raw data before changing).
column_map = {
    "QI_eSchoolID": "deped_id",
    "QI_fSchoolName": "school_name",
    "QII_4Street": "address",
    "QII_5Longitude": "latitude",
    "QII_6Latitude": "longitude",
}

# Keep only the rows flagged as reliable, restricted to the mapped columns.
is_reliable = survey["RELIABLE"] == "Reliable"
data = survey.loc[is_reliable, list(column_map)].rename(columns=column_map)

# Expose the workbook row number as an "index" column and derive the geo id
# from it: "LBY-" followed by the row number left-padded with zeros to 6 chars.
data = data.reset_index()
data["geo_id"] = data["index"].map("LBY-{0:0>6}".format)

# Country-level code is constant for every row.
data["adm0"] = "LBY"

# Attach administrative-unit names (adm1..adm3) to every school by spatially
# joining its point location against the boundary polygons of each level.
#
# Each level is best-effort: if the boundaries cannot be downloaded, read or
# joined, the error is printed and the level's column is filled with None so
# the output schema stays the same.
cols = ["index", "deped_id", "school_name", "address", "latitude", "longitude", "geo_id", "adm0"]
for adm in range(1, 4):
    adm_col = "adm" + str(adm)
    cols.append(adm_col)
    try:
        # downloadGB / getGBpath are not defined in this file (presumably they
        # come from `utils` via the star import).
        downloadGB("LBY", str(adm), ".")
        shp = gpd.read_file(getGBpath("LBY", f"ADM{adm}", "."))

        # Rebuild point geometries each pass: the column selection at the end
        # of the previous pass drops the geometry column.
        data = gpd.GeoDataFrame(
            data,
            geometry=gpd.points_from_xy(data.longitude, data.latitude),
        )
        data = data.set_crs("EPSG:4326")

        if adm == 1:
            # Discard schools whose coordinates fall outside the ADM1 polygons.
            data = gpd.clip(data, shp)

        joined = gpd.tools.sjoin(data, shp, how="left").rename(columns={"shapeName": adm_col})[cols]

        # A point that matches more than one polygon (e.g. on a shared border)
        # yields one row per match in a left join. Keep the first match so
        # every school stays a single row; previously the duplicated rows
        # caused a length-mismatch error that wiped the whole adm column.
        # longitude/latitude are carried through `cols`, so they need no
        # separate positional re-assignment.
        data = joined[~joined.index.duplicated(keep="first")]
        print(data.head())
    except Exception as e:
        data[adm_col] = None
        print(e)

# Write the tabular result as CSV into the current working directory.
data.to_csv("lby_geo.csv", index=False)

# Write the same rows as a point shapefile (WGS 84) and zip the shapefile
# directory next to it.
gdf = gpd.GeoDataFrame(
    data,
    geometry=gpd.points_from_xy(
        x=data.longitude,
        y=data.latitude,
        crs="EPSG:4326",
    ),
)
# makedirs(exist_ok=True) also creates missing parent directories and avoids
# the check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs("../../files_for_db/shps/lby/", exist_ok=True)
gdf.to_file("../../files_for_db/shps/lby/lby.shp", index=False)
shutil.make_archive("../../files_for_db/shps/lby", "zip", "../../files_for_db/shps/lby")
Loading