-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_masks_txt_shp.py
425 lines (350 loc) · 14.4 KB
/
create_masks_txt_shp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
import numpy as np
from matplotlib.path import Path
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.basemap import Basemap,cm
import os
import unicodedata
from osgeo import ogr
####################################################################################
# Load a text file containing lists of vertices (space separated, one vertex per line)
# Creates matplotlib.path.Path objects representing the polygons.
# Lines starting with '#' are ignored
# A blank line indicates the end of a polygon, so multiple polygons can be defined with new lines in between
def load_polygons(fname):
    """Read polygon vertices from a text file and return them as Path objects.

    The file holds one vertex per line as whitespace-separated values
    (lon first, then lat). Lines whose first character is '#' are skipped.
    A blank line ends the current polygon, so several polygons can be stored
    in one file; a trailing blank line is optional.

    Arguments:
        fname: path of the text file to read.

    Returns:
        list of matplotlib.path.Path objects, one per polygon, in file order.

    Raises:
        ValueError: if a coordinate cannot be parsed as a float.
    """
    polygons = []
    vertices = []
    # 'with' releases the file handle even if a line fails to parse
    with open(fname, 'r') as infile:
        for line in infile:
            if line.startswith('#'):
                # commented line
                continue
            fields = line.split()
            if fields:
                # Convert here so a malformed number is reported for the
                # offending line instead of surfacing later inside Path
                vertices.append([float(value) for value in fields])  # lon, lat
            elif vertices:
                # blank line: the current polygon is complete
                polygons.append(Path(np.array(vertices)))
                vertices = []
    # The file may not end with a blank line: flush the last polygon
    if vertices:
        polygons.append(Path(np.array(vertices)))
    return polygons
###################################################################################
def load_grid(fname,latname='latitude0',lonname='longitude0'):
    """Read the latitude/longitude variables of a netCDF file as 2D arrays.

    Arguments:
        fname: path of the netCDF file.
        latname, lonname: names of the latitude and longitude variables.

    Returns:
        (lonxx, latyy): if the latitude variable is 2D, the longitude and
        latitude arrays exactly as read; otherwise the result of
        np.meshgrid(lon, lat).
    """
    with Dataset(fname, 'r') as grid_file:
        grid_lat = grid_file.variables[latname][:]
        grid_lon = grid_file.variables[lonname][:]

    if grid_lat.ndim == 2:
        # Coordinates are already stored on the full grid
        return grid_lon, grid_lat

    # Expand the coordinate axes onto the full grid
    lonxx, latyy = np.meshgrid(grid_lon, grid_lat)
    return lonxx, latyy
##################################################################################
# Function to create mask given polygons object and points array
def create_mask(polygons,points,nlat,nlon):
    """Build a gridded mask that is True outside every polygon.

    Arguments:
        polygons: iterable of objects providing contains_points(points)
            (matplotlib.path.Path objects elsewhere in this file).
        points: N x 2 array of grid coordinates, with N = nlat * nlon.
        nlat, nlon: dimensions of the grid the mask is reshaped to.

    Returns:
        Boolean array of shape (nlat, nlon): False where a point lies inside
        at least one polygon, True elsewhere. With no polygons at all, every
        point is outside, so the result is all True.
    """
    # Start from "no point is inside anything" so the union needs no special
    # first-iteration handling and an empty polygon list is well defined
    inside = np.zeros((nlat, nlon), dtype=bool)
    for polygon in polygons:
        hits = polygon.contains_points(points)
        inside = inside | np.reshape(hits, (nlat, nlon))
    return ~inside  # Invert so that True is outside the region
#################################################################################
# Wrapper function to return the mask given the polygons file and grid file
def load_and_create_mask(f_polygons,f_grid,latname='latitude0',lonname='longitude0'):
    """Create a gridded mask from a polygon text file and a netCDF grid file.

    Arguments:
        f_polygons: text file of polygon vertices (read by load_polygons).
        f_grid: netCDF file providing the grid (read by load_grid).
        latname, lonname: names of the latitude/longitude variables in f_grid.

    Returns:
        The array returned by create_mask for the grid (True outside the
        polygons).
    """
    region_polygons = load_polygons(f_polygons)
    grid_lon, grid_lat = load_grid(f_grid, latname, lonname)
    n_rows, n_cols = grid_lon.shape
    # One (lon, lat) row per grid cell: N x 2 with N = n_rows * n_cols
    coords = np.vstack([grid_lon.flatten(), grid_lat.flatten()]).transpose()
    return create_mask(region_polygons, coords, n_rows, n_cols)
#################################################################################
def add_to_text(fileh,polygon):
    """Append one polygon to an open text file.

    Each vertex of polygon.vertices is written on its own line as its first
    two components separated by a space; a blank line then terminates the
    polygon.

    Arguments:
        fileh: writable file-like object.
        polygon: object with a 'vertices' sequence of coordinate pairs.
    """
    fileh.writelines(
        ' '.join((str(vertex[0]), str(vertex[1]))) + '\n'
        for vertex in polygon.vertices
    )
    # Blank line marks the end of this polygon
    fileh.write('\n')
#################################################################################
def load_shapefile(shapefile,fieldname,field_list=None):
    """Load region boundaries from an ESRI shapefile as matplotlib Paths.

    Arguments:
        shapefile: path of the shapefile.
        fieldname: attribute whose value identifies each feature.
        field_list: optional collection of attribute values to keep; features
            whose value is not in it are skipped. None keeps every feature.

    Returns:
        dict mapping each attribute value to a list of matplotlib.path.Path
        objects, one per line of the feature's boundary. If several features
        share a value, the last one read wins.

    Raises:
        IOError: if the shapefile cannot be opened.
        Exception: if 'fieldname' is not a field of the shapefile.
    """
    import json  # local import: only needed to parse the exported geometry

    print('Loading Shapefile')
    driver = ogr.GetDriverByName("ESRI Shapefile")
    dataSource = driver.Open(shapefile, 0)
    if dataSource is None:
        # Fail with a clear message instead of an AttributeError on None below
        raise IOError('Could not open shapefile "' + str(shapefile) + '"')
    layer = dataSource.GetLayer()

    counties = {}
    for feature in layer:
        try:
            region = feature.GetField(fieldname)
        except (ValueError, KeyError):
            # NOTE(review): which of the two is raised for an unknown field
            # appears to depend on the GDAL version - confirm
            print(feature.items())
            raise Exception('Error, field "'+fieldname+'" does not exist in the shapefile')
        print(region)
        if field_list is not None and region not in field_list:
            # Skip this region
            continue

        geometry = feature.geometry()
        if geometry is None:
            print('Error: unknown geometry')
            continue
        # The boundary is exported as GeoJSON text. Parse it with json.loads
        # rather than eval(): eval would execute arbitrary text from the file
        # and fails on JSON literals such as null/true/false.
        boundary = json.loads(geometry.Boundary().ExportToJson())
        if boundary is None:
            print('Error: unknown geometry')
            continue

        if boundary['type'] == 'LineString':
            polygons = [Path(np.array(boundary['coordinates']))]
        elif boundary['type'] == 'MultiLineString':
            polygons = [Path(np.array(line)) for line in boundary['coordinates']]
        else:
            print('Error: unknown geometry')
            continue

        if region is not None:
            counties[region] = polygons
    return counties
################################################################################
def create_netcdf(template,data,outname,template_var='pr'):
    """Write a mask to a new netCDF file on the grid of a template dataset.

    Every dimension of template.variables[template_var] whose name starts
    with 'lat' or 'lon' is copied (values and attributes) to the output under
    the short name 'lat' / 'lon'. The mask is stored in a float variable
    'region_mask' with dimensions (lat, lon), holding 1 - data, i.e. the
    values of 'data' inverted.

    Arguments:
        template: open netCDF4 Dataset providing the grid.
        data: mask array on the template grid.
        outname: path of the netCDF file to create.
        template_var: name of the variable in 'template' whose dimensions
            define the grid.
    """
    # Looked up before the output is created so that a wrong variable name
    # does not leave an empty file behind
    source_var = template.variables[template_var]

    # 'with' closes the output file even if one of the writes fails
    with Dataset(outname, 'w') as outfile:
        for dim in source_var.dimensions:
            short = dim[:3]
            if short != 'lat' and short != 'lon':
                continue
            coord_in = template.variables[dim]
            outfile.createDimension(short, len(template.dimensions[dim]))
            coord_out = outfile.createVariable(short, 'f', (short,))
            coord_out[:] = coord_in[:]
            for att in coord_in.ncattrs():
                if att == '_FillValue':
                    # netCDF4 only accepts _FillValue at variable creation;
                    # assigning it afterwards raises, so it is not copied
                    continue
                setattr(coord_out, att, coord_in.getncattr(att))

        # Create data variable (named region_mask)
        region_mask = outfile.createVariable('region_mask', 'f', ['lat', 'lon'])
        region_mask[:] = 1 - data  # same values as (data-1)*-1
############################################################################
# Create a textfile of polygons, combining multiple fields from the shapefile
#
# Input Arguments:
# shapefile: path of shapefile containing polygons
# fieldname: attribute name in shapefile used to identify each field.
# field_list: (optional)- specify a list (subset) of fields to include in the mask, otherwise the mask combines all fields
# region_name: (optional)- name of region used in output filename
#
def create_combined_textfiles(shapefile, fieldname, field_list=None, region_name='region'):
    """Write the polygons of several shapefile regions to one text file.

    The output is 'masks_text/mask_<region_name>.txt' (the folder is created
    if needed), with one blank-line-terminated block of vertices per polygon.

    Arguments:
        shapefile: path of shapefile containing polygons.
        fieldname: attribute name in the shapefile used to identify each field.
        field_list: optional list (subset) of fields to include; None includes
            every field found in the shapefile.
        region_name: name of the region, used in the output filename.

    Raises:
        KeyError: if an entry of field_list was not found in the shapefile.
    """
    # first create folder (if needed)
    if not os.path.exists('masks_text'):
        os.mkdir('masks_text')

    # Load Shape file
    regions = load_shapefile(shapefile, fieldname, field_list=field_list)

    print('Looping over regions and combining masks')
    # Either loop over all regions, or list of regions specified by 'field_list'
    if field_list is None:
        field_list = list(regions)
    else:
        field_list = list(field_list)

    # Check the requested regions before opening the output file, so that a
    # bad name does not leave a truncated, half-written file behind
    for region in field_list:
        if region not in regions:
            raise KeyError(region)

    with open('masks_text/mask_'+region_name+'.txt','w') as text_polygons:
        for region in field_list:
            print('%s = %s' % (fieldname, region))
            # Add each polygon of the region to the text file
            for polygon in regions[region]:
                add_to_text(text_polygons, polygon)
###############################################################################
# Create a number of masks, from the shapefile, for a specific grid
# Area outside the polygons is True/1, area inside the polygons is False/0
#
#
# f_grid: filename of netcdf file containing grid information
# latname, lonname, template_var: variable names for latitude, longitude and a template variable in f_grid netcdf file
# shapefile: path of shapefile containing polygons
# fieldname: attribute name in shapefile used to identify each field.
# field_list: (optional)- specify a list (subset) of fields to create masks for, otherwise masks will be created covering all fields
# plot, netcdf_out: (optional) booleans- whether or not to create output plot and/or output netcdf file
#
# Returns: dictionary of region_name:mask_array pairs
#
def create_masks(f_grid, shapefile, fieldname, field_list=None, latname='lat',lonname='lon',template_var='pr', plot=False, netcdf_out = False):
    """Create one gridded mask per shapefile region.

    Arguments:
        f_grid: netCDF file containing the grid information.
        shapefile: path of shapefile containing polygons.
        fieldname: attribute name in the shapefile used to identify each field.
        field_list: optional list (subset) of fields to create masks for;
            None creates a mask for every field in the shapefile.
        latname, lonname, template_var: names of the latitude, longitude and
            template variables in f_grid.
        plot: if True, save 'plots/mask_<region>.png' for each region.
        netcdf_out: if True, save 'masks_netcdf/mask_<region>.nc' for each
            region.

    Returns:
        dict mapping the ASCII form of each region name to its mask array
        (True outside the region, False inside).
    """
    # first create folders (if needed)
    if plot and not os.path.exists('plots'):
        os.mkdir('plots')
    if netcdf_out and not os.path.exists('masks_netcdf'):
        os.mkdir('masks_netcdf')

    # Load Shape file
    regions = load_shapefile(shapefile, fieldname, field_list=field_list)

    # Load lat lon grid (for mask)
    lonxx, latyy = load_grid(f_grid, latname=latname, lonname=lonname)
    nlat, nlon = lonxx.shape

    # Update lon to be from -180 to 180
    # NOTE: only needed if the shapefile uses longitudes from -180 to 180;
    # comment out otherwise
    lonxx[lonxx > 180] = lonxx[lonxx > 180] - 360

    # Turn lat and lon into a list of coordinates
    points = np.vstack((lonxx.flatten(), latyy.flatten())).T

    if plot:
        # Set up Basemap projection (may need fine tuning)
        m = Basemap(projection='robin', lon_0=180)
        xx, yy = m(lonxx, latyy)  # basemap coordinates

    # Either loop over all regions, or list of regions specified by 'field_list'
    if field_list is None:
        field_list = list(regions)

    masks = {}
    print('Looping over regions and creating gridded masks')
    for region in field_list:
        region_ascii = _region_to_ascii(region)
        print('%s = %s' % (fieldname, region))
        polygons = regions[region]

        # Create mask out of polygon, matching points from grid
        mask = create_mask(polygons, points, nlat, nlon)
        masks[region_ascii] = mask

        if netcdf_out:
            # Close the template after each write instead of leaking one
            # open file handle per region
            with Dataset(f_grid, 'r') as template:
                create_netcdf(template, mask, 'masks_netcdf/mask_'+region_ascii+'.nc', template_var=template_var)

        if plot:
            plt.clf()
            m.contourf(xx, yy, mask)
            plt.colorbar()
            m.drawcoastlines(linewidth=0.2)
            m.drawcountries(linewidth=0.2)
            plt.title('Mask: '+region_ascii)
            plt.savefig('plots/mask_'+region_ascii+'.png')
    return masks


def _region_to_ascii(region):
    """Return 'region' as a plain-ASCII str usable in file names and dict keys."""
    text = region if isinstance(region, (bytes, type(u''))) else str(region)
    if isinstance(text, bytes):
        # Byte strings are decoded as UTF-8, as the original code did
        text = text.decode('utf-8')
    # NFKD splits accented characters into base letter + combining mark;
    # the marks are then dropped by the ASCII encoding
    stripped = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')
    # Hand back the native str type on both Python 2 and Python 3
    return stripped if str is bytes else stripped.decode('ascii')
#############################################################################
# Create a mask, combining multiple fields from a shapefile, for a specific grid
# Area outside the polygons is True/1, area inside the polygons is False/0
#
# Input Arguments:
# f_grid: filename of netcdf file containing grid information
# latname, lonname, template_var: variable names for latitude, longitude and a template variable in f_grid netcdf file
# shapefile: path of shapefile containing polygons
# fieldname: attribute name in shapefile used to identify each field.
# field_list: (optional)- specify a list (subset) of fields to include in the mask, otherwise the mask combines all fields
# plot, netcdf_out: (optional) booleans- whether or not to create output plot and/or output netcdf file
# region_name: (optional)- name of region in output files
#
# Returns array of the combined mask
#
def create_mask_combined(f_grid,shapefile,fieldname,field_list=None,region_name='region',latname='lat',lonname='lon',template_var='pr', plot=False,netcdf_out=False):
    """Create a single gridded mask covering several shapefile regions.

    Arguments:
        f_grid: netCDF file containing the grid information.
        shapefile: path of shapefile containing polygons.
        fieldname: attribute name in the shapefile used to identify each field.
        field_list: optional list (subset) of fields to include in the mask;
            None combines every field in the shapefile.
        region_name: name of the combined region, used in output filenames.
        latname, lonname, template_var: names of the latitude, longitude and
            template variables in f_grid.
        plot: if True, save 'plots/mask_<region_name>.png', in which each
            region adds its 1-based index to the cells it covers.
        netcdf_out: if True, save 'masks_netcdf/mask_<region_name>.nc'.

    Returns:
        Boolean array of the combined mask: True outside every region, False
        inside at least one region.
    """
    # first create folders (if needed)
    if plot and not os.path.exists('plots'):
        os.mkdir('plots')
    if netcdf_out and not os.path.exists('masks_netcdf'):
        os.mkdir('masks_netcdf')

    # Load Shape file
    regions = load_shapefile(shapefile, fieldname, field_list=field_list)

    # Load lat lon grid (for mask)
    lonxx, latyy = load_grid(f_grid, latname=latname, lonname=lonname)
    nlat, nlon = lonxx.shape

    # Update lon to be from -180 to 180
    # NOTE: only needed if the shapefile uses longitudes from -180 to 180;
    # comment out otherwise
    lonxx[lonxx > 180] = lonxx[lonxx > 180] - 360

    # Turn lat and lon into a list of coordinates
    points = np.vstack((lonxx.flatten(), latyy.flatten())).T

    # Number of regions covering each grid cell
    combined_mask = np.zeros([nlat, nlon])
    if plot:
        # Set up Basemap projection (may need fine tuning)
        m = Basemap(projection='robin', lon_0=180)
        xx, yy = m(lonxx, latyy)  # basemap coordinates
        plot_mask = np.zeros([nlat, nlon])

    print('Looping over regions and combining masks')
    # Either loop over all regions, or list of regions specified by 'field_list'
    if field_list is None:
        field_list = list(regions)

    for i, region in enumerate(field_list, 1):
        print('%s = %s' % (fieldname, region))
        polygons = regions[region]
        # Create mask out of polygon, matching points from grid
        mask = create_mask(polygons, points, nlat, nlon)
        print(mask.shape)
        inside = 1 - mask  # 1 inside the region, 0 outside
        combined_mask = combined_mask + inside
        if plot:
            plot_mask = plot_mask + inside * i

    # Create netcdf for combined mask
    if netcdf_out:
        # NOTE(review): create_netcdf stores 1 - data, so this file holds 1
        # outside the regions and <= 0 inside, the opposite polarity to the
        # per-region files written by create_masks - confirm which is intended
        with Dataset(f_grid, 'r') as template:
            create_netcdf(template, combined_mask, 'masks_netcdf/mask_'+region_name+'.nc', template_var=template_var)

    if plot:
        plt.clf()
        m.contourf(xx, yy, plot_mask)
        plt.colorbar()
        m.drawcoastlines(linewidth=0.2)
        m.drawcountries(linewidth=0.2)
        plt.title('Mask: '+region_name)
        plt.savefig('plots/mask_'+region_name+'.png')

    # Return the combined mask, not the mask of the last region processed
    return combined_mask == 0
#############################################################################
# Create a mask, from textfile for a specific grid
# Area outside the polygons is True/1, area inside the polygons is False/0
#
# Input Arguments:
# f_grid: (filename of netcdf file containing grid information)
# textfile: path of text file containing coordinates of polygons
# latname, lonname, template_var: variable names for latitude, longitude and a template variable in f_grid netcdf file
# plot, netcdf_out: (optional) booleans- whether or not to create output plot and/or output netcdf file
#
# Returns: mask array
#
def create_mask_fromtext(f_grid, textfile, region_name='region', latname='lat', lonname='lon', template_var='pr', plot=False, netcdf_out=False):
    """Create a gridded mask from a text file of polygon vertices.

    Arguments:
        f_grid: netCDF file containing the grid information.
        textfile: path of text file containing coordinates of polygons
            (format read by load_polygons).
        region_name: name of the region, used in output filenames.
        latname, lonname, template_var: names of the latitude, longitude and
            template variables in f_grid.
        plot: if True, save 'plots/mask_<region_name>.png'.
        netcdf_out: if True, save 'masks_netcdf/mask_<region_name>.nc'.

    Returns:
        Mask array: True outside the polygons, False inside.
    """
    # first create folders (if needed)
    if plot and not os.path.exists('plots'):
        os.mkdir('plots')
    if netcdf_out and not os.path.exists('masks_netcdf'):
        os.mkdir('masks_netcdf')

    # Load polygons from the text file
    polygons = load_polygons(textfile)

    # Load lat lon grid (for mask)
    lonxx, latyy = load_grid(f_grid, latname=latname, lonname=lonname)
    nlat, nlon = lonxx.shape

    # Update lon to be from -180 to 180
    # NOTE: only needed if the polygons use longitudes from -180 to 180;
    # comment out otherwise
    lonxx[lonxx > 180] = lonxx[lonxx > 180] - 360

    # Turn lat and lon into a list of coordinates
    points = np.vstack((lonxx.flatten(), latyy.flatten())).T

    if plot:
        # Set up Basemap projection (may need fine tuning)
        m = Basemap(projection='robin', lon_0=180)
        xx, yy = m(lonxx, latyy)  # basemap coordinates

    # Create mask out of polygons, matching points from grid
    mask = create_mask(polygons, points, nlat, nlon)

    # Create netcdf for the mask
    if netcdf_out:
        # Close the template file once the mask has been written
        with Dataset(f_grid, 'r') as template:
            create_netcdf(template, mask, 'masks_netcdf/mask_'+region_name+'.nc', template_var=template_var)

    if plot:
        plt.clf()
        m.contourf(xx, yy, mask)
        plt.colorbar()
        m.drawcoastlines(linewidth=0.2)
        m.drawcountries(linewidth=0.2)
        plt.title('Mask: '+region_name)
        plt.savefig('plots/mask_'+region_name+'.png')
    return mask
#################################################################################
#
# Examples are below
#
if __name__ == '__main__':
    # Grid file used by the examples: a netCDF file providing the lat/lon
    # variables and the template variable
    f_grid = '/Users/sihanli/Desktop/NorESM1-HAPPI.pr.All-Hist_monclim_ensmean.nc'

    # Example: countries
    #
    # shapefile = '/Users/sihanli/Desktop/countries.shp'
    # fieldname = 'NAME'
    # # Create mask for each country in shapefile
    # create_masks(f_grid,shapefile,fieldname,latname='lat',lonname='lon',template_var='pr',plot=True)