mpl_divideimg_234_water_new.py
"""
MAPLE Workflow
(2) Script that breaks the image into smaller chunks (tiles)
Project: Permafrost Discovery Gateway: Mapping Application for Arctic Permafrost Land Environment(MAPLE)
PI : Chandi Witharana
Author : Rajitha Udwalpola
"""
import os
import pickle
from collections import defaultdict

import cv2
import h5py
import numpy as np
from osgeo import gdal

from mpl_config import MPL_Config


def divide_image(
    config: MPL_Config,
    input_image_path: str,  # path to the input image file
    target_blocksize: int,  # tile (crop) size in pixels
    file1: str,  # output HDF5 file holding the cropped tiles
    file2: str,  # output HDF5 file holding the per-tile metadata
):
    """
    Break the input image into smaller chunks (tiles).

    The water mask created for this input file is used to zero out water pixels,
    and the image is then broken into multiple tiles of size
    (target_blocksize x target_blocksize).

    NOTE: The cropped tiles are packed into a single HDF5 file.
    file1 : cropped tile images
    file2 : per-tile metadata (offsets and geo-coordinates) describing how to use file1
    """
    worker_root = config.WORKER_ROOT
    water_dir = config.WATER_MASK_DIR

    # derive the image name (without extension) from the input path
    new_file_name = input_image_path.split("/")[-1]
    new_file_name = new_file_name.split(".")[0]

    # load the water mask created for this input image
    water_mask_dir = os.path.join(water_dir, new_file_name)
    water_mask = os.path.join(water_mask_dir, "%s_watermask.tif" % new_file_name)
    MASK = gdal.Open(water_mask)
    mask_band = MASK.GetRasterBand(1)
    mask_array = mask_band.ReadAsArray()

    print(input_image_path)
    # Read the geotransform of the original image so pixel offsets can be
    # converted to geographic coordinates for further processing with gdal
    # (https://gdal.org/tutorials/geotransforms_tut.html):
    #   GT(0) x-coordinate of the upper-left corner of the upper-left pixel
    #   GT(1) w-e pixel resolution / pixel width
    #   GT(2) row rotation (typically zero)
    #   GT(3) y-coordinate of the upper-left corner of the upper-left pixel
    #   GT(4) column rotation (typically zero)
    #   GT(5) n-s pixel resolution / pixel height (negative for a north-up image)
    IMG1 = gdal.Open(input_image_path)
    gt1 = IMG1.GetGeoTransform()
    ulx, x_resolution, _, uly, _, y_resolution = gt1
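    # For reference (standard GDAL affine model), a pixel at (row, col) maps to
    # geographic coordinates as:
    #   x_geo = GT(0) + col * GT(1) + row * GT(2)
    #   y_geo = GT(3) + col * GT(4) + row * GT(5)
    # With zero rotation terms this reduces to the per-tile upper-left
    # coordinates computed in the tiling loop below.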
    # ---------------------- Apply the water mask ----------------------
    # Element-wise multiplication of the mask and the original data before the
    # image is broken up for processing. Only bands 2, 3 and 4 are used because
    # the neural network cannot process all 4 bands; the bands used for
    # inferencing must match the bands used for training.
    img_band2 = IMG1.GetRasterBand(2)
    img_band3 = IMG1.GetRasterBand(3)
    img_band4 = IMG1.GetRasterBand(4)
    final_array_2 = img_band2.ReadAsArray()
    final_array_3 = img_band3.ReadAsArray()
    final_array_4 = img_band4.ReadAsArray()
    final_array_2 = np.multiply(final_array_2, mask_array)
    final_array_3 = np.multiply(final_array_3, mask_array)
    final_array_4 = np.multiply(final_array_4, mask_array)
    # Files to store the masked and divided data. They are expected to live in a
    # directory named after the original file, under the sub-directory <divided_img>.
    # Two HDF5 files are created: one for the cropped tile data (<image_data>, file1)
    # and one for the per-tile parameters (<image_param>, file2).
    f1 = h5py.File(file1, "w")
    f2 = h5py.File(file2, "w")

    rows_input = IMG1.RasterYSize
    cols_input = IMG1.RasterXSize
    # ---------------------- Divide the image into tiles ----------------------
    overlap_rate = 0.2
    block_size = target_blocksize
    ysize = rows_input
    xsize = cols_input
    image_count = 0

    # dictionaries recording how each tile maps to its (row, col) grid position
    dict_ij = defaultdict(dict)
    dict_n = defaultdict(dict)
    tile_count = 0

    # upper-left row/column offsets of each tile, stepped with overlap
    y_list = range(0, ysize, int(block_size * (1 - overlap_rate)))
    x_list = range(0, xsize, int(block_size * (1 - overlap_rate)))
    dict_n["total"] = [len(y_list), len(x_list)]
    # ------------- Find the upper-left (x, y) of each divided image -------------
    # Tiles are visited row by row (top to bottom), left to right within a row.
    for id_i, i in enumerate(y_list):
        # clamp the window so it does not extend past the bottom of the raster
        if i + block_size < ysize:
            rows = block_size
        else:
            rows = ysize - i
        # loop over column offsets
        for id_j, j in enumerate(x_list):
            # clamp the window so it does not extend past the right edge
            if j + block_size < xsize:
                cols = block_size
            else:
                cols = xsize - j
            # cut the block out of the whole raster
            # TODO: check that these array values match the band's ReadAsArray()
            # note: band_1_array holds raster band 4 (original ordering kept)
            band_1_array = final_array_4[i : i + rows, j : j + cols]
            band_2_array = final_array_2[i : i + rows, j : j + cols]
            band_3_array = final_array_3[i : i + rows, j : j + cols]
            # skip tiles whose four corner pixels are all zero (black / fully masked)
            if (
                band_3_array[0, 0] == 0
                and band_3_array[0, -1] == 0
                and band_3_array[-1, 0] == 0
                and band_3_array[-1, -1] == 0
            ):
                continue
            dict_ij[id_i][id_j] = tile_count
            dict_n[tile_count] = [id_i, id_j]
            # stack the three bands into one array, contrast-stretch to 0-255,
            # then histogram-equalize each channel
            img = np.stack((band_1_array, band_2_array, band_3_array), axis=2)
            cv2.normalize(img, img, 0, 255, cv2.NORM_MINMAX)
            img = img.astype(np.uint8)
            B, G, R = cv2.split(img)
            out_B = cv2.equalizeHist(B)
            out_R = cv2.equalizeHist(R)
            out_G = cv2.equalizeHist(G)
            final_image = cv2.merge((out_B, out_G, out_R))
            # upper-left geographic coordinates of this tile
            ul_row_divided_img = uly + i * y_resolution
            ul_col_divided_img = ulx + j * x_resolution
            data_c = np.array(
                [i, j, ul_row_divided_img, ul_col_divided_img, tile_count]
            )
            image_count += 1
            # add the tile image and its parameters to the HDF5 files
            f1.create_dataset(f"image_{image_count}", data=final_image)
            f2.create_dataset(f"param_{image_count}", data=data_c)
            tile_count += 1
    # --------------- Store the pixel resolutions and the tile count ---------------
    values = np.array([x_resolution, y_resolution, image_count])
    f2.create_dataset("values", data=values)
    # persist the tile-grid lookup dictionaries for later workflow steps
    db_file_path = os.path.join(worker_root, "neighbors/%s_ij_dict.pkl" % new_file_name)
    with open(db_file_path, "wb") as dbfile:
        pickle.dump(dict_ij, dbfile)

    db_file_path = os.path.join(worker_root, "neighbors/%s_n_dict.pkl" % new_file_name)
    with open(db_file_path, "wb") as dbfile:
        pickle.dump(dict_n, dbfile)

    f1.close()
    f2.close()
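

# A minimal usage sketch (not part of the original script). The config setup and
# the paths below are placeholders / assumptions and would need to match a real
# MAPLE deployment, so the call is left commented out.
#
# if __name__ == "__main__":
#     config = MPL_Config()  # assumed default construction; may require arguments
#     divide_image(
#         config,
#         input_image_path="/data/input/scene.tif",        # hypothetical input GeoTIFF
#         target_blocksize=256,                             # hypothetical tile size
#         file1="/data/divided_img/scene_image_data.h5",    # receives the cropped tiles
#         file2="/data/divided_img/scene_image_param.h5",   # receives the tile metadata
#     )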