-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpre_grey_rgb.py
66 lines (62 loc) · 3.1 KB
/
pre_grey_rgb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#%% import packages
import numpy as np
import os
join = os.path.join
from skimage import io, transform
from tqdm import tqdm
# Convert 2D data to npz files, including images and corresponding masks.
# Everything in this section is user-editable configuration plus one-off setup.
modality = 'dd' # e.g., 'Dermoscopy'
anatomy = 'dd' # e.g., 'SkinCancer'
img_name_suffix = '.png'
gt_name_suffix = '.png'
# every saved file name starts with "<modality>_<anatomy>_"
prefix = modality + '_' + anatomy + '_'
img_path = 'path to /images' # path to the images
gt_path = 'path to/labels' # path to the corresponding annotations
# output folder, named "<modality>_<anatomy>" (prefix without its trailing '_')
npz_path = 'path to/data/npy/' + prefix[:-1] # save npz path e.g., MedSAM/data/npy/; don't miss the `/`
os.makedirs(join(npz_path, "gts"), exist_ok=True)
os.makedirs(join(npz_path, "imgs"), exist_ok=True)
# Only keep annotation files that carry the expected suffix, so that stray
# entries in the folder (hidden files, thumbnails, sub-directories) are not
# handed to the image reader later on.
names = sorted(n for n in os.listdir(gt_path) if n.endswith(gt_name_suffix))
# the backslash is escaped explicitly: a bare '\#' is an invalid escape sequence
print(f'ori \\# files {len(names)=}')
# set label ids that are excluded
remove_label_ids = []
tumor_id = None # only set this when there are multiple tumors in one image; convert semantic masks to instance masks
label_id_offset = 0 # added (plus 1) to every tumor instance id when tumor_id is set
do_intensity_cutoff = False # True for grey images
#%% save preprocessed images and masks as npz files
def _rescale_to_0_255(data):
    """Linearly map ``data`` onto [0, 255] (float result).

    A constant array has no intensity range to stretch; it is mapped to all
    zeros instead of dividing by zero.
    """
    data_min, data_max = np.min(data), np.max(data)
    if data_max == data_min:
        return np.zeros(data.shape, dtype=np.float64)
    return (data - data_min) / (data_max - data_min) * 255.0


if tumor_id is not None:
    # Third-party `connected-components-3d` package; imported lazily because it
    # is only required when semantic tumor masks are split into instances.
    import cc3d

for name in tqdm(names):
    # annotation and image share the same stem and differ only in suffix
    case_id = name.split(gt_name_suffix)[0]
    image_name = case_id + img_name_suffix
    gt_name = name
    # NOTE(review): the mask is cast to uint8, so label ids above 255 would wrap — confirm
    gt_data_ori = np.uint8(io.imread(join(gt_path, gt_name)))
    # remove label ids
    for remove_label_id in remove_label_ids:
        gt_data_ori[gt_data_ori == remove_label_id] = 0
    # label tumor masks as instances and remove from gt_data_ori
    if tumor_id is not None:
        tumor_bw = np.uint8(gt_data_ori == tumor_id)
        gt_data_ori[tumor_bw > 0] = 0
        # label tumor masks as instances
        tumor_inst, tumor_n = cc3d.connected_components(tumor_bw, connectivity=26, return_N=True)
        # put the tumor instances back to gt_data_ori
        # NOTE(review): instance id + offset + 1 is stored in a uint8 mask — verify it cannot exceed 255
        gt_data_ori[tumor_inst > 0] = tumor_inst[tumor_inst > 0] + label_id_offset + 1

    # load the image that belongs to this annotation
    image_data = io.imread(join(img_path, image_name))
    # values above the 8-bit range are min-max rescaled into uint8
    if np.max(image_data) > 255.0:
        image_data = np.uint8(_rescale_to_0_255(image_data))
    # grey image: replicate the single channel three times
    if len(image_data.shape) == 2:
        image_data = np.repeat(np.expand_dims(image_data, -1), 3, -1)
    assert len(image_data.shape) == 3, 'image data is not three channels: img shape:' + str(image_data.shape) + image_name
    # keep only the first three channels (drops e.g. an alpha channel)
    if image_data.shape[-1] > 3:
        image_data = image_data[:, :, :3]

    # image preprocess start
    if do_intensity_cutoff:
        foreground = image_data[image_data > 0]
        if foreground.size == 0:
            # no positive pixel: percentiles are undefined, the result is all background
            image_data_pre = np.zeros(image_data.shape, dtype=np.uint8)
        else:
            # clip to the 0.5th-99.5th percentile of the non-zero pixels, then stretch to [0, 255]
            lower_bound, upper_bound = np.percentile(foreground, [0.5, 99.5])
            image_data_pre = np.clip(image_data, lower_bound, upper_bound)
            image_data_pre = _rescale_to_0_255(image_data_pre)
            # pixels that were exactly zero stay zero
            image_data_pre[image_data == 0] = 0
            image_data_pre = np.uint8(image_data_pre)
    else:
        # print('no intensity cutoff')
        image_data_pre = image_data.copy()

    # one compressed archive per case, holding the image under "imgs" and the mask under "gts"
    np.savez_compressed(join(npz_path, prefix + case_id + '.npz'), imgs=image_data_pre, gts=gt_data_ori)