-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdata_preprocessing.py
104 lines (67 loc) · 2.36 KB
/
data_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import pickle
import random
import numpy as np
import cv2
import os
# Class names. A name's position in this list is the integer label
# stored with each image (NORMAL -> 0, PNEUMONIA -> 1).
CATEGORIES = ["NORMAL", "PNEUMONIA"]

# Images are resized to img_size x img_size pixels.
img_size = 150

# Root folders of the dataset splits; each is expected to contain one
# sub-folder per entry in CATEGORIES.
TRAIN_DIR = "C:/Users/RIDZZ ZOLDYCK/Desktop/pneumonia_data/chest_xray/train"
TEST_DIR = "C:/Users/RIDZZ ZOLDYCK/Desktop/pneumonia_data/chest_xray/test"
# The validation split is currently disabled:
#VAL_DIR = "C:/Users/RIDZZ ZOLDYCK/Desktop/pneumonia_data/chest_xray/val"

# Accumulators filled by the loader functions with [image, label] pairs.
train_data, test_data = [], []
#val_data = []
def create_training_data():
    """Load every training image into the module-level ``train_data`` list.

    For each name in ``CATEGORIES`` the matching sub-folder of ``TRAIN_DIR``
    is listed. Each file is read as a grayscale image, resized to
    ``img_size`` x ``img_size`` and appended to ``train_data`` as
    ``[image, class_index]``, where ``class_index`` is the position of the
    category in ``CATEGORIES``.

    Files that OpenCV cannot decode are reported and skipped.

    Raises:
        OSError: if a category folder is missing or cannot be listed.
    """
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(TRAIN_DIR, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None rather than
            # raising; handing None to cv2.resize would abort the whole run.
            if img_arr is None:
                print("Skipping unreadable image : ", img_path)
                continue
            img_arr = cv2.resize(img_arr, (img_size, img_size))
            train_data.append([img_arr, class_num])
def create_test_data():
    """Load every test image into the module-level ``test_data`` list.

    For each name in ``CATEGORIES`` the matching sub-folder of ``TEST_DIR``
    is listed. Each file is read as a grayscale image, resized to
    ``img_size`` x ``img_size`` and appended to ``test_data`` as
    ``[image, class_index]``, where ``class_index`` is the position of the
    category in ``CATEGORIES``.

    Files that OpenCV cannot decode are reported and skipped.

    Raises:
        OSError: if a category folder is missing or cannot be listed.
    """
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(TEST_DIR, category)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_arr = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None rather than
            # raising; handing None to cv2.resize would abort the whole run.
            if img_arr is None:
                print("Skipping unreadable image : ", img_path)
                continue
            img_arr = cv2.resize(img_arr, (img_size, img_size))
            test_data.append([img_arr, class_num])
# Fill the module-level sample lists from disk: training split, then test.
create_training_data()
create_test_data()

# Report how many samples each split contains.
print(" \n\nNumber of images in Training Dataset : ", len(train_data))
print(" \n\nNumber of images in Test Dataset : ", len(test_data))

# The loaders append one category after the other, so shuffle each split
# in place to interleave the labels (train first, then test).
for dataset in (train_data, test_data):
    random.shuffle(dataset)

# Print the first five labels of each split as a quick look at the result.
print(" \n\n Some labels after shuffling : ")
for heading, dataset in (
    ("\nTrain Data : ", train_data),
    ("\nTest Data : ", test_data),
):
    print(heading)
    for _image, label in dataset[:5]:
        print(label)
# Pull the labels out of the [image, label] pairs; they stay plain lists.
y_train = [label for _image, label in train_data]
y_test = [label for _image, label in test_data]

# Stack the images of each split into one array and add a trailing axis of
# size 1, giving shape (num_samples, img_size, img_size, 1).
x_train = np.array([image for image, _label in train_data]).reshape(
    -1, img_size, img_size, 1
)
x_test = np.array([image for image, _label in test_data]).reshape(
    -1, img_size, img_size, 1
)
# Write features and labels of both splits to pickle files in the current
# working directory. Each file is opened in a `with` block so the handle is
# closed even if pickle.dump raises part-way through.
for filename, payload in (
    ("X_TRAIN.pickle", x_train),
    ("X_TEST.pickle", x_test),
    ("Y_TRAIN.pickle", y_train),
    ("Y_TEST.pickle", y_test),
):
    with open(filename, "wb") as pickle_out:
        pickle.dump(payload, pickle_out)

print(" \n\n Features and labels of all datasets have been saved ")