cnn1.py (forked from bunnie/iris-layout)
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import matplotlib.pyplot as plt
import numpy as np
import iris_dataset
#import cifar
# Dataset generation notes
#
# For any brand new block, you need to extract an idealized PNG of the layout from design
# data (e.g., a GDS file):
# 1. Extract the GDS of the layer & sub-block of interest using klayout. Put it in
#    imaging/blockname-layer.gds, e.g. imaging/wrapped_snn_network-poly.gds. The techfile
#    argument is required and is "--tech sky130" for the open source data set. Note that
#    the default layer is "poly" (which is correct for SKY130).
# 2. Run "gds_to_png.py". This will automatically search for all .gds files in imaging/
#    and generate idealized versions of the layers for reference alignment.
#
# With the block image, GDS data and idealized layout image, you can now create the data set:
# 1. Run "extract_dataset.py" with the names of the blocks that you want to generate
#    data for, i.e. "--names wrapped_snn_network"
#
# This will generate a .pkl file with the dataset, and a .meta file with a description
# of the training data set.
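#
# Example invocations for the two steps above (a sketch based only on the flags mentioned
# in these notes; verify the exact arguments against gds_to_png.py and extract_dataset.py):
#   python gds_to_png.py --tech sky130
#   python extract_dataset.py --names wrapped_snn_network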
# Current strategy:
#   Just try to distinguish between ff, logic, fill, other
#   - Reduce input channels from RGB to just gray - how to do that? This
#     should reduce the # of parameters we need to tune (see the note after this list)
#   - Refine the CNN to match our use case: right now the intermediate layers
#     are optimized for a task that's not ours (handwriting recognition)
#   - Maybe we need to eliminate extremely small fill from the training set?
#   - Alternatively, do we specify a cell size? Need to think about what
#     that even means.
#     - Maybe what we want in the end is a classifier that, given a patch of
#       image, guesses how many cells of each type are in a region, with a
#       certain probability?
#   - The underlying issue is that cell sizes differ widely in scale, and the
#     size of a cell matters. The problem is that the current CNN is designed
#     explicitly to disregard scale (written numbers have the same meaning
#     regardless of size), so again, we need to tune the CNN to remove the
#     part that makes it ignore the scale of an object.
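#
# Note on the first point above: in this version the RGB -> gray reduction is already
# implemented below, via transforms.Grayscale(num_output_channels=1) in the transform
# pipeline and by taking a single input channel in nn.Conv2d(1, 6, 5).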
PATH = './iris_net.pth'

# function to show an image
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# This is a preliminary confidence score based on how far apart the top two values are.
def confidence_score(probabilities):
    """Computes a confidence score based on the top-2 probabilities.

    Expects a [1, num_classes] tensor of probabilities already sorted in descending order.
    """
    top_prob = probabilities[0, 0].item()     # Highest probability
    second_prob = probabilities[0, 1].item()  # Second highest probability
    confidence = top_prob - second_prob       # Difference as a confidence score
    return confidence
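
# Worked example: for sorted probabilities [0.70, 0.20, 0.10] the score is
# 0.70 - 0.20 = 0.50, while a near-tie such as [0.40, 0.38, 0.22] gives only 0.02.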
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)  # changed to grayscale from self.conv1 = nn.Conv2d(3, 6, 5)
        # Input Channels (1): The model now expects single-channel (grayscale) input images.
        # Output Channels (6): Produces 6 feature maps by applying 6 filters.
        # Kernel Size (5): Each filter is 5x5 pixels.
        # Operation: Extracts low-level features such as edges and textures.
        # Q: what are we normalized to? what are the dimensions now?
        self.pool = nn.MaxPool2d(2, 2)
        # Pooling Type: Max Pooling.
        # Kernel Size (2x2): Each pooling operation considers a 2x2 region.
        # Stride (2): Moves the pooling window 2 pixels at a time.
        # Operation: Halves the spatial dimensions (downsampling), retaining only the most prominent features.
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Input Channels (6): Takes the 6 feature maps produced by conv1.
        # Output Channels (16): Produces 16 feature maps by applying 16 filters.
        # Kernel Size (5): Each filter is 5x5 pixels.
        # Operation: Extracts higher-level features from the downsampled data.
        #self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc1 = nn.Linear(1040, 120)  # BUT WHY
        # Input Features (1040): The flattened feature map from the convolutional and pooling layers.
        # Reason for 1040: The dimensions of the feature map depend on the input image size,
        # the convolutional layer settings, and the pooling steps. If the input image size is
        # fixed, this value is calculated by hand (see the worked example below).
        # Output Features (120): Projects the input into a 120-dimensional space for further processing.
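        # Worked example for the 1040 (a sketch; the 32x64 grayscale patch size is an
        # assumption inferred from the arithmetic, not taken from the dataset code):
        #   conv1 (5x5, no padding): 32x64 -> 28x60; pool (2x2, stride 2): -> 14x30
        #   conv2 (5x5, no padding): 14x30 -> 10x26; pool (2x2, stride 2): -> 5x13
        #   flatten: 16 channels * 5 * 13 = 1040 features per sample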
        self.fc2 = nn.Linear(120, 84)
        # Input Features (120): Takes the 120 features from fc1.
        # Output Features (84): Reduces dimensionality further.

        # Additional fully connected layer for image size (width & height)
        self.size_fc = nn.Linear(2, 32)

        # Final classification layer (concatenates image & size features)
        self.fc3 = nn.Linear(84 + 32, 3)
        # Input Features (84 + 32 = 116): The features from fc2 concatenated with the
        # 32-dimensional output of the fully connected layer for image size.
        # Output Features (3): Outputs scores for 3 classes. Each score represents how
        # likely the input belongs to a particular class.
    def forward(self, x, size):
        x = self.pool(F.relu(self.conv1(x)))
        # Applies the first convolutional layer to the input tensor, then the ReLU
        # activation function element-wise, then max pooling.
        x = self.pool(F.relu(self.conv2(x)))
        # Applies the second convolutional layer (plus ReLU and pooling) to the tensor from the previous step.
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        # Converts each multi-dimensional feature map into a 1D tensor for the fully connected layers.
        # Flattening starts from the second dimension (1), leaving the batch dimension (0) intact.
        # Input shape: [batch_size, 16, H', W'] with 16 * H' * W' = 1040.
        # Output shape: [batch_size, 1040].
        x = F.relu(self.fc1(x))
        # Fully connected layer that transforms the flattened features into a 120-dimensional vector.
        # Input shape: [batch_size, 1040].
        # Output shape: [batch_size, 120].
        # Applies ReLU activation for non-linearity.
        x = F.relu(self.fc2(x))
        size = size.view(-1, 2)  # Ensure it always has a batch dimension
        size_features = F.relu(self.size_fc(size))
        # Ensure correct batch size
        if size_features.shape[0] != x.shape[0]:
            size_features = size_features.expand(x.shape[0], -1)  # Match batch size
        #print(f"x.shape: {x.shape}, size_features.shape: {size_features.shape}")
        x = torch.cat((x, size_features), dim=1)  # Now safe to concatenate
        x = self.fc3(x)
        # Maps the concatenated 84 + 32 = 116-dimensional vector to 3 output scores, one per class.
        # Input shape: [batch_size, 116].
        # Output shape: [batch_size, 3].
        return x
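
# Minimal smoke-test sketch for Net (hypothetical: assumes the 32x64 grayscale patch size
# inferred above and that "size" is a per-image (height, width) pair; adjust to match the
# actual iris_dataset output before relying on it):
#   net = Net()
#   dummy_img = torch.zeros(4, 1, 32, 64)          # [batch, channels, H, W]
#   dummy_size = torch.tensor([[32.0, 64.0]] * 4)  # one (H, W) pair per image
#   logits = net(dummy_img, dummy_size)            # -> shape [4, 3]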
if __name__ == "__main__":
    start_time = time.time()

    # Transforms: Prepares the input images for training by converting them to grayscale
    # tensors and normalizing them.
    # Batch Size: Sets the number of samples per batch for training and testing to 4.
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))  # Adjust mean & std for single channel
    ])
    batch_size = 4

    #debugset = cifar.CIFAR10(root='./data', train=True, download=True, transform=transform)
    #debugloader = torch.utils.data.DataLoader(debugset, batch_size=batch_size, shuffle=True, num_workers=2)

    import pickle
    from typing import Any
    data: Any = []
    targets = []

    trainset = iris_dataset.Iris(root='./imaging', train=True,
                                 download=True, transform=transform)
    print(len(trainset.classes))
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=2)

    testset = iris_dataset.Iris(root='./imaging', train=False,
                                download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                             shuffle=False, num_workers=2)
    # Loads the training and testing datasets using the custom iris_dataset class.
    # DataLoader: Wraps the datasets for easy iteration in batches.

    # Defines class labels.
    classes = ('ff', 'logic', 'fill')

    dataiter = iter(trainloader)
    images, labels, image_sizes = next(dataiter)

    # print images
    print('Image check: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(batch_size)))
    #imshow(torchvision.utils.make_grid(images))  # commented out to run faster for testing
    if True:
        net = Net()
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        # Assuming that we are on a CUDA machine, this should print a CUDA device:
        print(f"this is the device {device}")
        net.to(device)

        criterion = nn.CrossEntropyLoss()  # The loss function: combines LogSoftmax and negative log-likelihood loss.
        optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

        # Training loop
        for epoch in range(4):  # loop over the dataset multiple times
            print(f"Entering epoch {epoch}")
            running_loss = 0.0
            for i, data in enumerate(trainloader, 0):
                # get the inputs; data is a list of [inputs, labels, image_sizes]
                # inputs, labels = data
                #inputs, labels = data[0].to(device), data[1].to(device)
                inputs, labels, image_sizes = data[0].to(device), data[1].to(device), data[2].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = net(inputs, image_sizes)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # print statistics
                running_loss += loss.item()
                if i % 2000 == 1999:  # print every 2000 mini-batches
                    print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                    running_loss = 0.0
                    # Logs the average loss every 2000 mini-batches.

        print('Finished Training')
        torch.save(net.state_dict(), PATH)
    if True:
        net = Net()
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        # Assuming that we are on a CUDA machine, this should print a CUDA device:
        print(device)
        net.load_state_dict(torch.load(PATH, weights_only=True))
        net.to(device)

        dataiter = iter(testloader)
        images, labels, image_sizes = next(dataiter)

        # print images
        print('GroundTruth: ', ' '.join(f'{classes[labels[j]]:5s}' for j in range(4)))
        #imshow(torchvision.utils.make_grid(images))  # commented out to run faster for testing

        images_cuda = images.to(device)
        image_sizes = image_sizes.to(device)  # Move this test batch's image sizes to the device
        outputs = net(images_cuda, image_sizes)  # Added image sizes

        probabilities = torch.softmax(outputs, dim=1)  # Convert logits to probabilities
        sorted_probs, sorted_indices = torch.sort(probabilities, descending=True)

        # Compute confidence score for each image
        confidence_scores = [confidence_score(sorted_probs[i].unsqueeze(0)) for i in range(len(sorted_probs))]

        print("Ranked Predictions with Confidence:")
        for i in range(4):  # Loop over batch
            print(f"Image {i+1}:")
            for rank, (index, prob) in enumerate(zip(sorted_indices[i], sorted_probs[i]), start=1):
                print(f"  Rank {rank}: {classes[index]} ({prob:.2%} confidence)")
        correct = 0
        total = 0
        total_confidence = 0  # Store confidence scores

        # since we're not training, we don't need to calculate the gradients for our outputs
        with torch.no_grad():
            for data in testloader:
                #images, labels = data
                images, labels, image_sizes = data[0].to(device), data[1].to(device), data[2].to(device)
                # calculate outputs by running images through the network
                outputs = net(images, image_sizes)
                images_cuda = images.to(device)
                labels_cuda = labels.to(device)
                #outputs = net(images_cuda)

                # Convert logits to probabilities
                probabilities = torch.softmax(outputs, dim=1)
                sorted_probs, sorted_indices = torch.sort(probabilities, descending=True)

                # Compute confidence scores
                total_confidence += sum(confidence_score(sorted_probs[i].unsqueeze(0)) for i in range(len(sorted_probs)))

                # the class with the highest energy is what we choose as prediction
                _, predicted = torch.max(outputs.data, 1)
                total += labels_cuda.size(0)
                correct += (predicted == labels_cuda).sum().item()

        # Compute average confidence score
        average_confidence = total_confidence / total  # Using total, which already tracks the number of samples

        # Print accuracy with the average confidence score
        print(f'Accuracy of the network on the test images: {100 * correct // total} % '
              f'(Avg Confidence: {average_confidence:.2f})')
        # prepare to count predictions for each class
        correct_pred = {classname: 0 for classname in classes}
        total_pred = {classname: 0 for classname in classes}

        # again no gradients needed
        with torch.no_grad():
            for data in testloader:
                images, labels, image_sizes = data[0].to(device), data[1].to(device), data[2].to(device)
                images_cuda = images.to(device)  # can remove later
                #labels_cuda = labels.to(device)
                outputs = net(images_cuda, image_sizes)
                _, predictions = torch.max(outputs, 1)
                # collect the correct predictions for each class
                for label, prediction in zip(labels, predictions):
                    if label == prediction:
                        correct_pred[classes[label.item()]] += 1
                    total_pred[classes[label.item()]] += 1

        # print accuracy for each class
        for classname, correct_count in correct_pred.items():
            if total_pred[classname] == 0:  # Prevent division by zero
                print(f'Accuracy for class: {classname:5s} is N/A (No samples)')
            else:
                accuracy = 100 * float(correct_count) / total_pred[classname]
                print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')
    end_time = time.time()
    print(f"Execution Time: {end_time - start_time:.6f} seconds")