Commit
Merge remote-tracking branch 'origin/krish' into main
Yu-Zhewen committed Oct 11, 2023
2 parents 2210a9e + c1cdd5c commit c80c641
Showing 18 changed files with 2,017 additions and 41 deletions.
67 changes: 67 additions & 0 deletions README.md
@@ -0,0 +1,67 @@
## relu_main.py
The relu_main.py script runs and logs sparsity collection and the optimiser for different ReLU tuning policies.

### Notes
Assumes the existence of the following directories (a setup sketch follows these notes):
- ./runlog
- ./runlog/<arch-name> : Directory to store sparsity information
- ./relu_thresholds
- ./relu_thresholds/<arch-name> : Directory to store JSON files containing ReLU threshold information, annotated onto the ONNX model by onnx_sparsity_attribute.py
- ./onnx_models
- ./onnx_models/<arch-name> : Directory to store annotated ONNX models
- ../../fpgaconvnet-optimiser/fpgaconvnet/optimiser/<arch-name> : Directory to store optimiser outputs

Uses the krish-skipping branches of fpgaconvnet-optimiser and fpgaconvnet-model
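
The expected layout can be created up front. A minimal sketch, assuming the architecture name is `resnet18` and the relative paths listed above:

```python
# Sketch: create the directory layout relu_main.py expects (assumes arch name "resnet18").
import os

arch = "resnet18"  # hypothetical example architecture name
for d in [
    f"./runlog/{arch}",
    f"./relu_thresholds/{arch}",
    f"./onnx_models/{arch}",
    f"../../fpgaconvnet-optimiser/fpgaconvnet/optimiser/{arch}",
]:
    os.makedirs(d, exist_ok=True)
```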


### Usage
```
python relu_main.py
```

### Flags
- **arch**: Model architecture name
- **relu-policy**: ReLU tuning policy; choice between slowest_node and uniform
- **fixed-hardware**: Uses fixed hardware and does not run the optimiser. The "platform_path" and "optimised_config_path" flags must be provided to load the fixed hardware
- **normalise-hardware**: Runs the optimiser with the same DSPs for the dense and no-skipping windows
- **accuracy_path**: Path for accuracy results; used with fixed hardware
- **model_path**: Path to the sparse .onnx model
- **platform_path**: Path to platform specs (.toml); used with fixed hardware
- **gpu**: GPU id to use
- **enable-wandb**: Enable logging to Weights & Biases

### Parameters you may want to vary
- **THRESHOLD_INC in relu_main.py**: Amount by which the ReLU threshold is increased on each iteration (see the sketch below)
- **--gain flag in the fpgaconvnet-optimiser CLI**: Minimum gain required to push fine
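
For illustration only, a rough sketch of how a uniform policy could apply an increment like THRESHOLD_INC; the names below are placeholders, not the actual relu_main.py implementation:

```python
# Illustrative sketch of a uniform threshold sweep (placeholder names, not relu_main.py itself).
THRESHOLD_INC = 0.05  # amount added to every ReLU threshold per iteration

def uniform_sweep(layers, max_iters=10):
    thresholds = {layer: 0.0 for layer in layers}
    for _ in range(max_iters):
        for layer in thresholds:
            thresholds[layer] += THRESHOLD_INC  # uniform policy: same increase everywhere
        yield dict(thresholds)  # snapshot of the thresholds for this iteration

for cfg in uniform_sweep(["layer1.relu", "layer2.relu"], max_iters=3):
    print(cfg)
```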

### Example Usage:
#### **Uniform Increase with changing hardware for resnet18**:
```
python relu_main.py -a resnet18 --relu_policy uniform
```

#### **Uniform Increase with fixed hardware for resnet50**:
```
python relu_main.py -a resnet50 --fixed-hardware --relu_policy uniform
```

#### **Slowest node Increase with changing hardware for vgg11**:
```
python relu_main.py -a vgg11 --relu_policy slowest_node
```

#### **Slowest node Increase with changing hardware compared to normalised sparse and dense for vgg11**:
```
python relu_main.py -a vgg11 --normalise-hardware --relu_policy slowest_node
```

#### **Slowest node Increase with fixed hardware for resnet18**:
```
python relu_main.py -a resnet18 --fixed-hardware --relu_policy slowest_node
```


<!-- ## Collecting sparsity for a specific relu threshold configuration -->



190 changes: 190 additions & 0 deletions imagenet_activation_sensitivity.py
@@ -0,0 +1,190 @@
import argparse
import copy
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from utils import *
from sparsity_utils import *
from quan_utils import *
from relu_utils import *

from fpgaconvnet.parser.Parser import Parser


parser = argparse.ArgumentParser(description='PyTorch ImageNet')
parser.add_argument('--data', metavar='DIR', default="~/dataset/ILSVRC2012_img",
help='path to dataset')
parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
help='model architecture: ' +
' | '.join(model_names))

parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
help='number of data loading workers (default: 4)')
parser.add_argument('-b', '--batch-size', default=64, type=int,
metavar='N',
help='mini-batch size')
parser.add_argument('-p', '--print-freq', default=10, type=int,
metavar='N', help='print frequency (default: 10)')
parser.add_argument('--gpu', default=None, type=int,
help='GPU id to use.')


parser.add_argument('--ma_window_size', default=None, type=int,
help='')
parser.add_argument('--calibration-size', default=4, type=int,
help='')

parser.add_argument("--accuracy_output", default=None, type=str,
help='Path to csv file to write accuracy to')



def imagenet_main():
args = parser.parse_args()

# if args.output_path == None:
# output_dir = str(args.arch) + "_output_relu_" + str(args.relu_threshold)
# if not os.path.isdir(output_dir):
# os.makedirs(output_dir)
# args.output_path = os.path.join(os.getcwd(), output_dir)

print(args)

random.seed(0)
torch.manual_seed(0)

# create model
print("=> using pre-trained model '{}'".format(args.arch))
model = load_model(args.arch)
random_input = torch.randn(1, 3, 224, 224)

if args.gpu is not None:
print("Use GPU: {}".format(args.gpu))
torch.cuda.set_device(args.gpu)
model = model.cuda(args.gpu)
random_input = random_input.cuda()
valdir = os.path.join(args.data, 'val')
traindir = os.path.join(args.data, 'train')
else:
print('using CPU, this will be slow')
valdir = os.path.join(args.data, 'val')
traindir = os.path.join(args.data, 'val')

print("Calculating MACs and Params")
calculate_macs_params(model, random_input, False, inference_mode=True)
# define loss function (criterion)
criterion = nn.CrossEntropyLoss().cuda(args.gpu)

# Data loading code
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])

val_loader = torch.utils.data.DataLoader(
datasets.ImageFolder(valdir, transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
normalize,
])),
batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True)


train_dataset = datasets.ImageFolder(traindir, transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
]))
# calibrate_size = 50000
calibrate_size = args.calibration_size
# per class few sampling, different from random_split
# https://github.com/mit-han-lab/proxylessnas/blob/6e7a96b7190963e404d1cf9b37a320501e62b0a0/search/data_providers/imagenet.py#L21
# assert calibrate_size % 1000 == 0
"""
rand_indexes = torch.randperm(len(train_dataset)).tolist()
train_labels = [sample[1] for sample in train_dataset.samples]
per_class_remain = [calibrate_size // 1000] * 1000
train_indexes, calibrate_indexes = [], []
for idx in rand_indexes:
label = train_labels[idx]
if per_class_remain[label] > 0:
calibrate_indexes.append(idx)
per_class_remain[label] -= 1
else:
train_indexes.append(idx)
"""
#Randomness handled by seeds
rand_indexes = torch.randperm(len(train_dataset)).tolist()
calibrate_indexes = random.choices(rand_indexes, k=calibrate_size)

#train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indexes)
calibrate_sampler = torch.utils.data.sampler.SubsetRandomSampler(calibrate_indexes)

#train_loader = torch.utils.data.DataLoader(
# train_dataset,
# batch_size=args.batch_size,
# num_workers=args.workers, pin_memory=True, sampler=train_sampler)

calibrate_dataset = datasets.ImageFolder(traindir, transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
normalize,
]))

calibrate_loader = torch.utils.data.DataLoader(
calibrate_dataset,
batch_size=args.batch_size,
num_workers=args.workers, pin_memory=True, sampler=calibrate_sampler)


#-----------------Model Quantisation----------------
# todo: measure post-quantisation results???
print("Quantising model")
model_quantisation(model, calibrate_loader, quantization_method=QuanMode.NETWORK_FP, weight_width=16, data_width=16)
print("Model quantised")
original_top1, original_top5 = validate(val_loader, model, criterion)
print("Accuracy above is for quantised model")
original_top1 = float(str(original_top1).split("( ")[1][:-1])
original_top5 = float(str(original_top5).split("( ")[1][:-1])
# use vanilla convolution to measure
# post-activation (post-sliding-window, to be more precise) sparsity

#-----------------Variable ReLU Sensitivity---------------------
relu_list = []
for name, module in model.named_modules():
if isinstance(module, nn.ReLU):#or isinstance(module, nn.Linear):
relu_list.append(name)

model_copy = copy.deepcopy(model)

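# Coarse-to-fine sensitivity search per ReLU layer: sweep 21 evenly spaced
# thresholds over [min_thresh, max_thresh]; the first threshold that drops
# top-5 accuracy below 99% of the quantised baseline sets a narrower range for
# the next pass, until the range is smaller than 0.01.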
for relu_layer in relu_list:
min_thresh = 0
max_thresh = 20
while (max_thresh - min_thresh) > 0.01:
recorded = False
for threshold in np.linspace(min_thresh, max_thresh, 21):
model = copy.deepcopy(model_copy)
replace_layer_with_variable_relu(model, relu_layer, threshold=threshold)
print("Variable ReLU added")
top1, top5 = validate(val_loader, model, criterion)
print("Accuracy above is for " + str(relu_layer) + " with ReLU threshold:" + str(threshold))
top1 = str(top1).split("( ")[1][:-1]
top5 = str(top5).split("( ")[1][:-1]
output_dir = args.accuracy_output + "/" + str(args.arch)
output_accuracy_to_csv(args.arch, threshold, relu_layer, top1, top5, output_dir)
if float(top5) < 0.99*original_top5 and not recorded:
min_thresh, max_thresh = threshold - (max_thresh - min_thresh)/20, threshold
recorded = True


if __name__ == '__main__':
imagenet_main()

#
54 changes: 51 additions & 3 deletions imagenet_main.py
@@ -12,6 +12,11 @@
from utils import *
from sparsity_utils import *
from quan_utils import *
from relu_utils import *

from fpgaconvnet.parser.Parser import Parser
import json


parser = argparse.ArgumentParser(description='PyTorch ImageNet')
parser.add_argument('--data', metavar='DIR', default="~/dataset/ILSVRC2012_img",
@@ -35,9 +40,24 @@

parser.add_argument('--ma_window_size', default=None, type=int,
help='')
-parser.add_argument('--calibration-size', default=4, type=int,
+parser.add_argument('--calibration-size', default=2500, type=int,
help='')

parser.add_argument('--relu_threshold', default=None, type=str,
help='path to json containing relu thresholds')

parser.add_argument("--accuracy_output", default=None, type=str,
help='Path to csv file to write accuracy to')

# parser.add_argument("--model_path", default=None, type=str,
# help='Path to sparse .onnx model')

# parser.add_argument("--platform_path", default=None, type=str,
# help='Path to platform specs (.toml)')

# parser.add_argument("--optimised_config_path", default=None, type=str,
# help='Path to optimised configuration (.json)')


def imagenet_main():
args = parser.parse_args()
@@ -60,13 +80,14 @@ def imagenet_main():
torch.cuda.set_device(args.gpu)
model = model.cuda(args.gpu)
random_input = random_input.cuda()
-valdir = os.path.join(args.data, 'validation')
+valdir = os.path.join(args.data, 'val')
traindir = os.path.join(args.data, 'train')
else:
print('using CPU, this will be slow')
valdir = os.path.join(args.data, 'val')
-traindir = os.path.join(args.data, 'val')
+traindir = os.path.join(args.data, 'train')

print("Calculating MACs and Params")
calculate_macs_params(model, random_input, False, inference_mode=True)
# define loss function (criterion)
criterion = nn.CrossEntropyLoss().cuda(args.gpu)
@@ -85,6 +106,7 @@ def imagenet_main():
batch_size=args.batch_size, shuffle=False,
num_workers=args.workers, pin_memory=True)


train_dataset = datasets.ImageFolder(traindir, transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
@@ -109,6 +131,7 @@ def imagenet_main():
else:
train_indexes.append(idx)
"""
#Randomness handled by seeds
rand_indexes = torch.randperm(len(train_dataset)).tolist()
calibrate_indexes = random.choices(rand_indexes, k=calibrate_size)

@@ -132,14 +155,39 @@ def imagenet_main():
batch_size=args.batch_size,
num_workers=args.workers, pin_memory=True, sampler=calibrate_sampler)


#-----------------Model Quantisation----------------
# todo: measure post-quantisation results???
print("Quantising model")
model_quantisation(model, calibrate_loader, quantization_method=QuanMode.NETWORK_FP, weight_width=16, data_width=16)
print("Model quantised")
validate(val_loader, model, criterion)
print("Accuracy above is for quantised model")
# use vanilla convolution to measure
# post-activation (post-sliding-window, to be more precise) sparsity

#-----------------Variable ReLU---------------------
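# If a thresholds json is supplied, load it and swap every ReLU for a
# variable-threshold version, then re-measure top-1/top-5 accuracy.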
if args.relu_threshold is not None:
f = open(args.relu_threshold)
args.relu_threshold = json.load(f)
replace_with_variable_relu(model, threshold=args.relu_threshold)
print("Variable ReLU added")
top1, top5 = validate(val_loader, model, criterion)
print("Accuracy above is for ReLU threshold:" + str(args.relu_threshold))
top1 = str(top1).split("( ")[1][:-1]
top5 = str(top5).split("( ")[1][:-1]


#---------------Sparsity Data Collection----------
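# Swap convolutions for the instrumented "vanilla" implementation so that
# post-activation sparsity can be measured on the calibration set and written out to csv.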
replace_with_vanilla_convolution(model, window_size=args.ma_window_size)
print("Vanilla Convolution added")
validate(calibrate_loader, model, criterion, args.print_freq)
print("Sparsity data collected")
output_sparsity_to_csv(args.arch, model, args.output_path)

total_sparsity = total_network_sparsity(model)
output_accuracy_to_csv(args.arch, args.relu_threshold, top1, top5, total_sparsity, args.accuracy_output)


if __name__ == '__main__':
imagenet_main()