diff --git a/README.md b/README.md new file mode 100644 index 0000000..f0c9e64 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +## relu_main.py +The relu_main script is used to run and log sparsity collection and optimiser for different relu tuning policies + +### Notes +Assumes the existence of the following directories: +- ./runlog +- `./runlog/<arch-name>` : Directory to store sparsity information +- ./relu_thresholds +- `./relu_thresholds/<arch-name>` : Directory to store json files containing relu threshold information. Annotated to onnx model by onnx_sparsity_attribute.py +- ./onnx_models +- `./onnx_models/<arch-name>` : Directory to store annotated onnx models +- `../../fpgaconvnet-optimiser/fpgaconvnet/optimiser/<arch-name>` : Directory to store optimiser outputs + +Uses the krish-skipping branches of fpgaconvnet-optimiser and fpgaconvnet-model + + +### Usage +``` +python relu_main.py +``` + +### Flags +- **arch**: model_name +- **relu-policy**: relu policy choice between slowest_node and uniform +- **fixed-hardware**: Uses fixed-hardware and does not run optimiser. Must provide "platform_path" and "optimised_config_path" flags to load fixed hardware +- **normalise-hardware**: Runs optimiser on same DSPs for dense and no skipping windows +- **accuracy_path**: "model_path". For fixed hardware +- **model_path**: Path to sparse .onnx model. +- **platform_path**: Path to platform specs (.toml). 
For fixed hardware +- **gpu** +- **enable-wandb** + +### Parameters you may want to vary +- **THRESHOLD_INC in relu_main.py**: Amount to increase the ReLU threshold by on each iteration +- **--gain flag in fpgaconvnet-optimiser cli**: Minimum gain to push fine + +### Example Usage: +#### **Uniform Increase with changing hardware for resnet18**: +``` +python relu_main.py -a resnet18 --relu_policy uniform +``` + +#### **Uniform Increase with fixed hardware for resnet50**: +``` +python relu_main.py -a resnet50 --fixed-hardware --relu_policy uniform +``` + +#### **Slowest node Increase with changing hardware for vgg11**: +``` +python relu_main.py -a vgg11 --relu_policy slowest_node +``` + +#### **Slowest node Increase with changing hardware compared to normalised sparse and dense for vgg11**: +``` +python relu_main.py -a vgg11 --normalise-hardware --relu_policy slowest_node +``` + +#### **Slowest node Increase with fixed hardware for resnet18**: +``` +python relu_main.py -a resnet18 --fixed-hardware --relu_policy slowest_node +``` + + + + + + diff --git a/imagenet_activation_sensitivity.py b/imagenet_activation_sensitivity.py new file mode 100644 index 0000000..27306b9 --- /dev/null +++ b/imagenet_activation_sensitivity.py @@ -0,0 +1,190 @@ +import argparse +import os +import random + +import torch +import torch.nn as nn +import torch.utils.data +import torchvision.transforms as transforms +import torchvision.datasets as datasets + +from utils import * +from sparsity_utils import * +from quan_utils import * +from relu_utils import * + +from fpgaconvnet.parser.Parser import Parser + + +parser = argparse.ArgumentParser(description='PyTorch ImageNet') +parser.add_argument('--data', metavar='DIR', default="~/dataset/ILSVRC2012_img", + help='path to dataset') +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', + help='model architecture: ' + + ' | '.join(model_names)) + +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of 
data loading workers (default: 4)') +parser.add_argument('-b', '--batch-size', default=64, type=int, + metavar='N', + help='mini-batch size') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') + + +parser.add_argument('--ma_window_size', default=None, type=int, + help='') +parser.add_argument('--calibration-size', default=4, type=int, + help='') + +parser.add_argument("--accuracy_output", default=None, type=str, + help='Path to csv file to write accuracy to') + + + +def imagenet_main(): + args = parser.parse_args() + + # if args.output_path == None: + # output_dir = str(args.arch) + "_output_relu_" + str(args.relu_threshold) + # if not os.path.isdir(output_dir): + # os.makedirs(output_dir) + # args.output_path = os.path.join(os.getcwd(), output_dir) + + print(args) + + random.seed(0) + torch.manual_seed(0) + + # create model + print("=> using pre-trained model '{}'".format(args.arch)) + model = load_model(args.arch) + random_input = torch.randn(1, 3, 224, 224) + + if args.gpu is not None: + print("Use GPU: {}".format(args.gpu)) + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + random_input = random_input.cuda() + valdir = os.path.join(args.data, 'val') + traindir = os.path.join(args.data, 'train') + else: + print('using CPU, this will be slow') + valdir = os.path.join(args.data, 'val') + traindir = os.path.join(args.data, 'val') + + print("Calculating MACs and Params") + calculate_macs_params(model, random_input, False, inference_mode=True) + # define loss function (criterion) + criterion = nn.CrossEntropyLoss().cuda(args.gpu) + + # Data loading code + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + val_loader = torch.utils.data.DataLoader( + datasets.ImageFolder(valdir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + 
transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + + + train_dataset = datasets.ImageFolder(traindir, transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + # calibrate_size = 50000 + calibrate_size = args.calibration_size + # per class few sampling, different from random_split + # https://github.com/mit-han-lab/proxylessnas/blob/6e7a96b7190963e404d1cf9b37a320501e62b0a0/search/data_providers/imagenet.py#L21 + # assert calibrate_size % 1000 == 0 + """ + rand_indexes = torch.randperm(len(train_dataset)).tolist() + train_labels = [sample[1] for sample in train_dataset.samples] + per_class_remain = [calibrate_size // 1000] * 1000 + train_indexes, calibrate_indexes = [], [] + for idx in rand_indexes: + label = train_labels[idx] + if per_class_remain[label] > 0: + calibrate_indexes.append(idx) + per_class_remain[label] -= 1 + else: + train_indexes.append(idx) + """ + #Randomness handled by seeds + rand_indexes = torch.randperm(len(train_dataset)).tolist() + calibrate_indexes = random.choices(rand_indexes, k=calibrate_size) + + #train_sampler = torch.utils.data.sampler.SubsetRandomSampler(train_indexes) + calibrate_sampler = torch.utils.data.sampler.SubsetRandomSampler(calibrate_indexes) + + #train_loader = torch.utils.data.DataLoader( + # train_dataset, + # batch_size=args.batch_size, + # num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + calibrate_dataset = datasets.ImageFolder(traindir, transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])) + + calibrate_loader = torch.utils.data.DataLoader( + calibrate_dataset, + batch_size=args.batch_size, + num_workers=args.workers, pin_memory=True, sampler=calibrate_sampler) + + + #-----------------Model Quantisation---------------- + # todo: measure post-quantisation results??? 
+ print("Quantising model") + model_quantisation(model, calibrate_loader, quantization_method=QuanMode.NETWORK_FP, weight_width=16, data_width=16) + print("Model quantised") + original_top1, original_top5 = validate(val_loader, model, criterion) + print("Accuracy above is for quantised model") + original_top1 = float(str(original_top1).split("( ")[1][:-1]) + original_top5 = float(str(original_top5).split("( ")[1][:-1]) + # use vanilla convolution to measure + # post-activation (post-sliding-window, to be more precise) sparsity + + #-----------------Variable ReLU Sensitivity--------------------- + relu_list = [] + for name, module in model.named_modules(): + if isinstance(module, nn.ReLU):#or isinstance(module, nn.Linear): + relu_list.append(name) + + model_copy = copy.deepcopy(model) + + for relu_layer in relu_list: + min_thresh = 0 + max_thresh = 20 + while (max_thresh - min_thresh) > 0.01: + recorded = False + for threshold in np.linspace(min_thresh, max_thresh, 21): + model = copy.deepcopy(model_copy) + replace_layer_with_variable_relu(model, relu_layer, threshold=threshold) + print("Variable ReLU added") + top1, top5 = validate(val_loader, model, criterion) + print("Accuracy above is for " + str(relu_layer) + " with ReLU threshold:" + str(threshold)) + top1 = str(top1).split("( ")[1][:-1] + top5 = str(top5).split("( ")[1][:-1] + output_dir = args.accuracy_output + "/" + str(args.arch) + output_accuracy_to_csv(args.arch, threshold, relu_layer, top1, top5, output_dir) + if float(top5) < 0.99*original_top5 and not recorded: + min_thresh, max_thresh = threshold - (max_thresh - min_thresh)/20, threshold + recorded = True + + +if __name__ == '__main__': + imagenet_main() + +# \ No newline at end of file diff --git a/imagenet_main.py b/imagenet_main.py index 8a2621c..46afb15 100644 --- a/imagenet_main.py +++ b/imagenet_main.py @@ -12,6 +12,11 @@ from utils import * from sparsity_utils import * from quan_utils import * +from relu_utils import * + +from 
fpgaconvnet.parser.Parser import Parser +import json + parser = argparse.ArgumentParser(description='PyTorch ImageNet') parser.add_argument('--data', metavar='DIR', default="~/dataset/ILSVRC2012_img", @@ -35,9 +40,24 @@ parser.add_argument('--ma_window_size', default=None, type=int, help='') -parser.add_argument('--calibration-size', default=4, type=int, +parser.add_argument('--calibration-size', default=2500, type=int, help='') +parser.add_argument('--relu_threshold', default=None, type=str, + help='path to json containing relu thresholds') + +parser.add_argument("--accuracy_output", default=None, type=str, + help='Path to csv file to write accuracy to') + +# parser.add_argument("--model_path", default=None, type=str, +# help='Path to sparse .onnx model') + +# parser.add_argument("--platform_path", default=None, type=str, +# help='Path to platform specs (.toml)') + +# parser.add_argument("--optimised_config_path", default=None, type=str, +# help='Path to optimised configuration (.json)') + def imagenet_main(): args = parser.parse_args() @@ -60,13 +80,14 @@ def imagenet_main(): torch.cuda.set_device(args.gpu) model = model.cuda(args.gpu) random_input = random_input.cuda() - valdir = os.path.join(args.data, 'validation') + valdir = os.path.join(args.data, 'val') traindir = os.path.join(args.data, 'train') else: print('using CPU, this will be slow') valdir = os.path.join(args.data, 'val') - traindir = os.path.join(args.data, 'val') + traindir = os.path.join(args.data, 'train') + print("Calculating MACs and Params") calculate_macs_params(model, random_input, False, inference_mode=True) # define loss function (criterion) criterion = nn.CrossEntropyLoss().cuda(args.gpu) @@ -85,6 +106,7 @@ def imagenet_main(): batch_size=args.batch_size, shuffle=False, num_workers=args.workers, pin_memory=True) + train_dataset = datasets.ImageFolder(traindir, transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), @@ -109,6 +131,7 @@ def 
imagenet_main(): else: train_indexes.append(idx) """ + #Randomness handled by seeds rand_indexes = torch.randperm(len(train_dataset)).tolist() calibrate_indexes = random.choices(rand_indexes, k=calibrate_size) @@ -132,14 +155,39 @@ def imagenet_main(): batch_size=args.batch_size, num_workers=args.workers, pin_memory=True, sampler=calibrate_sampler) + + #-----------------Model Quantisation---------------- # todo: measure post-quantisation results??? + print("Quantising model") model_quantisation(model, calibrate_loader, quantization_method=QuanMode.NETWORK_FP, weight_width=16, data_width=16) + print("Model quantised") validate(val_loader, model, criterion) + print("Accuracy above is for quantised model") # use vanilla convolution to measure # post-activation (post-sliding-window, to be more precise) sparsity + + #-----------------Variable ReLU--------------------- + if args.relu_threshold is not None: + f = open(args.relu_threshold) + args.relu_threshold = json.load(f) + replace_with_variable_relu(model, threshold=args.relu_threshold) + print("Variable ReLU added") + top1, top5 = validate(val_loader, model, criterion) + print("Accuracy above is for ReLU threshold:" + str(args.relu_threshold)) + top1 = str(top1).split("( ")[1][:-1] + top5 = str(top5).split("( ")[1][:-1] + + + #---------------Sparsity Data Collection---------- replace_with_vanilla_convolution(model, window_size=args.ma_window_size) + print("Vanilla Convolution added") validate(calibrate_loader, model, criterion, args.print_freq) + print("Sparsity data collected") output_sparsity_to_csv(args.arch, model, args.output_path) + total_sparsity = total_network_sparsity(model) + output_accuracy_to_csv(args.arch, args.relu_threshold, top1, top5, total_sparsity, args.accuracy_output) + + if __name__ == '__main__': imagenet_main() diff --git a/imagenet_training_main.py b/imagenet_training_main.py new file mode 100644 index 0000000..8b138ff --- /dev/null +++ b/imagenet_training_main.py @@ -0,0 +1,544 @@ +import 
argparse +import os +import random +import shutil +import time +import warnings +from enum import Enum + +import torch +import torch.backends.cudnn as cudnn +import torch.distributed as dist +import torch.multiprocessing as mp +import torch.nn as nn +import torch.nn.parallel +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.datasets as datasets +import torchvision.models as models +import torchvision.transforms as transforms +from torch.optim.lr_scheduler import StepLR +from torch.utils.data import Subset + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") + and callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument('data', metavar='DIR', nargs='?', default="~/dataset/ILSVRC2012_img", + help='path to dataset (default: imagenet)') +parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', + choices=model_names, + help='model architecture: ' + + ' | '.join(model_names) + + ' (default: resnet18)') +parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument('--epochs', default=90, type=int, metavar='N', + help='number of total epochs to run') +parser.add_argument('--start-epoch', default=0, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument('-b', '--batch-size', default=256, type=int, + metavar='N', + help='mini-batch size (default: 256), this is the total ' + 'batch size of all GPUs on the current node when ' + 'using Data Parallel or Distributed Data Parallel') +parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, + metavar='LR', help='initial learning rate', dest='lr') +parser.add_argument('--momentum', default=0.9, type=float, metavar='M', + help='momentum') +parser.add_argument('--wd', '--weight-decay', default=1e-4, 
type=float, + metavar='W', help='weight decay (default: 1e-4)', + dest='weight_decay') +parser.add_argument('-p', '--print-freq', default=10, type=int, + metavar='N', help='print frequency (default: 10)') +parser.add_argument('--resume', default='', type=str, metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', + help='evaluate model on validation set') +parser.add_argument('--pretrained', dest='pretrained', action='store_true', + help='use pre-trained model') +parser.add_argument('--world-size', default=-1, type=int, + help='number of nodes for distributed training') +parser.add_argument('--rank', default=-1, type=int, + help='node rank for distributed training') +parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, + help='url used to set up distributed training') +parser.add_argument('--dist-backend', default='nccl', type=str, + help='distributed backend') +parser.add_argument('--seed', default=None, type=int, + help='seed for initializing training. ') +parser.add_argument('--gpu', default=None, type=int, + help='GPU id to use.') +parser.add_argument('--multiprocessing-distributed', action='store_true', + help='Use multi-processing distributed training to launch ' + 'N processes per node, which has N GPUs. This is the ' + 'fastest way to use PyTorch for either single node or ' + 'multi node data parallel training') +parser.add_argument('--dummy', action='store_true', help="use fake data to benchmark") +parser.add_argument('--model_dir', default = None, type = str) + + +best_acc1 = 0 + + +def main(): + args = parser.parse_args() + + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + cudnn.benchmark = False + warnings.warn('You have chosen to seed training. ' + 'This will turn on the CUDNN deterministic setting, ' + 'which can slow down your training considerably! 
' + 'You may see unexpected behavior when restarting ' + 'from checkpoints.') + + if args.gpu is not None: + warnings.warn('You have chosen a specific GPU. This will completely ' + 'disable data parallelism.') + + if args.dist_url == "env://" and args.world_size == -1: + args.world_size = int(os.environ["WORLD_SIZE"]) + + args.distributed = args.world_size > 1 or args.multiprocessing_distributed + + if torch.cuda.is_available(): + ngpus_per_node = torch.cuda.device_count() + else: + ngpus_per_node = 1 + if args.multiprocessing_distributed: + # Since we have ngpus_per_node processes per node, the total world_size + # needs to be adjusted accordingly + args.world_size = ngpus_per_node * args.world_size + # Use torch.multiprocessing.spawn to launch distributed processes: the + # main_worker process function + mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args)) + else: + # Simply call main_worker function + main_worker(args.gpu, ngpus_per_node, args) + + +def main_worker(gpu, ngpus_per_node, args): + global best_acc1 + args.gpu = gpu + + if args.gpu is not None: + print("Use GPU: {} for training".format(args.gpu)) + + if args.distributed: + if args.dist_url == "env://" and args.rank == -1: + args.rank = int(os.environ["RANK"]) + if args.multiprocessing_distributed: + # For multiprocessing distributed training, rank needs to be the + # global rank among all the processes + args.rank = args.rank * ngpus_per_node + gpu + dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, + world_size=args.world_size, rank=args.rank) + # create model + if args.pretrained: + print("=> using pre-trained model '{}'".format(args.arch)) + model = models.__dict__[args.arch](pretrained=True) + else: + print("=> creating model '{}'".format(args.arch)) + model = models.__dict__[args.arch]() + + if not torch.cuda.is_available() and not torch.backends.mps.is_available(): + print('using CPU, this will be slow') + elif args.distributed: + # For 
multiprocessing distributed, DistributedDataParallel constructor + # should always set the single device scope, otherwise, + # DistributedDataParallel will use all available devices. + if torch.cuda.is_available(): + if args.gpu is not None: + torch.cuda.set_device(args.gpu) + model.cuda(args.gpu) + # When using a single GPU per process and per + # DistributedDataParallel, we need to divide the batch size + # ourselves based on the total number of GPUs of the current node. + args.batch_size = int(args.batch_size / ngpus_per_node) + args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node) + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) + else: + model.cuda() + # DistributedDataParallel will divide and allocate batch_size to all + # available GPUs if device_ids are not set + model = torch.nn.parallel.DistributedDataParallel(model) + elif args.gpu is not None and torch.cuda.is_available(): + torch.cuda.set_device(args.gpu) + model = model.cuda(args.gpu) + elif torch.backends.mps.is_available(): + device = torch.device("mps") + model = model.to(device) + else: + # DataParallel will divide and allocate batch_size to all available GPUs + if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): + model.features = torch.nn.DataParallel(model.features) + model.cuda() + else: + model = torch.nn.DataParallel(model).cuda() + + if torch.cuda.is_available(): + if args.gpu: + device = torch.device('cuda:{}'.format(args.gpu)) + else: + device = torch.device("cuda") + elif torch.backends.mps.is_available(): + device = torch.device("mps") + else: + device = torch.device("cpu") + # define loss function (criterion), optimizer, and learning rate scheduler + criterion = nn.CrossEntropyLoss().to(device) + + optimizer = torch.optim.SGD(model.parameters(), args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + scheduler = 
StepLR(optimizer, step_size=30, gamma=0.1) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + if args.gpu is None: + checkpoint = torch.load(args.resume) + elif torch.cuda.is_available(): + # Map model to be loaded to specified single gpu. + loc = 'cuda:{}'.format(args.gpu) + checkpoint = torch.load(args.resume, map_location=loc) + args.start_epoch = checkpoint['epoch'] + best_acc1 = checkpoint['best_acc1'] + if args.gpu is not None: + # best_acc1 may be from a checkpoint from a different GPU + best_acc1 = best_acc1.to(args.gpu) + model.load_state_dict(checkpoint['state_dict']) + optimizer.load_state_dict(checkpoint['optimizer']) + scheduler.load_state_dict(checkpoint['scheduler']) + print("=> loaded checkpoint '{}' (epoch {})" + .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + + + # Data loading code + if args.dummy: + print("=> Dummy data is used!") + train_dataset = datasets.FakeData(1281167, (3, 224, 224), 1000, transforms.ToTensor()) + val_dataset = datasets.FakeData(50000, (3, 224, 224), 1000, transforms.ToTensor()) + else: + traindir = os.path.join(args.data, 'train') + valdir = os.path.join(args.data, 'val') + normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]) + + train_dataset = datasets.ImageFolder( + traindir, + transforms.Compose([ + transforms.RandomResizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + + val_dataset = datasets.ImageFolder( + valdir, + transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])) + + if args.distributed: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) + val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, drop_last=True) + else: + 
train_sampler = None + val_sampler = None + + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None), + num_workers=args.workers, pin_memory=True, sampler=train_sampler) + + val_loader = torch.utils.data.DataLoader( + val_dataset, batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True, sampler=val_sampler) + + if args.evaluate: + validate(val_loader, model, criterion, args) + return + + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + train_sampler.set_epoch(epoch) + + # train for one epoch + train(train_loader, model, criterion, optimizer, epoch, device, args) + + # evaluate on validation set + acc1 = validate(val_loader, model, criterion, args) + + scheduler.step() + + # remember best acc@1 and save checkpoint + is_best = acc1 > best_acc1 + best_acc1 = max(acc1, best_acc1) + + if not args.multiprocessing_distributed or (args.multiprocessing_distributed + and args.rank % ngpus_per_node == 0): + save_checkpoint({ + 'epoch': epoch + 1, + 'arch': args.arch, + 'state_dict': model.state_dict(), + 'best_acc1': best_acc1, + 'optimizer' : optimizer.state_dict(), + 'scheduler' : scheduler.state_dict() + }, is_best, dirname=args.model_dir) + + +def train(train_loader, model, criterion, optimizer, epoch, device, args): + batch_time = AverageMeter('Time', ':6.3f') + data_time = AverageMeter('Data', ':6.3f') + losses = AverageMeter('Loss', ':.4e') + reg_losses = AverageMeter('Reg Loss', ':.4e') + total_losses = AverageMeter('Total Loss', ':.4e') + top1 = AverageMeter('Acc@1', ':6.2f') + top5 = AverageMeter('Acc@5', ':6.2f') + progress = ProgressMeter( + len(train_loader), + [batch_time, data_time, losses, reg_losses, total_losses, top1, top5], + prefix="Epoch: [{}]".format(epoch)) + + # switch to train mode + model.train() + + activation = {} + def getActivation(name): + # the hook signature + def hook(model, input, output): + activation[name] = 
output.detach() + return hook + + activation_handles = {} + for name, module in model.named_modules(): + if isinstance(module, nn.ReLU): + activation_handles[name] = module.register_forward_hook(getActivation(name)) + + end = time.time() + for i, (images, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + # move data to the same device as model + images = images.to(device, non_blocking=True) + target = target.to(device, non_blocking=True) + + # compute output + output = model(images) + loss = criterion(output, target) + + reg = 0.0 + for name, values in activation.items(): + reg += ( (torch.sum(torch.sqrt(torch.sum(values**2,0)))**2) + (torch.sum(torch.sqrt(torch.sum(values**2,1)))**2) + (torch.sum(torch.sqrt(torch.sum(values**2,2)))**2) )/torch.sum(values**2) + decay = 1e-6 + reg_loss = decay*reg + total_loss = loss+reg_loss + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + reg_losses.update(reg_loss.item(), images.size(0)) + total_losses.update(total_loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + total_loss.backward() + optimizer.step() + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i + 1) + + +def validate(val_loader, model, criterion, args): + + def run_validate(loader, base_progress=0): + with torch.no_grad(): + end = time.time() + for i, (images, target) in enumerate(loader): + i = base_progress + i + if args.gpu is not None and torch.cuda.is_available(): + images = images.cuda(args.gpu, non_blocking=True) + if torch.backends.mps.is_available(): + images = images.to('mps') + target = target.to('mps') + if torch.cuda.is_available(): + target = target.cuda(args.gpu, non_blocking=True) + + # compute output + 
output = model(images) + loss = criterion(output, target) + + # measure accuracy and record loss + acc1, acc5 = accuracy(output, target, topk=(1, 5)) + losses.update(loss.item(), images.size(0)) + top1.update(acc1[0], images.size(0)) + top5.update(acc5[0], images.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + progress.display(i + 1) + + batch_time = AverageMeter('Time', ':6.3f', Summary.NONE) + losses = AverageMeter('Loss', ':.4e', Summary.NONE) + top1 = AverageMeter('Acc@1', ':6.2f', Summary.AVERAGE) + top5 = AverageMeter('Acc@5', ':6.2f', Summary.AVERAGE) + progress = ProgressMeter( + len(val_loader) + (args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset))), + [batch_time, losses, top1, top5], + prefix='Test: ') + + # switch to evaluate mode + model.eval() + + run_validate(val_loader) + if args.distributed: + top1.all_reduce() + top5.all_reduce() + + if args.distributed and (len(val_loader.sampler) * args.world_size < len(val_loader.dataset)): + aux_val_dataset = Subset(val_loader.dataset, + range(len(val_loader.sampler) * args.world_size, len(val_loader.dataset))) + aux_val_loader = torch.utils.data.DataLoader( + aux_val_dataset, batch_size=args.batch_size, shuffle=False, + num_workers=args.workers, pin_memory=True) + run_validate(aux_val_loader, len(val_loader)) + + progress.display_summary() + + return top1.avg + + +def save_checkpoint(state, is_best, dirname=None): # save state to checkpoint.pth.tar (inside dirname when given); copy it to model_best.pth.tar when is_best + if dirname is None: + filename = 'checkpoint.pth.tar' + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') + else: + if not os.path.isdir(dirname): + os.mkdir(dirname) + filename = os.path.join(dirname, 'checkpoint.pth.tar') # a file inside dirname; plain concatenation dropped the path separator + torch.save(state, filename) # write the checkpoint file, not the directory path + if is_best: + shutil.copyfile(filename, 'model_best.pth.tar') # copy the checkpoint file; the source used to be the directory + +class Summary(Enum): + NONE = 0 + AVERAGE = 1 + SUM = 2 + COUNT = 3 + +class AverageMeter(object): + """Computes and stores the average 
and current value""" + def __init__(self, name, fmt=':f', summary_type=Summary.AVERAGE): + self.name = name + self.fmt = fmt + self.summary_type = summary_type + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + def all_reduce(self): + if torch.cuda.is_available(): + device = torch.device("cuda") + elif torch.backends.mps.is_available(): + device = torch.device("mps") + else: + device = torch.device("cpu") + total = torch.tensor([self.sum, self.count], dtype=torch.float32, device=device) + dist.all_reduce(total, dist.ReduceOp.SUM, async_op=False) + self.sum, self.count = total.tolist() + self.avg = self.sum / self.count + + def __str__(self): + fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})' + return fmtstr.format(**self.__dict__) + + def summary(self): + fmtstr = '' + if self.summary_type is Summary.NONE: + fmtstr = '' + elif self.summary_type is Summary.AVERAGE: + fmtstr = '{name} {avg:.3f}' + elif self.summary_type is Summary.SUM: + fmtstr = '{name} {sum:.3f}' + elif self.summary_type is Summary.COUNT: + fmtstr = '{name} {count:.3f}' + else: + raise ValueError('invalid summary type %r' % self.summary_type) + + return fmtstr.format(**self.__dict__) + + +class ProgressMeter(object): + def __init__(self, num_batches, meters, prefix=""): + self.batch_fmtstr = self._get_batch_fmtstr(num_batches) + self.meters = meters + self.prefix = prefix + + def display(self, batch): + entries = [self.prefix + self.batch_fmtstr.format(batch)] + entries += [str(meter) for meter in self.meters] + print('\t'.join(entries)) + + def display_summary(self): + entries = [" *"] + entries += [meter.summary() for meter in self.meters] + print(' '.join(entries)) + + def _get_batch_fmtstr(self, num_batches): + num_digits = len(str(num_batches // 1)) + fmt = '{:' + str(num_digits) + 'd}' + return '[' + fmt + 
'/' + fmt.format(num_batches) + ']' + +def accuracy(output, target, topk=(1,)): + """Computes the accuracy over the k top predictions for the specified values of k""" + with torch.no_grad(): + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/onnx_sparsity_attribute.py b/onnx_sparsity_attribute.py index d2d0555..88f9b6c 100644 --- a/onnx_sparsity_attribute.py +++ b/onnx_sparsity_attribute.py @@ -87,13 +87,22 @@ def _layer_name_translation(model_name, onnx_name): sparsity_data = toml_data[layer_name]["avg"] set_nodeattr(node, "input sparsity", sparsity_data) +def annotate_histograms(model_name, onnx_model, data_path): + for node in onnx_model.graph.node: + if node.op_type == 'Conv': + layer_name = layer_name_translation(model_name, node.name) + np_path = os.path.join(data_path, model_name + "_" + layer_name + "_histograms.npy") + channel_wise_sprasity = np.load(np_path) + windows_data = channel_wise_sprasity[:, -1]/channel_wise_sprasity.sum(axis = 1) + set_nodeattr(node, "window sparsity", windows_data) + + if __name__ == "__main__": parser = argparse.ArgumentParser(description='Export ONNX model with sparsity attribute') parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',choices=model_names) parser.add_argument('--state_dict', metavar='DIR', default="/home/zy18/Downloads/Pruning Results-20230815T143526Z-001/Pruning Results/weight_sparse_50/resnet18_classification_imagenet_2023-08-12/software/transform/transformed_ckpt/state_dict.pt") parser.add_argument('--data_path', metavar='DIR', default="/home/zy18/Downloads/Pruning Results-20230815T143526Z-001/Pruning 
def torch_onnx_exporter(model, model_name, random_input, output_path):
    """Export ``model`` to an ONNX file at ``output_path``.

    For ``mobilenet_v2`` every ``nn.ReLU6`` module is first replaced by a
    plain ``nn.ReLU`` (per the original note, ReLU6 creates a Clip node).

    Args:
        model: The torch module to export; modified in place for mobilenet_v2.
        model_name: Architecture name; only "mobilenet_v2" triggers the swap.
        random_input: Example input handed to ``torch.onnx.export``.
        output_path: Destination of the exported ONNX file.
    """
    if model_name == "mobilenet_v2":
        # todo: relu6 creates clip node
        relu6_swaps = {
            module: nn.ReLU()
            for module in model.modules()
            if isinstance(module, nn.ReLU6)
        }
        replace_modules(model, relu6_swaps)
    torch.onnx.export(
        model,
        random_input,
        output_path,
        verbose=False,
        keep_initializers_as_inputs=True,
    )
def layer_name_translation(model_name, onnx_name):
    """Translate an ONNX node name into the corresponding torch module name.

    The ONNX name is split on "/" and selected path components are joined
    with "." and suffixed with ".1". Which components are used depends on
    the architecture and on how deep the ONNX name is.
    NOTE(review): the ".1" suffix presumably addresses the wrapped module
    inside an ``nn.Sequential`` -- confirm against the wrapping code.

    Args:
        model_name: Architecture name (resnet18, resnet50, mobilenet_v2,
            alexnet, vgg11, vgg16 or repvgg-a0).
        onnx_name: "/"-separated ONNX node name, e.g. "/conv1/Conv".

    Returns:
        The translated torch module name as a string.

    Raises:
        ValueError: If ``model_name`` is not supported, or the ONNX name has
            an unexpected number of components for that architecture.
            (Previously an unsupported model surfaced as UnboundLocalError
            and a bad depth as an ``assert`` that vanishes under ``-O``.)
    """
    parts = onnx_name.split("/")
    depth = len(parts)

    def _bad_depth(expected):
        # One message format for every "unexpected name depth" failure.
        return ValueError(
            "unexpected ONNX name %r for %s: got %d components, expected %s"
            % (onnx_name, model_name, depth, expected)
        )

    if model_name in ["resnet18", "resnet50"]:
        if depth == 3:  # first conv
            return parts[1] + ".1"
        if depth not in (5, 6):
            raise _bad_depth("3, 5 or 6")
        return parts[2] + "." + parts[-2] + ".1"

    if model_name == "mobilenet_v2":
        if depth == 5:  # first and last conv
            return parts[-2] + ".1"
        if depth not in (6, 7):
            raise _bad_depth("5, 6 or 7")
        return parts[2] + "." + parts[-2] + ".1"

    if model_name in ["alexnet", "vgg11", "vgg16"]:
        return parts[-2] + ".1"

    if model_name == "repvgg-a0":
        return ".".join(parts[1:-1]) + ".1"

    raise ValueError("unsupported model for layer name translation: %r" % (model_name,))
# Script body: export the torch model to ONNX, optionally swap ReLU nodes for
# ThresholdedRelu nodes, annotate quantisation and sparsity, then save.
parser = argparse.ArgumentParser(description='Export ONNX model with sparsity attribute')
parser.add_argument('-a', '--arch', metavar='ARCH', default='vgg16',
                    choices=model_names,
                    help='model architecture: ' +
                        ' | '.join(model_names))
# Fixed help text: this is the directory annotate_sparsity reads the
# "<arch>_<layer>_histograms.npy" files from, not an onnx model.
parser.add_argument('--data', metavar='DIR', default="runlog/resnet18/uniform_relu_0.085",
                    help='path to directory with the *_histograms.npy sparsity data')
parser.add_argument('--dense_onnx_path', metavar='DIR', default="models/vgg16.onnx",
                    help='path to onnx model')
parser.add_argument('--sparse_onnx_path', metavar='DIR', default="models/vgg16_sparse.onnx",
                    help='path to onnx model')
# Not read anywhere in this script; kept so existing command lines still parse.
parser.add_argument('--temp_onnx_path', metavar='DIR', default="models/vgg16_sparse.onnx",
                    help='path to onnx model')
parser.add_argument("-r", "--relu_thresholds_path", metavar='DIR', default=None,
                    help='path to relu thresholds json model')

args = parser.parse_args()

torch_model = load_model(args.arch)
torch_onnx_exporter(torch_model, args.arch, torch.randn(1, 3, 224, 224), args.dense_onnx_path)
onnx_model = onnx.load(args.dense_onnx_path)

if args.relu_thresholds_path is not None:
    # Context manager so the thresholds file is closed even if parsing fails.
    with open(args.relu_thresholds_path) as f:
        relu_thresholds = json.load(f)

    # The dense model loaded above is still unmodified here, so it is reused
    # instead of being loaded from disk a second time.
    replace_relu_nodes(args.arch, onnx_model, relu_thresholds)


annotate_quantisation(onnx_model, 16, 16, 32, False)
annotate_sparsity(args.arch, onnx_model, args.data)
# annotate_histograms(args.arch, onnx_model, args.data)
onnx.save(onnx_model, args.sparse_onnx_path)
zero_point): return x - +#Asymmetric Quantisation: x_q = round((x_f - min_xf) * (2^n - 1) / (max_xf - min_xf)) def asymmetric_linear_no_clipping(wordlength, x_min, x_max): - scaling_factor = (2**wordlength - 1) / torch.clamp((x_max - x_min), min=1e-8) - zero_point = scaling_factor * x_min + scaling_factor = (2**wordlength - 1) / torch.clamp((x_max - x_min), min=1e-8) # Calculates scaling factor as shown in equation for function above + zero_point = scaling_factor * x_min #Corresponds to most negative value represented by wlen-bit if isinstance(zero_point, torch.Tensor): zero_point = zero_point.round() else: zero_point = float(round(zero_point)) - zero_point += 2**(wordlength - 1) + zero_point += 2**(wordlength - 1) #Corresponds to zero by adding 2^(wlen - 1) return scaling_factor, zero_point @@ -62,6 +63,7 @@ def saturate(w_quan, wordlength): return w_quan +#Takes a model as input and can call a function with wordlength and quantisation method to quantise base don quantisation method class WeightQuantizer(): def __init__(self, model): bFirst = True @@ -70,10 +72,10 @@ def __init__(self, model): if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear): if bFirst: bFirst = False - self.w_min = torch.min(module.weight) - self.w_max = torch.max(module.weight) + self.w_min = torch.min(module.weight) #Single value + self.w_max = torch.max(module.weight) #QUESTION: Why don't we use torch.minimum givng us tensors for the first module as well? 
else: - self.w_min = torch.minimum(self.w_min, torch.min(module.weight)) + self.w_min = torch.minimum(self.w_min, torch.min(module.weight)) self.w_max = torch.maximum(self.w_max, torch.max(module.weight)) print("weight min:", self.w_min) @@ -123,7 +125,7 @@ def get_scale_shift(self): def forward(self, x): - if self.gather_data: + if self.gather_data: #Collects data about the x_min and x_max to quantise the input features if self.quantization_method == QuanMode.CHANNEL_BFP: channel_num = x.size()[1] x_block = x.data.transpose(0, 1) @@ -160,6 +162,7 @@ def forward(self, x): return x_quan +#Function that performs quantisation on the feature maps post-activation def activation_quantization(model, wordlength, quantization_method, calibrate_loader): # add activation quantisation module replace_dict ={} @@ -167,7 +170,7 @@ def activation_quantization(model, wordlength, quantization_method, calibrate_lo if type(module) in QUAN_TARGET_MODULES: module_quan = nn.Sequential(*[QuanAct(wordlength, quantization_method), copy.deepcopy(module), QuanAct(wordlength, quantization_method)]) replace_dict[module] = module_quan - + replace_modules(model, replace_dict) model.eval() @@ -219,4 +222,16 @@ def model_quantisation(model, calibrate_loader, quantization_method=QuanMode.NET quantized_weight = weight_quantizer.AsymmetricQuantHandler(module.weight, weight_width, quantization_method) module.weight.data.copy_(quantized_weight) - activation_quantization(model, data_width, quantization_method, calibrate_loader) \ No newline at end of file + activation_quantization(model, data_width, quantization_method, calibrate_loader) + + +def output_quan_accuracy_to_csv(model_name, relu_threshold, top1, top5): + file_path = os.path.join(os.getcwd(), "runlog", str(model_name) + "_accuracy_var_quantisation.csv") + + if not (os.path.isfile(file_path)): + with open(file_path, "w") as f: + f.write("Wordlength,Top1 Accuracy,Top5 Accuracy\n") + + with open(file_path, "a") as f: + row = 
",".join([str(relu_threshold), str(top1), str(top5)]) + "\n" + f.write(row) diff --git a/relu_main.py b/relu_main.py new file mode 100644 index 0000000..eb99934 --- /dev/null +++ b/relu_main.py @@ -0,0 +1,308 @@ +#Imports +import argparse +import datetime +import json +import toml +import numpy +import wandb +from torch import nn +import os + +import fpgaconvnet.tools.graphs as graphs +from fpgaconvnet.tools.layer_enum import LAYER_TYPE +from fpgaconvnet.parser.Parser import Parser + +from utils import * + +#Get new throughput function +def get_new_throughput(model_name, net, sparsity_path): + + for partition_index in range(len(net.partitions)): + # print("Patition:", partition_index) + partition = net.partitions[partition_index] + for layer in graphs.ordered_node_list(partition.graph): + + #Check if layer is a Convolution layer tha can benefit from sparsit + if (partition.graph.nodes[layer]['type'] == LAYER_TYPE.Convolution): + + if len(partition.graph.nodes[layer]['hw'].sparsity): + layer_name = layer_name_translation(model_name, layer) + np_path = os.path.join(sparsity_path, model_name + "_" + layer_name + "_histograms.npy") + histograms_data = np.load(np_path) + histograms = histograms_data/histograms_data.sum(axis = 1)[:, np.newaxis] + partition.graph.nodes[layer]['hw'].sparsity = histograms + + net.update_partitions() + + return net.get_throughput(), net.get_latency() + +#Layer name translation function +def layer_name_translation(model_name, onnx_name): + onnx_name = onnx_name.split("_") + if model_name in ["resnet18", "resnet50"]: + if len(onnx_name) == 3: # first conv + torch_name = onnx_name[1]+ ".1" + else: + assert len(onnx_name) in [5,6] + torch_name = onnx_name[2] + "." +onnx_name[-2]+ ".1" + elif model_name == "mobilenet_v2": + if len(onnx_name) == 5: # first and last conv + torch_name = onnx_name[-2] + ".1" + else: + assert len(onnx_name) in [6,7] + torch_name = onnx_name[2] + "." 
# Amount a ReLU threshold is raised by on each run.
THRESHOLD_INC = 0.005

# Main: repeatedly raise ReLU thresholds, collect sparsity/accuracy, and obtain
# throughput/latency either from fixed hardware or from a fresh optimiser run.
if __name__ == "__main__":

    # Command line parser
    parser = argparse.ArgumentParser()

    parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18',
                        help='model architecture: ' +
                        ' | '.join(model_names))
    parser.add_argument('--relu-policy', choices=['slowest_node', 'uniform'], default="uniform", type=str,
                        help='')
    parser.add_argument('--fixed-hardware', action="store_true",
                        help='')
    parser.add_argument('--normalise-hardware', action="store_true",
                        help='')
    parser.add_argument('--use-old-sparsity', action="store_true",
                        help='')
    parser.add_argument('--runs', default=100, type=int,
                        help='how many runs')

    parser.add_argument("--sparsity_path", default="runlog/resnet18/", type=str,
                        help='Path to sparsity log dir for old sparsity')

    parser.add_argument("--accuracy_path", default="runlog/resnet18/uniform_accuracy.csv", type=str,
                        help='Path to accuracy .csv file for old accuracy')

    parser.add_argument("--model_path", default="onnx_models/resnet18/resnet18_uniform_relu_0.0.onnx", type=str,
                        help='Path to sparse .onnx model')

    parser.add_argument("--platform_path", default="../fpgaconvnet-optimiser/examples/platforms/u250.toml", type=str,
                        help='Path to platform specs (.toml)')

    parser.add_argument("--optimised_config_path", default="../fpgaconvnet-optimiser/fpgaconvnet/optimiser/resnet18/resnet18_uniform_relu_0.0/config.json", type=str,
                        help='Path to optimised configuration (.json)')

    parser.add_argument('--gpu', default=None, type=str,
                        help='GPU id to use.')

    parser.add_argument('--enable-wandb', action="store_true", help='enable wandb')

    args = parser.parse_args()

    # Old sparsity logs are only laid out for the uniform policy (was an assert
    # inside the run loop, which disappears under `python -O`).
    if args.use_old_sparsity and args.relu_policy != "uniform":
        parser.error("--use-old-sparsity requires --relu-policy uniform")

    if args.normalise_hardware:
        # TODO: the normalised dense/sparse optimiser runs (platform .toml with
        # DSPs scaled from the reported resource usage) are not implemented;
        # the flag currently has no effect on what is run or logged.
        print("Warning: --normalise-hardware is not implemented and is ignored")

    # Initialise wandb
    if args.enable_wandb:
        wandb.login()
        start_time = datetime.datetime.now()
        timestamp = str(start_time).replace(" ", "_").replace(".", "_").replace(":", "_").replace("-", "_")
        run_name = args.relu_policy + "_" + timestamp
        if args.fixed_hardware:
            run_name = args.relu_policy + "_fixed_hardware_" + timestamp

        wandb.init(
            # Set the project where this run will be logged
            project="-".join([args.arch, "relu"]),
            name=run_name,
            # Track hyperparameters and run metadata
            config={
                "platform": "u250"
            })

    # Initialise relu_thresholds: one entry per ReLU/ReLU6 module, keyed "<module name>.1"
    print("=> using pre-trained model '{}'".format(args.arch))
    model = load_model(args.arch)
    relu_thresholds = {}
    for name, module in model.named_modules():
        if isinstance(module, nn.ReLU) or isinstance(module, nn.ReLU6):
            relu_thresholds[name + ".1"] = 0.0

    threshold = 0.0
    acc_file = "runlog/" + args.arch + "/" + args.relu_policy + "_accuracy.csv"
    if args.relu_policy == "slowest_node" and not args.fixed_hardware:
        acc_file = "runlog/" + args.arch + "/" + args.relu_policy + "_changing_accuracy.csv"

    # `--gpu` defaults to None; concatenating None into the command raised
    # TypeError, so the flag is only passed on when it was actually given.
    gpu_flag = "" if args.gpu is None else " --gpu " + args.gpu

    for run in range(args.runs):

        # Per-run tag shared by the log dir, the threshold json and the sparse onnx model
        if args.relu_policy == "uniform":
            run_tag = "uniform_relu_" + str(threshold)
        elif args.fixed_hardware:
            run_tag = "slowest_node_" + str(run)
        else:
            run_tag = "slowest_node_changing_" + str(run)

        log_dir = args.arch + "/" + run_tag
        threshold_path = "relu_thresholds/" + args.arch + "/" + args.arch + "_" + run_tag + ".json"

        # Store relu_thresholds. Done for every mode: the annotation step below
        # reads this file, and with --use-old-sparsity `threshold_path` used to
        # be undefined at that point (NameError).
        with open(threshold_path, 'w') as fp:
            json.dump(relu_thresholds, fp)

        if args.use_old_sparsity:
            # Reuse accuracy recorded by an earlier sweep: row `run` after the header
            with open(args.accuracy_path, 'r') as f:
                lines = f.read().splitlines()
            line_vals = lines[run + 1].split(",")

            sparsity_dir = args.sparsity_path + "/uniform_relu_" + str(threshold)
        else:
            # Collect sparsity and accuracy with the current thresholds
            os.makedirs("runlog/" + log_dir, exist_ok=True)

            os.system("python imagenet_main.py -a " + args.arch + gpu_flag + " --output_path runlog/" + log_dir + " --accuracy_output " + acc_file + " --relu_threshold " + threshold_path)

            sparsity_dir = "runlog/" + log_dir

            # The run above appends its result as the last row
            with open(acc_file, 'r') as f:
                lines = f.read().splitlines()
            line_vals = lines[-1].split(",")

        # Last three csv columns: top1, top5, network sparsity
        top1 = float(line_vals[-3])
        top5 = float(line_vals[-2])
        sparsity = float(line_vals[-1])

        if args.fixed_hardware:
            # Fixed hardware: load the given design and re-evaluate it with the new sparsity
            config_parser = Parser(backend="chisel", quant_mode="auto", custom_onnx=True)
            net = config_parser.onnx_to_fpgaconvnet(args.model_path, args.platform_path)  # parse the onnx model

            net = config_parser.prototxt_to_fpgaconvnet(net, args.optimised_config_path)

            net.update_partitions()

            throughput, latency = get_new_throughput(args.arch, net, sparsity_dir)

            log_info = relu_thresholds | {"top1_accuracy": top1, "top5_accuracy": top5, "throughput": throughput, "latency": latency, "network_sparsity": sparsity}
            print("Logging:", log_info)

        else:
            # Annotate sparsity onto the onnx model
            dense_onnx_path = "onnx_models/" + args.arch + "/" + args.arch + ".onnx"
            sparse_onnx_path = "onnx_models/" + args.arch + "/" + args.arch + "_" + run_tag + ".onnx"

            os.system("python onnx_sparsity_attribute_full.py -a " + args.arch + " --data " + sparsity_dir + " --dense_onnx_path " + dense_onnx_path + " --sparse_onnx_path " + sparse_onnx_path + " -r " + threshold_path)

            # Run optimiser
            if args.relu_policy == "uniform":
                output_path = "../fpgaconvnet-optimiser/fpgaconvnet/optimiser/" + args.arch + "/" + args.arch + "_uniform_relu_" + str(threshold)
            elif args.relu_policy == "slowest_node":
                output_path = "../fpgaconvnet-optimiser/fpgaconvnet/optimiser/" + args.arch + "/" + args.arch + "_slowest_node_" + str(run)

            os.system("python -u ../fpgaconvnet-optimiser/fpgaconvnet/optimiser/cli.py --rerun-optim -n " + args.arch + " -m " + sparse_onnx_path + " -o " + output_path + " -p " + args.platform_path + " -b 256 --objective throughput --optimiser greedy_partition --optimiser_config_path ../fpgaconvnet-optimiser/examples/greedy_partition_throughput_residual.toml")

            # Note throughput, latency and resources from the optimiser report
            with open(output_path + "/report.json") as f:
                report = json.load(f)
            throughput = report["network"]["performance"]["throughput"]
            latency = report["network"]["performance"]["latency"]
            resources = report["network"]["max_resource_usage"]

            # Built unconditionally: with --normalise-hardware this used to be
            # skipped, leaving `log_info` undefined for the wandb call below.
            log_info = relu_thresholds | resources | {"top1_accuracy": top1, "top5_accuracy": top5, "throughput": throughput, "latency": latency, "network_sparsity": sparsity}

        # Log into wandb
        if args.enable_wandb:
            wandb.log(log_info)

        # Update thresholds for the next run based on relu-policy
        threshold = round(threshold + THRESHOLD_INC, 4)

        if args.relu_policy == "uniform":
            # NOTE(review): only nn.ReLU entries are raised here although the
            # dict was initialised for nn.ReLU6 as well -- confirm whether
            # ReLU6 thresholds are meant to stay at 0.0.
            for name, module in model.named_modules():
                if isinstance(module, nn.ReLU):
                    relu_thresholds[name + ".1"] = threshold
        elif args.relu_policy == "slowest_node":
            if not args.fixed_hardware:
                # Reload the design the optimiser just produced. (The parser was
                # previously bound to a misspelt name, so `config_parser` was
                # undefined on this path.)
                config_parser = Parser(backend="chisel", quant_mode="auto", custom_onnx=True)
                net = config_parser.onnx_to_fpgaconvnet(sparse_onnx_path, args.platform_path)  # parse the onnx model

                net = config_parser.prototxt_to_fpgaconvnet(net, output_path + "/config.json")

                net.update_partitions()

            # Raise the threshold of the ReLU feeding the slowest sparse
            # convolution of each partition, at most once per ReLU per run.
            replaced_layers = set()
            previous_relu = None
            for partition in net.partitions:
                replace_layer = None
                max_latency = 0
                for layer in graphs.ordered_node_list(partition.graph):
                    node = partition.graph.nodes[layer]

                    # Keep track of preceding relu layer
                    if isinstance(node['type'], list):
                        if LAYER_TYPE.ReLU in node['type']:
                            previous_relu = layer
                    elif node['type'] == LAYER_TYPE.ReLU:
                        previous_relu = layer

                    # Only convolution layers can benefit from sparsity
                    if node['type'] == LAYER_TYPE.Convolution:
                        layer_latency = node['hw'].latency()
                        if previous_relu is not None:
                            previous_layer = layer_name_translation(args.arch, previous_relu)
                            if layer_latency > max_latency and len(node['hw'].sparsity):
                                max_latency = layer_latency
                                replace_layer = previous_layer

                if replace_layer is not None and replace_layer not in replaced_layers:
                    relu_thresholds[replace_layer] += THRESHOLD_INC
                    replaced_layers.add(replace_layer)
test_name = "sparsity_run_ma_window_size" + str(window_size) + + start_time = datetime.datetime.now() + log_dir= test_name + "_" + str(start_time).replace(" ","_").replace(".","_").replace(":","_").replace("-", "_") + + os.makedirs("runlog/" + log_dir) + log_file="runlog/" + log_dir + "/log.txt" + + regsys_cmd="python3 -u imagenet_main.py --output_path " + "runlog/" + log_dir + " --ma_window_size " + str(window_size) + " --gpu " + str(args.gpu) + + with open(log_file, "w") as log_fp: + log_fp.write(regsys_cmd + '\n') + + os.system(regsys_cmd + " 2>&1 | tee -a " + log_file) +''' +''' +for model_name in ["resnet18"]: + test_name = "{}_sparsity_run_50k".format(model_name) + + start_time = datetime.datetime.now() + log_dir= test_name + "_" + str(start_time).replace(" ","_").replace(".","_").replace(":","_").replace("-", "_") + + os.makedirs("runlog/" + log_dir) + log_file="runlog/" + log_dir + "/log.txt" + + regsys_cmd="python3 -u imagenet_main.py --output_path " + "runlog/" + log_dir + " --gpu " + str(args.gpu) + " -a " + model_name + f" --data /data/imagenet -b 4" + + with open(log_file, "w") as log_fp: + log_fp.write(regsys_cmd + '\n') + + os.system(regsys_cmd + " 2>&1 | tee -a " + log_file) +''' + +def relu_run(args): + + sweep_range = [0.1, 0.15, 0.2] + for model_name in ["resnet18"]: + for relu_threshold in sweep_range: + test_name = model_name + "_sparsity_run_50K_relu_" + str(relu_threshold) + + start_time = datetime.datetime.now() + log_dir= test_name + "_" + str(start_time).replace(" ","_").replace(".","_").replace(":","_").replace("-", "_") + + os.makedirs("runlog/" + log_dir) + log_file="runlog/" + log_dir + "/log.txt" + + regsys_cmd="python imagenet_main.py --calibration-size 50000 --output_path " + "runlog/" + log_dir + " --relu_threshold " + str(relu_threshold) + " --gpu " + str(args.gpu) + \ + " --optimised_config_path " + args.optimised_config_path + " --platform_path " + args.platform_path + " --model_path " + args.model_path + " --accuracy_output " + 
class VariableReLUWrapper(nn.Module):
    """ReLU variant that zeroes every activation not strictly above a threshold.

    Args:
        relu_threshold: Cut-off value; elements ``<= relu_threshold`` become 0.0.
        relu6: When true, the input is first capped at 6 before thresholding.
    """

    def __init__(self, relu_threshold, relu6=False):
        super().__init__()

        self.threshold = relu_threshold
        self.relu6 = relu6

    def forward(self, x):
        """Return ``x`` with values at or below ``self.threshold`` replaced by 0.0."""
        capped = torch.clip(x, max = 6) if self.relu6 else x
        above_threshold = capped > self.threshold
        return torch.where(above_threshold, capped, 0.0)
def output_accuracy_to_csv(arch, relu_threshold, top1, top5, sparsity, output_path):
    """Append one accuracy/sparsity row to the csv file at ``output_path``.

    The header row is written first when the file does not exist yet. Every
    field is converted with ``str`` before joining; previously ``top1`` and
    ``top5`` were joined unconverted, which raised TypeError for numbers.

    Args:
        arch: Network name, written to the "Network" column.
        relu_threshold: Written to the "ReLU_Threshold" column.
        top1: Written to the "Top1_Accuracy" column (string or number).
        top5: Written to the "Top5_Accuracy" column (string or number).
        sparsity: Written to the "Network_Sparsity" column.
        output_path: Path of the csv file to create or append to.
    """
    # Decide before opening: opening in append mode creates the file.
    write_header = not os.path.isfile(output_path)

    with open(output_path, mode='a') as f:
        if write_header:
            f.write("Network,ReLU_Threshold,Top1_Accuracy,Top5_Accuracy,Network_Sparsity\n")
        fields = (arch, relu_threshold, top1, top5, sparsity)
        row = ','.join(str(field) for field in fields) + "\n"
        print("Writing to csv")
        f.write(row)
# Annotate the dense ONNX model of every architecture with the sparsity data
# of its zero-threshold run.
if __name__ == "__main__":
    onnx_dir = "../fpgaconvnet-optimiser/fpgaconvnet/optimiser/onnx_models/"
    for model in ["resnet18", "resnet50", "vgg11", "vgg16", "alexnet", "mobilenet_v2"]:
        pattern = "runlog/" + model + "_sparsity_run_50K_relu_0_*"
        # Sorted so the choice is deterministic. Run directories end in a
        # timestamp, so the last match is the newest run; the unsorted
        # `glob.glob(...)[0]` picked an arbitrary one.
        matches = sorted(glob.glob(pattern))
        if not matches:
            # Previously an IndexError aborted the whole loop on the first
            # architecture without a run directory.
            print("No run directory matching " + pattern + "; skipping " + model)
            continue
        run_dir = matches[-1]  # renamed from `dir`, which shadowed the builtin
        dense = onnx_dir + model + ".onnx"
        sparse = onnx_dir + model + "_full.onnx"
        os.system("python onnx_sparsity_attribute_full.py --arch " + model + " --data " + run_dir + " --dense_onnx_path " + dense + " --sparse_onnx_path " + sparse)
output_dir): if isinstance(module, VanillaConvolutionWrapper): if bFirst: bFirst = False - with open(file_path, mode='a') as f: + with open(file_path, mode='w') as f: csv_writer = csv.writer(f) csv_header = ["Layer Name", "Layer Type"] - csv_header += ["KERNEL*KERNEL", "Avg Zeros", "Avg Sparsity"] + csv_header += ["KERNEL*KERNEL", "Avg Zeros", "Avg Sparsity", "Avg Window Sparsity"] csv_writer.writerow(csv_header) with open(file_path, mode='a') as f: csv_writer = csv.writer(f) new_row = [name, type(module)] - new_row += [module.kk, module.statistics.mean.mean().item(), module.statistics.mean.mean().item()/module.kk] + new_row += [module.kk, module.statistics.mean.mean().item(), module.statistics.mean.mean().item()/module.kk, module.statistics.histograms.sum(axis = 0)[-1]/module.statistics.histograms.sum()] csv_writer.writerow(new_row) np.save(os.path.join(output_dir,"{}_{}_mean.npy".format(model_name, name)), module.statistics.mean.cpu().numpy()) np.save(os.path.join(output_dir,"{}_{}_var.npy".format(model_name, name)), module.statistics.var.cpu().numpy()) np.save(os.path.join(output_dir,"{}_{}_correlation.npy".format(model_name, name)), module.statistics.cor.cpu().numpy()) - #np.save(os.path.join(output_dir,"{}_{}_sparsity.npy".format(model_name, name)), module.statistics.sparsity) + # np.save(os.path.join(output_dir,"{}_{}_sparsity.npy".format(model_name, name)), module.statistics.sparsity) + np.save(os.path.join(output_dir,"{}_{}_histograms.npy".format(model_name, name)), module.statistics.histograms.cpu().numpy()) # np.savetxt(os.path.join(output_dir,"{}_{}_mean.csv".format(model_name, name)), module.statistics.mean.cpu().numpy(), delimiter=",") # np.savetxt(os.path.join(output_dir,"{}_{}_var.csv".format(model_name, name)), module.statistics.var.cpu().numpy(), delimiter=",") # np.savetxt(os.path.join(output_dir,"{}_{}_correlation.csv".format(model_name, name)), module.statistics.cor.cpu().numpy(), delimiter=",") @@ -50,8 +51,9 @@ def 
def moving_average(a, n):
    """Return the length-``n`` moving average of ``a`` along dimension 0.

    Args:
        a: Tensor whose first dimension is the sequence axis.
        n: Window length.

    Returns:
        Tensor with ``a.shape[0] - n + 1`` entries along dimension 0; entry
        ``i`` is the mean of ``a[i:i + n]``.
    """
    totals = torch.cumsum(a, dim=0)
    # Subtracting the running total from n steps earlier turns each prefix sum
    # into the sum of the last n elements. The difference is built as a new
    # tensor before being written back, so the overlapping slices are safe.
    lagged = totals[:-n]
    totals[n:] = totals[n:] - lagged
    # Positions before n - 1 do not cover a full window and are dropped.
    full_windows = totals[n - 1:]
    return full_windows / n
x.clone().cpu().numpy().reshape(-1).tolist() ])) + #Write data to a file + # with open(f"input.dat", 'w') as f: + # f.write("\n".join([ str(i) for i in x.clone().cpu().numpy().reshape(-1).tolist() ])) # https://discuss.pytorch.org/t/make-custom-conv2d-layer-efficient-wrt-speed-and-memory/70175 assert self.conv_module.padding_mode == 'zeros' + #Zero-pad x x_padded = F.pad(input=x, pad=self.conv_module._reversed_padding_repeated_twice, mode='constant', value=0) dh, dw = self.conv_module.stride + + #Number of filter, number of channels, kernel height, kernel width out_channels, in_channels, kh, kw = self.conv_module.weight.shape + + groups = self.conv_module.groups in_channels *= groups batch_size = x.shape[0] @@ -123,11 +146,19 @@ def forward(self, x): patches = x_padded.unfold(2, kh, dh).unfold(3, kw, dw) h_windows = patches.shape[2] w_windows = patches.shape[3] - patches = patches.expand(out_channels//groups, *patches.shape) - patches = patches.permute(1, 3, 4, 0, 2, 5, 6) - num_of_elements = torch.numel(patches) + patches = patches.expand(out_channels//groups, *patches.shape) # dims = (out_channels//groups, batch_size, in_channels, h_windows, w_windows, kh, kw) + patches = patches.permute(1, 3, 4, 0, 2, 5, 6) # dims = ( batch_size, h_windows, w_windows, out_channels//groups, in_channels, kh, kw) + self.ops = h_windows * w_windows * out_channels * in_channels * kh * kw + + # num_of_elements = torch.numel(patches) + if (self.statistics.histograms == None): + #NOTE: Toggle the commenting for the following 2 lines for per window + # self.statistics.histograms = torch.zeros(in_channels//groups, h_windows, w_windows, self.kk + 1) + self.statistics.histograms = torch.zeros(in_channels//groups, self.kk + 1) + + if torch.cuda.is_available(): + self.statistics.histograms = self.statistics.histograms.cuda() - num_of_nonzeros = 0 y = torch.zeros((batch_size, h_windows, w_windows, out_channels)) if torch.cuda.is_available(): y = y.cuda() @@ -146,15 +177,38 @@ def forward(self, 
x): wend = (wi+1) * (w_windows // self.roll_factor) patch = patches[:,hstart:hend,wstart:wend].reshape((batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, out_channels//groups, groups, in_channels//groups, kh, kw)) - patch = patch.permute(0, 1, 2, 4, 3, 5, 6, 7) + patch = patch.permute(0, 1, 2, 4, 3, 5, 6, 7) #(batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, groups, out_channels//groups, in_channels//groups, kh, kw) weight = self.conv_module.weight.reshape((groups, out_channels//groups, in_channels//groups, kh, kw)) patch = patch * weight + #----------------Zero Histogram Calculation and Update----------------------- + #Patches Dims: (batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, groups, out_channels//groups, in_channels//groups, kh, kw) + #Histograms Dims: (in_channels, h_windows, w_windows, self.kk) + tmp = patch.reshape((*patch.shape[:-2], self.kk)) + + num_of_zeros = self.kk - torch.count_nonzero(tmp, dim = -1) # (batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, groups, out_channels//groups, in_channels//groups) + + + zeros_hists = F.one_hot(num_of_zeros, num_classes = self.kk + 1) # (batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, groups, out_channels//groups, in_channels//groups, bins) + + #All out_channels have the input feature map and therefore same sparsity, can squeeze those dimensions + zeros_hists = zeros_hists[:, :, :, :, 0].squeeze(4) # (batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, groups, in_channels//groups, bins) + + #NOTE: Toggle the commenting for the following 5 lines for per window + zeros_hists = zeros_hists.reshape(batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, in_channels, self.kk + 1) + zeros_hists = zeros_hists.sum(dim = (0, 1, 2)) # (in_channels, bins) + self.statistics.histograms += zeros_hists + # zeros_hists = zeros_hists.sum(dim = 0) # (h_windows//self.roll_factor, 
w_windows//self.roll_factor, in_channels//groups, bins) + # zeros_hists = zeros_hists.permute(2, 0, 1, 3) # (in_channels//groups, h_windows//self.roll_factor, w_windows//self.roll_factor, bins) + # self.statistics.histograms[:,hstart:hend,wstart:wend, :] += zeros_hists + + #------------------------Average sparsity calculate and update---------------------------------- tmp = patch.reshape((-1, self.kk)) num_of_zeros = self.kk - torch.count_nonzero(tmp, dim=1) num_of_zeros = num_of_zeros.reshape((-1, self.conv_module.in_channels)) self.statistics.update(num_of_zeros) + #-----------------------MA Statistics----------------------- if self.ma_statistics is not None: if self.ma_data_buffer is None: self.ma_data_buffer = num_of_zeros @@ -168,21 +222,15 @@ def forward(self, x): else: self.ma_data_buffer = self.ma_data_buffer[-(self.ma_window_size-1):] - patch = patch.sum(-1).sum(-1).sum(-1) - patch = patch.reshape(batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, out_channels) + # patch = patch.sum(-1).sum(-1).sum(-1) + # patch = patch.reshape(batch_size, h_windows//self.roll_factor, w_windows//self.roll_factor, out_channels) + + # y[:,hstart:hend,wstart:wend] = patch - y[:,hstart:hend,wstart:wend] = patch - if self.conv_module.bias is not None: - bias = self.conv_module.bias.expand(batch_size, h_windows, w_windows, out_channels) - y = y + bias - y = y.permute(0, 3, 1, 2) + return self.conv_module(x) - if self.run_reference: - ref_output = self.conv_module(x) - assert torch.allclose(ref_output, y, atol=1e-5) - return y def replace_with_vanilla_convolution(model, window_size=None): replace_dict = {} @@ -204,3 +252,6 @@ def replace_with_vanilla_convolution(model, window_size=None): conv_layer_index += 1 replace_modules(model, replace_dict) + + + diff --git a/visualise/visualise_acc_vs_throughput.py b/visualise/visualise_acc_vs_throughput.py new file mode 100644 index 0000000..6f63762 --- /dev/null +++ b/visualise/visualise_acc_vs_throughput.py @@ -0,0 
def main(argv=None):
    """Plot top-5 accuracy and normalised throughput against the ReLU threshold.

    Reads a CSV containing the columns ``ReLU_Threshold``, ``Top5_Accuracy``
    and ``Throughput`` and saves a twin-axis figure: accuracy (with a dashed
    line one point below the best accuracy) on the left axis, and throughput
    divided by its minimum on the right axis.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    # Both paths are mandatory: without them read_csv/savefig would be handed
    # None and fail with an unrelated error far from the real cause.
    parser.add_argument("--filepath", required=True, type=str,
                        help="Path to .csv file with accuracy and throughput")
    parser.add_argument("--output", required=True, type=str,
                        help="Path to .png file to save to")

    args = parser.parse_args(argv)

    data = pd.read_csv(args.filepath)

    fig, ax1 = plt.subplots()
    fig.set_figheight(9)
    fig.set_figwidth(16)
    ax2 = ax1.twinx()

    ax1.plot(data["ReLU_Threshold"], data["Top5_Accuracy"], label="Accuracy")
    ax1.axhline(data["Top5_Accuracy"].max() - 1, color='k', linestyle='dashed',
                linewidth=1, label="Accuracy Loss = 1%")
    # Throughput is shown relative to the slowest configuration in the file.
    ax2.plot(data["ReLU_Threshold"], data["Throughput"] / data["Throughput"].min(),
             label="Throughput", color='r')

    fig.suptitle('Overview of relu thresholding for resnet18')
    ax1.set(xlabel="Relu Threshold", ylabel="Accuracy")
    ax2.set(xlabel="Relu Threshold", ylabel="Normalised Throughput")
    ax1.legend(loc="best")
    ax2.legend(loc="best")

    fig.savefig(args.output)


# Without this guard the script defined main() but never ran it.
if __name__ == "__main__":
    main()
def visualise_layer(corr_data, hist_data, output_path):
    """Draw the three-panel sparsity figure for one layer and save it.

    The panels are produced by ``plot_channel_sparsity_avg_histograms``,
    ``plot_channel_sparsity_correlation_histogram`` and
    ``plot_channel_sparsity_correlation_heatmap``. Plotting is best-effort: a
    failure is reported on stdout and the function returns normally so that a
    caller iterating over many layers can carry on.

    Args:
        corr_data: Channel correlation array. It is cleaned IN PLACE:
            infinite entries become 1 and NaN entries become 0.
        hist_data: Zero-count histogram array for the layer.
        output_path: Destination image path; its file name without the last
            four characters is used in the figure title.
    """
    # NaN never compares equal to anything (itself included), so an "== nan"
    # mask is always empty; isnan/isinf are the reliable way to build masks.
    corr_data[np.isinf(corr_data)] = 1
    corr_data[np.isnan(corr_data)] = 0

    fig, ax = plt.subplots(3)
    fig.set_figheight(20)
    fig.set_figwidth(20)
    fig.suptitle('Sparsity statistics for ' + output_path.split("/")[-1][:-4], fontsize = 16)
    try:
        plot_channel_sparsity_avg_histograms(hist_data, ax[0])
        plot_channel_sparsity_correlation_histogram(corr_data, ax[1])
        plot_channel_sparsity_correlation_heatmap(corr_data, ax[2])
        print("Saving in output path", output_path)
        fig.savefig(output_path)
    except Exception as err:
        # Stay best-effort, but say what went wrong instead of hiding it.
        print("Failed to visualise", output_path, "-", repr(err))
    finally:
        # One figure is created per layer; release it so they do not pile up.
        plt.close(fig)
def network_overview(mean_list, corr_list, hist_list, data_spec, output_path):
    """Plot a three-panel, per-layer summary of a network's sparsity statistics.

    The i-th entries of ``mean_list``, ``corr_list`` and ``hist_list`` are
    loaded together and treated as one layer, so the three lists must be given
    in matching order. If the lists differ in length, only the first
    ``min(len(...))`` layers are plotted.

    Panels: average sparsity per layer, average channel correlation per layer,
    and the share of histogram mass in the last bin per layer.

    Args:
        mean_list: Paths of the ``*_mean.npy`` files.
        corr_list: Paths of the ``*_correlation.npy`` files.
        hist_list: Paths of the ``*_histograms.npy`` files.
        data_spec: Label appended to the figure title.
        output_path: Path the figure is saved to.
    """
    def plot_mean_per_layer(mean_data, layer_names, ax):
        ax.bar(layer_names, mean_data)
        ax.set_title("Average mean sparsity per layer")
        ax.set(xlabel = "Layer name", ylabel = "Average Sparsity")

    def plot_corr_per_layer(corr_data, layer_names, ax):
        ax.bar(layer_names, corr_data)
        ax.set_title("Average Sparsity correlation per layer")
        ax.set(xlabel = "Layer name", ylabel = "Average Correlation")

    def plot_non_zeros_per_layer(data, layer_names, ax):
        # An all-zero histogram gives 0/0; "== nan" can never match such
        # entries, so mask every non-finite value explicitly.
        data[~np.isfinite(data)] = 0
        ax.bar(layer_names, data)
        ax.set_title("Average percentage of full zero windows per layer")
        ax.set(xlabel = "Layer name", ylabel = "Zero windows proportion")

    # Grow lists instead of pre-sizing np.empty buffers from three separate
    # lengths: zip() stops at the shortest list, which would otherwise leave
    # uninitialised values in the plotted arrays.
    mean_layers = []
    corr_layers = []
    zero_window_share = []
    layer_names = []
    for mean_file, corr_file, hist_file in zip(mean_list, corr_list, hist_list):
        mean_data = np.load(mean_file)
        corr_data = np.load(corr_file)
        hist_data = np.load(hist_file)

        # NaN is never equal to itself, so clean with isnan/isinf; otherwise a
        # single NaN would turn the layer's mean correlation into NaN.
        corr_data[np.isinf(corr_data)] = 1
        corr_data[np.isnan(corr_data)] = 0

        # Mean zero count is normalised by (number of histogram bins - 1);
        # presumably that is the kernel window size - confirm against the
        # code that writes the histograms.
        mean_layers.append(mean_data.mean() / (len(hist_data[0]) - 1))
        corr_layers.append(corr_data.mean())
        hist_sum = hist_data.sum(axis = 0)
        zero_window_share.append(hist_sum[-1] / hist_sum.sum())
        print(corr_file)
        layer_names.append(corr_file.split("/")[-1].split("_")[1:-1][-1])

    mean_layers = np.array(mean_layers, dtype=float)
    corr_layers = np.array(corr_layers, dtype=float)
    zero_window_share = np.array(zero_window_share, dtype=float)

    fig, ax = plt.subplots(3)
    fig.set_figheight(20)
    fig.set_figwidth(20)
    fig.suptitle('Overview of sparsity statistics for ' + data_spec)
    plot_mean_per_layer(mean_layers, layer_names, ax[0])
    plot_corr_per_layer(corr_layers, layer_names, ax[1])
    plot_non_zeros_per_layer(zero_window_share, layer_names, ax[2])
    print("Saving in output path", output_path)
    try:
        fig.savefig(output_path)
    finally:
        # Called once per network; release the figure once it is on disk.
        plt.close(fig)
\u001b[39m\"\u001b[39m\u001b[39m_\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(sparsity_data\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m/\u001b[39m\u001b[39m\"\u001b[39m)[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m_\u001b[39m\u001b[39m\"\u001b[39m)[:\u001b[39m-\u001b[39m\u001b[39m7\u001b[39m])\n", + "File \u001b[0;32m~/sparseCNN/visualise/visualise_layer_sparsity.py:4\u001b[0m, in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mmatplotlib\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mpyplot\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mplt\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mseaborn\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39msns\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mplot_avg_channel_sparsity_distribution\u001b[39m(data, ax):\n\u001b[1;32m 7\u001b[0m ax\u001b[39m.\u001b[39mhist(data)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'seaborn'" + ] + } + ], + "source": [ + "import argparse\n", + "import glob\n", + "import os\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from visualise_layer_sparsity import visualise_layer\n", + "\n", + "\n", + "\n", + "def visualise_network(sparsity_data = None):\n", + "\n", + " data_spec = \"_\".join(sparsity_data.split(\"/\")[-1].split(\"_\")[:-7])\n", + " output_dir = os.path.join(\"figures\", data_spec)\n", + " mean_list = glob.glob(os.path.join(sparsity_data, \"*_mean.npy\"))\n", + " corr_list = glob.glob(os.path.join(sparsity_data, \"*_correlation.npy\"))\n", + " hist_list = glob.glob(os.path.join(sparsity_data, \"*_histograms.npy\"))\n", + " data_list = zip(corr_list, hist_list)\n", + "\n", + " if not os.path.isdir(output_dir):\n", + " os.makedirs(output_dir)\n", + "\n", + " 
output_path = os.path.join(output_dir, \"layer_level_overview.png\")\n", + "\n", + " network_overview(mean_list, corr_list, hist_list, data_spec, output_path)\n", + "\n", + " for corr_file, hist_file in data_list:\n", + " output_path = os.path.join(output_dir, corr_file.split(\"/\")[-1][:-16] + \".png\")\n", + " corr_data = np.load(corr_file)\n", + " hist_data = np.load(hist_file)\n", + " visualise_layer(corr_data, hist_data, output_path)\n", + "\n", + "\n", + "def network_overview(mean_list, corr_list, hist_list, data_spec, output_path):\n", + " def plot_mean_per_layer(mean_data, layer_names, ax):\n", + " ax.bar(layer_names, mean_data)\n", + " ax.set_title(\"Average mean sparsity per layer\")\n", + " ax.set(xlabel = \"Layer name\", ylabel = \"Average Sparsity\")\n", + " def plot_corr_per_layer(corr_data, layer_names, ax):\n", + " ax.bar(layer_names, corr_data)\n", + " ax.set_title(\"Average apRSITY correlation per layer\")\n", + " ax.set(xlabel = \"Layer name\", ylabel = \"Average Correlation\")\n", + " def plot_non_zeros_per_layer(data, layer_names, ax):\n", + " data[data == np.inf] = 0\n", + " data[data == np.nan] = 0\n", + " ax.bar(layer_names, data)\n", + " ax.set_title(\"Average percentage of full zero windows per layer\")\n", + " ax.set(xlabel = \"Layer name\", ylabel = \"Zero windows (%)\")\n", + "\n", + " mean_layers = np.empty(len(mean_list))\n", + " corr_layers = np.empty(len(corr_list))\n", + " percentage_non_zeros_per_layer = np.empty(len(hist_list))\n", + " layer_names = []\n", + " for index, (mean_file, corr_file, hist_file) in enumerate(zip(mean_list, corr_list, hist_list)):\n", + "\n", + " mean_data = np.load(mean_file)\n", + " corr_data = np.load(corr_file)\n", + " hist_data = np.load(hist_file)\n", + " corr_data[np.abs(corr_data) == np.Inf] = 1\n", + " corr_data[np.abs(corr_data) == np.NaN] = 0\n", + " mean_layers[index] = mean_data.mean()/(len(hist_data[0]) - 1)\n", + " corr_layers[index] = corr_data.mean()\n", + " hist_sum = hist_data.sum(axis 
= 0)\n", + " percentage_non_zeros_per_layer[index] = hist_sum[-1]/hist_sum.sum()\n", + " layer_names.append(corr_file.split(\"/\")[-1].split(\"_\")[1:-1][0])\n", + "\n", + " fig, ax = plt.subplots(3)\n", + " fig.set_figheight(20)\n", + " fig.set_figwidth(20)\n", + " fig.suptitle('Overview of sparsity statistics for ' + data_spec)\n", + " plot_mean_per_layer(mean_layers, layer_names, ax[0])\n", + " plot_corr_per_layer(corr_layers, layer_names, ax[1])\n", + " plot_non_zeros_per_layer(percentage_non_zeros_per_layer, layer_names, ax[2])\n", + " print(\"Saving in output path\", output_path)\n", + " fig.savefig(output_path)\n", + "\n", + "\n", + "\n", + "if __name__ == \"__main__\":\n", + " sparsity_data = \"../runlog/resnet18_sparsity_run_50K_relu_0.01_2023_04_05_13_22_12_846615\"\n", + " data_spec = \"_\".join(sparsity_data.split(\"/\")[-1].split(\"_\")[:-7])\n", + " output_dir = os.path.join(\"figures\", data_spec)\n", + " mean_list = glob.glob(os.path.join(sparsity_data, \"*_mean.npy\"))\n", + " corr_list = glob.glob(os.path.join(sparsity_data, \"*_correlation.npy\"))\n", + " hist_list = glob.glob(os.path.join(sparsity_data, \"*_histograms.npy\"))\n", + " data_list = zip(corr_list, hist_list)\n", + "\n", + " if not os.path.isdir(output_dir):\n", + " os.makedirs(output_dir)\n", + "\n", + " output_path = os.path.join(output_dir, \"layer_level_overview.png\")\n", + "\n", + " network_overview(mean_list, corr_list, hist_list, data_spec, output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.read_csv(\"../runlog/resnet18_accuracy_var_relu.csv\")\n", + "\n", + "fig, ax1 = plt.subplots()\n", + "fig.set_figheight(9)\n", + "fig.set_figwidth(16)\n", + "ax2 = ax1.twinx()\n", + "\n", + "ax1.plot(data[\"ReLU_Threshold\"], data[\"Top5_Accuracy\"], label=\"Accuracy\")\n", + "ax1.axhline(data[\"Top5_Accuracy\"].max() - 1, color='k', linestyle='dashed', linewidth=1, label=\"Accuracy Loss = 1%\")\n", + "ax2.plot(data[\"ReLU_Threshold\"], data[\"Throughput\"]/(data[\"Throughput\"].min()), label=\"Throughput\", color = 'r')\n", + "\n", + "\n", + "fig.suptitle('Overview of relu thresholding for resnet18')\n", + "ax1.set(xlabel = \"Relu Threshold\", ylabel = \"Accuracy\")\n", + "ax2.set(xlabel = \"Relu Threshold\", ylabel = \"Normalised Throughput\")\n", + "ax1.legend(loc = \"best\")\n", + "ax2.legend(loc = \"best\")\n", + "\n", + "fig.savefig(\"figures/resnet18_relu_acc_vs_through.png\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "sparseCNN", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}