From 9b69e9d331fd54d37641b8c62c56c0f975613796 Mon Sep 17 00:00:00 2001 From: Chen Pin-han <72907153+sifa1024@users.noreply.github.com> Date: Tue, 5 Mar 2024 18:30:04 +0800 Subject: [PATCH 1/3] Update architect.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 72907153+sifa1024@users.noreply.github.com Signed-off-by: Chen Pin-Han <72907153+sifa1024​@users.noreply.github.com> --- .../darts-cnn-cifar10/architect.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py index c3d649a11db..afe775237c9 100644 --- a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py +++ b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py @@ -46,6 +46,12 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # Compute gradient gradients = torch.autograd.grad(loss, self.model.getWeights()) + # Check device use cuda or cpu + use_cuda = list(range(torch.cuda.device_count())) + if use_cuda: + print("Using CUDA") + device = torch.device("cuda" if use_cuda else "cpu") + # Do virtual step (Update gradient) # Below operations do not need gradient tracking with torch.no_grad(): @@ -53,7 +59,10 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # be iterated also. for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients): m = w_optim.state[w].get("momentum_buffer", 0.) 
* self.w_momentum - vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) + if(device == 'cuda'): + vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) + elif(device == 'cpu'): + vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) # Sync alphas for a, va in zip(self.model.getAlphas(), self.v_model.getAlphas()): @@ -72,6 +81,12 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim): # Loss for validation with w'. L_valid(w') loss = self.v_model.loss(valid_x, valid_y) + # Check device use cuda or cpu + use_cuda = list(range(torch.cuda.device_count())) + if use_cuda: + print("Using CUDA") + device = torch.device("cuda" if use_cuda else "cpu") + # Calculate gradient v_alphas = tuple(self.v_model.getAlphas()) v_weights = tuple(self.v_model.getWeights()) @@ -85,7 +100,10 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim): # Update final gradient = dalpha - xi * hessian with torch.no_grad(): for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian): - alpha.grad = da - torch.FloatTensor(xi) * h + if(device == 'cuda'): + alpha.grad = da - torch.cuda.FloatTensor(xi) * h + elif(device == 'cpu'): + alpha.grad = da - torch.cpu.FloatTensor(xi) * h def compute_hessian(self, dws, train_x, train_y): """ From 4f3ef04c97e595f51ec5ddf1fb173514eb302c1e Mon Sep 17 00:00:00 2001 From: Chen Pin-han <72907153+sifa1024@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:59:17 +0800 Subject: [PATCH 2/3] Update run_trial.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 72907153+sifa1024@users.noreply.github.com Signed-off-by: Chen Pin-Han <72907153+sifa1024​@users.noreply.github.com> --- examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py 
b/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py index ceb74dfc5e3..a9836d240cd 100644 --- a/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py +++ b/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py @@ -140,7 +140,7 @@ def main(): num_epochs, eta_min=w_lr_min) - architect = Architect(model, w_momentum, w_weight_decay) + architect = Architect(model, w_momentum, w_weight_decay, device) # Start training best_top1 = 0. From bbb9f3e4adf7aa46cd1e5800287149ba0997ce80 Mon Sep 17 00:00:00 2001 From: Chen Pin-han <72907153+sifa1024@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:59:23 +0800 Subject: [PATCH 3/3] Update architect.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 72907153+sifa1024@users.noreply.github.com Signed-off-by: Chen Pin-Han <72907153+sifa1024​@users.noreply.github.com> --- .../darts-cnn-cifar10/architect.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py index afe775237c9..c449a6a1fe9 100644 --- a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py +++ b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py @@ -20,11 +20,12 @@ class Architect(): """" Architect controls architecture of cell by computing gradients of alphas """ - def __init__(self, model, w_momentum, w_weight_decay): + def __init__(self, model, w_momentum, w_weight_decay, device): self.model = model self.v_model = copy.deepcopy(model) self.w_momentum = w_momentum self.w_weight_decay = w_weight_decay + self.device = device def virtual_step(self, train_x, train_y, xi, w_optim): """ @@ -43,14 +44,9 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # Forward and calculate loss # Loss for train with w. 
L_train(w) loss = self.model.loss(train_x, train_y) + # Compute gradient gradients = torch.autograd.grad(loss, self.model.getWeights()) - - # Check device use cuda or cpu - use_cuda = list(range(torch.cuda.device_count())) - if use_cuda: - print("Using CUDA") - device = torch.device("cuda" if use_cuda else "cpu") # Do virtual step (Update gradient) # Below operations do not need gradient tracking @@ -59,9 +55,9 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # be iterated also. for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients): m = w_optim.state[w].get("momentum_buffer", 0.) * self.w_momentum - if(device == 'cuda'): + if(self.device == 'cuda'): vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) - elif(device == 'cpu'): + elif(self.device == 'cpu'): vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) # Sync alphas @@ -80,12 +76,6 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim): # Calculate unrolled loss # Loss for validation with w'. L_valid(w') loss = self.v_model.loss(valid_x, valid_y) - - # Check device use cuda or cpu - use_cuda = list(range(torch.cuda.device_count())) - if use_cuda: - print("Using CUDA") - device = torch.device("cuda" if use_cuda else "cpu") # Calculate gradient v_alphas = tuple(self.v_model.getAlphas()) @@ -100,9 +90,9 @@ # Update final gradient = dalpha - xi * hessian with torch.no_grad(): for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian): - if(device == 'cuda'): + if(self.device == 'cuda'): alpha.grad = da - torch.cuda.FloatTensor(xi) * h - elif(device == 'cpu'): + elif(self.device == 'cpu'): - alpha.grad = da - torch.cpu.FloatTensor(xi) * h + alpha.grad = da - torch.FloatTensor(xi) * h def compute_hessian(self, dws, train_x, train_y):