From 9b69e9d331fd54d37641b8c62c56c0f975613796 Mon Sep 17 00:00:00 2001 From: Chen Pin-han <72907153+sifa1024@users.noreply.github.com> Date: Tue, 5 Mar 2024 18:30:04 +0800 Subject: [PATCH 1/3] Update architect.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 72907153+sifa1024@users.noreply.github.com Signed-off-by: Chen Pin-Han <72907153+sifa1024​@users.noreply.github.com> --- .../darts-cnn-cifar10/architect.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py index c3d649a11db..afe775237c9 100644 --- a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py +++ b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py @@ -46,6 +46,12 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # Compute gradient gradients = torch.autograd.grad(loss, self.model.getWeights()) + # Check device use cuda or cpu + use_cuda = list(range(torch.cuda.device_count())) + if use_cuda: + print("Using CUDA") + device = torch.device("cuda" if use_cuda else "cpu") + # Do virtual step (Update gradient) # Below operations do not need gradient tracking with torch.no_grad(): @@ -53,7 +59,10 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # be iterated also. for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients): m = w_optim.state[w].get("momentum_buffer", 0.) 
* self.w_momentum - vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) + if(device == 'cuda'): + vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) + elif(device == 'cpu'): + vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) # Sync alphas for a, va in zip(self.model.getAlphas(), self.v_model.getAlphas()): @@ -72,6 +81,12 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim): # Loss for validation with w'. L_valid(w') loss = self.v_model.loss(valid_x, valid_y) + # Check device use cuda or cpu + use_cuda = list(range(torch.cuda.device_count())) + if use_cuda: + print("Using CUDA") + device = torch.device("cuda" if use_cuda else "cpu") + # Calculate gradient v_alphas = tuple(self.v_model.getAlphas()) v_weights = tuple(self.v_model.getWeights()) @@ -85,7 +100,10 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim): # Update final gradient = dalpha - xi * hessian with torch.no_grad(): for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian): - alpha.grad = da - torch.FloatTensor(xi) * h + if(device == 'cuda'): + alpha.grad = da - torch.cuda.FloatTensor(xi) * h + elif(device == 'cpu'): + alpha.grad = da - torch.cpu.FloatTensor(xi) * h def compute_hessian(self, dws, train_x, train_y): """ From 4f3ef04c97e595f51ec5ddf1fb173514eb302c1e Mon Sep 17 00:00:00 2001 From: Chen Pin-han <72907153+sifa1024@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:59:17 +0800 Subject: [PATCH 2/3] Update run_trial.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 72907153+sifa1024@users.noreply.github.com Signed-off-by: Chen Pin-Han <72907153+sifa1024​@users.noreply.github.com> --- examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py 
b/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py index ceb74dfc5e3..a9836d240cd 100644 --- a/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py +++ b/examples/v1beta1/trial-images/darts-cnn-cifar10/run_trial.py @@ -140,7 +140,7 @@ def main(): num_epochs, eta_min=w_lr_min) - architect = Architect(model, w_momentum, w_weight_decay) + architect = Architect(model, w_momentum, w_weight_decay, device) # Start training best_top1 = 0. From bbb9f3e4adf7aa46cd1e5800287149ba0997ce80 Mon Sep 17 00:00:00 2001 From: Chen Pin-han <72907153+sifa1024@users.noreply.github.com> Date: Wed, 6 Mar 2024 21:59:23 +0800 Subject: [PATCH 3/3] Update architect.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 72907153+sifa1024@users.noreply.github.com Signed-off-by: Chen Pin-Han <72907153+sifa1024​@users.noreply.github.com> --- .../darts-cnn-cifar10/architect.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py index afe775237c9..c449a6a1fe9 100644 --- a/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py +++ b/examples/v1beta1/trial-images/darts-cnn-cifar10/architect.py @@ -20,11 +20,12 @@ class Architect(): """" Architect controls architecture of cell by computing gradients of alphas """ - def __init__(self, model, w_momentum, w_weight_decay): + def __init__(self, model, w_momentum, w_weight_decay, device): self.model = model self.v_model = copy.deepcopy(model) self.w_momentum = w_momentum self.w_weight_decay = w_weight_decay + self.device = device def virtual_step(self, train_x, train_y, xi, w_optim): """ @@ -43,14 +44,9 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # Forward and calculate loss # Loss for train with w. 
L_train(w) loss = self.model.loss(train_x, train_y) + # Compute gradient gradients = torch.autograd.grad(loss, self.model.getWeights()) - - # Check device use cuda or cpu - use_cuda = list(range(torch.cuda.device_count())) - if use_cuda: - print("Using CUDA") - device = torch.device("cuda" if use_cuda else "cpu") # Do virtual step (Update gradient) # Below operations do not need gradient tracking @@ -59,9 +55,9 @@ def virtual_step(self, train_x, train_y, xi, w_optim): # be iterated also. for w, vw, g in zip(self.model.getWeights(), self.v_model.getWeights(), gradients): m = w_optim.state[w].get("momentum_buffer", 0.) * self.w_momentum - if(device == 'cuda'): + if(self.device == 'cuda'): vw.copy_(w - torch.cuda.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) - elif(device == 'cpu'): + elif(self.device == 'cpu'): vw.copy_(w - torch.FloatTensor(xi) * (m + g + self.w_weight_decay * w)) # Sync alphas @@ -80,12 +76,6 @@ def unrolled_backward(self, train_x, train_y, valid_x, valid_y, xi, w_optim): # Calculate unrolled loss # Loss for validation with w'. L_valid(w') loss = self.v_model.loss(valid_x, valid_y) - - # Check device use cuda or cpu - use_cuda = list(range(torch.cuda.device_count())) - if use_cuda: - print("Using CUDA") - device = torch.device("cuda" if use_cuda else "cpu") # Calculate gradient v_alphas = tuple(self.v_model.getAlphas()) @@ -100,9 +90,9 @@ # Update final gradient = dalpha - xi * hessian with torch.no_grad(): for alpha, da, h in zip(self.model.getAlphas(), dalpha, hessian): - if(device == 'cuda'): + if(self.device == 'cuda'): alpha.grad = da - torch.cuda.FloatTensor(xi) * h - elif(device == 'cpu'): + elif(self.device == 'cpu'): - alpha.grad = da - torch.cpu.FloatTensor(xi) * h + alpha.grad = da - torch.FloatTensor(xi) * h def compute_hessian(self, dws, train_x, train_y):