eborin · henricsoares · May 22, 2021 · May 22, 2021 · May 24, 2021 · May 24, 2021
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+cifar10/
+*.png
diff --git a/dist_dcgan.py b/dist_dcgan.py
@@ -3,6 +3,7 @@
 import random
 import numpy as np
 import argparse
+import socket
 import time
 import torchvision
 import torchvision.transforms as transforms
@@ -13,8 +14,12 @@
 import torchvision.datasets as dset
 import torchvision.utils as vutils
 
+initTime = time.time()
+curpath= os.path.abspath(os.curdir)
+
 class Generator(nn.Module):
     def __init__(self, nz, ngf, nc):
+
         super(Generator, self).__init__()
         self.main = nn.Sequential(
             # input is Z, going into a convolution
@@ -189,6 +194,8 @@ def main():
         epoch_start_time = time.time()
         print(f"Rank: {rank}, Epoch: {epoch}, Training ...")
         for i, data in enumerate(train_loader):
+            if i >= 20:
+                break
             iteration_start_time = time.time()
             ############################
             # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
@@ -230,6 +237,12 @@ def main():
             optimizerG.step()
 
             iteration_end_time = time.time()-iteration_start_time
+            # Log initialization time, measured from module import to the end of the first iteration - Henrique
+            global initTime, curpath
+            if i == 0:
+                with open(os.path.join(curpath, (f'experiments/ativ-7-exp-1/results/{socket.gethostname()}-rank{rank}.out')), 'w') as arquivo:
+                    arquivo.write(f"Rank: {rank}, Initialization Time: {(time.time() - initTime):.4f}s.\n")
+
             print(f"[epoch: {epoch}/{argv.num_epochs}][iteration: {i}/{len(train_loader)}][rank: {rank}] " \
                   f"Loss_D: {errD.item():.4f}, Loss_G: {errG.item():.4f}, " \
                   f"D(x): {D_x:.4f}, D(G(z)): {D_G_z1:.4f} / {D_G_z2:.4f}, " \
@@ -240,9 +253,14 @@ def main():
                 fake = netG(fixed_noise)
                 vutils.save_image(fake.detach(), f'{argv.out_folder}/fake_samples_rank_{rank}_epoch_{epoch}_iter_{i}.png', normalize=True)
                 torch.distributed.barrier()
+
+            with open(os.path.join(curpath, (f'experiments/ativ-7-exp-1/results/{socket.gethostname()}-rank{rank}.out')), 'a+') as arquivo:
+                arquivo.write(f"Rank: {rank}, Epoch: {epoch}, Iteration: {i}, Iteration Time: {iteration_end_time:.4f}s, Elapsed Time: {(time.time() - initTime):.4f}s.\n")
 
         epoch_end_time = time.time()-epoch_start_time
         print(f"[rank: {rank}] Epoch {epoch} took: {epoch_end_time:.4f} seconds")
+        with open(os.path.join(curpath, (f'experiments/ativ-7-exp-1/results/{socket.gethostname()}-rank{rank}.out')), 'a+') as arquivo:
+            arquivo.write(f"Rank: {rank}, Epoch: {epoch}, Iteration: {i}, Epoch Time: {epoch_end_time:.4f}s, Elapsed Time: {(time.time() - initTime):.4f}s.\n")
 
     torch.distributed.destroy_process_group()
 

diff --git a/experiments/ativ-7-exp-1/results/m5.large/ip-172-31-72-23-rank0.out b/experiments/ativ-7-exp-1/results/m5.large/ip-172-31-72-23-rank0.out
@@ -0,0 +1,22 @@
+Rank: 0, Initialization Time: 2.2270s.
+Rank: 0, Epoch: 0, Iteration: 0, Iteration Time: 1.4531s, Elapsed Time: 2.5152s.
+Rank: 0, Epoch: 0, Iteration: 1, Iteration Time: 1.3270s, Elapsed Time: 3.8458s.
+Rank: 0, Epoch: 0, Iteration: 2, Iteration Time: 1.3019s, Elapsed Time: 5.1507s.
+Rank: 0, Epoch: 0, Iteration: 3, Iteration Time: 1.2989s, Elapsed Time: 6.4501s.
+Rank: 0, Epoch: 0, Iteration: 4, Iteration Time: 1.2915s, Elapsed Time: 7.7423s.
+Rank: 0, Epoch: 0, Iteration: 5, Iteration Time: 1.2913s, Elapsed Time: 9.0354s.
+Rank: 0, Epoch: 0, Iteration: 6, Iteration Time: 1.2805s, Elapsed Time: 10.3185s.
+Rank: 0, Epoch: 0, Iteration: 7, Iteration Time: 1.2825s, Elapsed Time: 11.6014s.
+Rank: 0, Epoch: 0, Iteration: 8, Iteration Time: 1.3044s, Elapsed Time: 12.9063s.
+Rank: 0, Epoch: 0, Iteration: 9, Iteration Time: 1.2837s, Elapsed Time: 14.1919s.
+Rank: 0, Epoch: 0, Iteration: 10, Iteration Time: 1.3035s, Elapsed Time: 15.4974s.
+Rank: 0, Epoch: 0, Iteration: 11, Iteration Time: 1.2919s, Elapsed Time: 16.7911s.
+Rank: 0, Epoch: 0, Iteration: 12, Iteration Time: 1.2898s, Elapsed Time: 18.0828s.
+Rank: 0, Epoch: 0, Iteration: 13, Iteration Time: 1.2692s, Elapsed Time: 19.3540s.
+Rank: 0, Epoch: 0, Iteration: 14, Iteration Time: 1.2797s, Elapsed Time: 20.6357s.
+Rank: 0, Epoch: 0, Iteration: 15, Iteration Time: 1.2967s, Elapsed Time: 21.9344s.
+Rank: 0, Epoch: 0, Iteration: 16, Iteration Time: 1.2830s, Elapsed Time: 23.2193s.
+Rank: 0, Epoch: 0, Iteration: 17, Iteration Time: 1.2749s, Elapsed Time: 24.5048s.
+Rank: 0, Epoch: 0, Iteration: 18, Iteration Time: 1.2750s, Elapsed Time: 25.7819s.
+Rank: 0, Epoch: 0, Iteration: 19, Iteration Time: 1.2763s, Elapsed Time: 27.0604s.
+Rank: 0, Epoch: 0, Iteration: 20, Epoch Time: 26.4058s, Elapsed Time: 27.0803s.