Merge pull request #3 from marsggbo/master
get latest version from marsggbo/master
PJDong authored Oct 12, 2021
2 parents 8e9a7a3 + 993dd18 commit 627f7b5
Showing 29 changed files with 715 additions and 288 deletions.
3 changes: 2 additions & 1 deletion configs/model/network_cfg/bn_nas.yaml
@@ -7,4 +7,5 @@ num_blocks: [2,2,4,4,4,1]
 strides_list: [2,2,2,1,2,1]
 num_classes: 10
 search_depth: False
-is_only_train_bn: False
+is_only_train_bn: False
+mask: null
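
The new key defaults to `mask: null`, YAML's spelling of Python `None`, so the network receives `mask=None` unless a mask is supplied. A minimal sketch of loading and sanity-checking a network_cfg like this one (plain OmegaConf shown for illustration; hyperbox's actual config machinery may differ, and `BNNet` is a hypothetical constructor name):

from omegaconf import OmegaConf

cfg = OmegaConf.create("""
num_blocks: [2,2,4,4,4,1]
strides_list: [2,2,2,1,2,1]
num_classes: 10
search_depth: False
is_only_train_bn: False
mask: null
""")
assert cfg.mask is None        # YAML null -> Python None
assert cfg.num_classes == 10
net_kwargs = OmegaConf.to_container(cfg, resolve=True)
# net_kwargs could then be splatted into the network constructor,
# e.g. BNNet(**net_kwargs); the constructor name is illustrative.
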
24 changes: 13 additions & 11 deletions hyperbox/models/classify_model.py
@@ -73,8 +73,10 @@ def training_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets, 'acc': acc}

     def training_epoch_end(self, outputs: List[Any]):
-        acc = np.mean([output['acc'].item() for output in outputs])
-        loss = np.mean([output['loss'].item() for output in outputs])
+        acc_epoch = self.trainer.callback_metrics['train/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['train/loss_epoch'].item()
+        logger.info(f'Train epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')
+

     def validation_step(self, batch: Any, batch_idx: int):
         loss, preds, targets = self.step(batch)
@@ -87,10 +89,10 @@ def validation_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets, 'acc': acc}

     def validation_epoch_end(self, outputs: List[Any]):
-        acc = np.mean([output['acc'].item() for output in outputs])
-        loss = np.mean([output['loss'].item() for output in outputs])
-        logger.info(f"[rank {self.rank}] Val epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
+        acc_epoch = self.trainer.callback_metrics['val/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['val/loss_epoch'].item()
+        logger.info(f'Val epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')

     def test_step(self, batch: Any, batch_idx: int):
         loss, preds, targets = self.step(batch)

@@ -102,15 +104,15 @@ def test_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets}

     def test_epoch_end(self, outputs: List[Any]):
-        acc = np.mean([output['acc'].item() for output in outputs])
-        loss = np.mean([output['loss'].item() for output in outputs])
-        logger.info(f"Test epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
+        acc = self.trainer.callback_metrics['test/acc'].item()
+        loss = self.trainer.callback_metrics['test/loss'].item()
+        logger.info(f'Test epoch{self.trainer.current_epoch} acc={acc:.4f} loss={loss:.4f}')

     def on_fit_start(self):
-        mflops, size = self.arch_size((1,3,32,32), convert=True)
+        mflops, size = self.arch_size((2,3,64,64), convert=True)
         logger.info(f"[rank {self.rank}] current model({self.arch}): {mflops:.4f} MFLOPs, {size:.4f} MB.")

     def on_fit_end(self):
-        mflops, size = self.arch_size((1,3,32,32), convert=True)
+        mflops, size = self.arch_size((2,3,64,64), convert=True)
         logger.info(f"[rank {self.rank}] current model({self.arch}): {mflops:.4f} MFLOPs, {size:.4f} MB.")
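
The change repeated across these models replaces hand-averaged step outputs with Lightning's own epoch aggregates: each `*_step` logs via `self.log(..., on_step=True, on_epoch=True)`, which, under the PyTorch Lightning 1.x semantics this repo uses, publishes a `<name>_epoch` entry in `trainer.callback_metrics`; the `*_epoch_end` hooks then simply read that value back. A minimal self-contained sketch of the pattern, with a stand-in model and dataset rather than hyperbox code:

import pytorch_lightning as pl
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

class LitSketch(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Linear(16, 4)

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        # on_epoch=True keeps a batch-size-weighted running mean and
        # publishes it as 'train/loss_epoch' / 'train/acc_epoch'.
        self.log("train/loss", loss, on_step=True, on_epoch=True)
        self.log("train/acc", acc, on_step=True, on_epoch=True)
        return {"loss": loss}

    def training_epoch_end(self, outputs):
        # Read the aggregate back instead of np.mean over `outputs`.
        acc = self.trainer.callback_metrics["train/acc_epoch"].item()
        loss = self.trainer.callback_metrics["train/loss_epoch"].item()
        print(f"Train epoch{self.current_epoch} acc={acc:.4f} loss={loss:.4f}")

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)

if __name__ == "__main__":
    x, y = torch.randn(64, 16), torch.randint(0, 4, (64,))
    pl.Trainer(max_epochs=2).fit(LitSketch(), DataLoader(TensorDataset(x, y), batch_size=8))

The manual `np.mean([output['acc'].item() ...])` this replaces only averaged the local rank's batches and weighted every batch equally; the logged aggregate is batch-size weighted and can additionally be reduced across ranks with `sync_dist=True`.
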

29 changes: 12 additions & 17 deletions hyperbox/models/darts_model.py
@@ -65,7 +65,7 @@ def training_step(self, batch: Any, batch_idx: int, optimizer_idx: int):
         # self.trainer.accelerator.barrier()
         # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} Updating archs")
         # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} mutator:{self.mutator._cache['normal_n2_p0']}")
-        # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} mutator:{self.mutator.choices['normal_n2_p0']}")
+        # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} mutator:{self.mutator.candidates['normal_n2_p0']}")

         # phase 2: child network step
         self.network.train()
@@ -81,7 +81,7 @@ def training_step(self, batch: Any, batch_idx: int):
         # self.trainer.accelerator.barrier()
         # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} Updating weights")
         # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} mutator:{self.mutator._cache['normal_n2_p0']}")
-        # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} mutator:{self.mutator.choices['normal_n2_p0']}")
+        # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} mutator:{self.mutator.candidates['normal_n2_p0']}")
         # print(f"[rank {self.rank}] epoch-{self.current_epoch} batch-{batch_idx} bias={self.network.linear.weight[0][:10]}")

# log train metrics
Expand Down Expand Up @@ -193,13 +193,13 @@ def _compute_hessian(self, backup_params, dw, trn_X, trn_y):
return hessian

def training_epoch_end(self, outputs: List[Any]):
acc = np.mean([output['acc'].item() for output in outputs])
loss = np.mean([output['loss'].item() for output in outputs])
acc_epoch = self.trainer.callback_metrics['train/acc_epoch'].item()
loss_epoch = self.trainer.callback_metrics['train/loss_epoch'].item()
logger.info(f'Train epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')

mflops, size = self.arch_size((1, 3, 32, 32), convert=True)
logger.info(
f"[rank {self.rank}] current model({self.arch}): {mflops:.4f} MFLOPs, {size:.4f} MB.")
logger.info(
f"[rank {self.rank}] Train epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
logger.info("self.mutator._cache: ", len(self.mutator._cache), self.mutator._cache)

if self.current_epoch % 10 == 0:
@@ -220,13 +220,9 @@ def validation_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets, 'acc': acc}

     def validation_epoch_end(self, outputs: List[Any]):
-        acc = np.mean([output['acc'].item() for output in outputs])
-        loss = np.mean([output['loss'].item() for output in outputs])
-        mflops, size = self.arch_size((1, 3, 32, 32), convert=True)
-        logger.info(
-            f"[rank {self.rank}] current model({self.arch}): {mflops:.4f} MFLOPs, {size:.4f} MB.")
-        logger.info(
-            f"[rank {self.rank}] Val epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
+        acc_epoch = self.trainer.callback_metrics['val/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['val/loss_epoch'].item()
+        logger.info(f'Val epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')

     def test_step(self, batch: Any, batch_idx: int):
         (X, targets) = batch
@@ -242,10 +238,9 @@ def test_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets, 'acc': acc}

     def test_epoch_end(self, outputs: List[Any]):
-        acc = np.mean([output['acc'].item() for output in outputs])
-        loss = np.mean([output['loss'].item() for output in outputs])
-        logger.info(
-            f"[rank {self.rank}] Test epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
+        acc = self.trainer.callback_metrics['test/acc'].item()
+        loss = self.trainer.callback_metrics['test/loss'].item()
+        logger.info(f'Test epoch{self.trainer.current_epoch} acc={acc:.4f} loss={loss:.4f}')

     def configure_optimizers(self):
         """Choose what optimizers and learning-rate schedulers to use in your optimization.
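
Background for the `optimizer_idx` branches and the `phase 2: child network step` comment in the hunks above: DARTS alternates an architecture step (minimizing validation loss with respect to the architecture parameters alpha) with a weight step (minimizing training loss with respect to the child network weights w). A first-order sketch of that bi-level loop with two Lightning optimizers; names like `arch_params` are illustrative rather than hyperbox's API, and it omits the second-order `_compute_hessian` correction the real model implements:

import pytorch_lightning as pl
import torch
import torch.nn.functional as F

class DartsSketch(pl.LightningModule):
    """First-order DARTS-style bi-level loop with two optimizers."""

    def __init__(self, network: torch.nn.Module, arch_params):
        super().__init__()
        # `network` is assumed to mix candidate ops weighted by alpha,
        # so the validation loss actually depends on arch_params.
        self.network = network                # child network weights w
        self.arch_params = list(arch_params)  # architecture parameters alpha

    def training_step(self, batch, batch_idx, optimizer_idx):
        # Assumes the dataloader yields paired train/val batches,
        # as a DARTS-style datamodule would.
        (trn_X, trn_y), (val_X, val_y) = batch
        if optimizer_idx == 0:
            # phase 1: architecture step, minimize val loss w.r.t. alpha
            return F.cross_entropy(self.network(val_X), val_y)
        # phase 2: child network step, minimize train loss w.r.t. w
        return F.cross_entropy(self.network(trn_X), trn_y)

    def configure_optimizers(self):
        # Lightning 1.x (automatic optimization) calls training_step once
        # per optimizer per batch, passing optimizer_idx in this order.
        arch_opt = torch.optim.Adam(self.arch_params, lr=3e-4, betas=(0.5, 0.999))
        w_opt = torch.optim.SGD(self.network.parameters(), lr=0.025, momentum=0.9)
        return [arch_opt, w_opt]
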
13 changes: 9 additions & 4 deletions hyperbox/models/mnist_model.py
@@ -74,8 +74,9 @@ def training_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets}

     def training_epoch_end(self, outputs: List[Any]):
-        # `outputs` is a list of dicts returned from `training_step()`
-        pass
+        acc_epoch = self.trainer.callback_metrics['train/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['train/loss_epoch'].item()
+        logger.info(f'Train epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')

     def validation_step(self, batch: Any, batch_idx: int):
         loss, preds, targets = self.step(batch)
@@ -88,7 +89,9 @@ def validation_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets}

     def validation_epoch_end(self, outputs: List[Any]):
-        pass
+        acc_epoch = self.trainer.callback_metrics['val/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['val/loss_epoch'].item()
+        logger.info(f'Val epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')

     def test_step(self, batch: Any, batch_idx: int):
         loss, preds, targets = self.step(batch)
@@ -101,7 +104,9 @@ def test_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets}

     def test_epoch_end(self, outputs: List[Any]):
-        pass
+        acc = self.trainer.callback_metrics['test/acc'].item()
+        loss = self.trainer.callback_metrics['test/loss'].item()
+        logger.info(f'Test epoch{self.trainer.current_epoch} acc={acc:.4f} loss={loss:.4f}')

     def configure_optimizers(self):
         """Choose what optimizers and learning-rate schedulers to use in your optimization.
15 changes: 14 additions & 1 deletion hyperbox/models/nasbench_model.py
@@ -93,6 +93,11 @@ def training_step(self, batch: Any, batch_idx: int):
         # remember to always return loss from training_step, or else backpropagation will fail!
         return {"loss": loss, "preds": preds, "targets": targets}

+    def training_epoch_end(self, outputs: List[Any]):
+        acc_epoch = self.trainer.callback_metrics['train/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['train/loss_epoch'].item()
+        logger.info(f'Train epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')
+
     def validation_step(self, batch: Any, batch_idx: int):
         loss, preds, targets = self.step(batch)

@@ -103,6 +108,11 @@ def validation_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets}

+    def validation_epoch_end(self, outputs: List[Any]):
+        acc_epoch = self.trainer.callback_metrics['val/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['val/loss_epoch'].item()
+        logger.info(f'Val epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')
+
     def test_step(self, batch: Any, batch_idx: int):
         loss, preds, targets = self.step(batch)

@@ -114,4 +124,7 @@ def test_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets}

     def test_epoch_end(self, outputs: List[Any]):
-        pass
+        acc = self.trainer.callback_metrics['test/acc'].item()
+        loss = self.trainer.callback_metrics['test/loss'].item()
+        logger.info(f'Test epoch{self.trainer.current_epoch} acc={acc:.4f} loss={loss:.4f}')

39 changes: 13 additions & 26 deletions hyperbox/models/ofa_model.py
@@ -227,13 +227,9 @@ def training_step(self, batch: Any, batch_idx: int):
         return {"loss": loss, "preds": preds, "targets": targets, 'acc': acc}

     def training_epoch_end(self, outputs: List[Any]):
-        # acc = np.mean([output['acc'].item() for output in outputs])
-        # loss = np.mean([output['loss'].item() for output in outputs])
-        # self.log("train/loss", loss, on_step=False, on_epoch=True, sync_dist=False, prog_bar=False)
-        # self.log("train/acc", acc, on_step=False, on_epoch=True, sync_dist=False, prog_bar=False)
-        # # mflops, size = self.arch_size((1,3,32,32), convert=True)
-        # # logger.info(f"[rank {self.rank}] current model({self.arch}): {mflops:.4f} MFLOPs, {size:.4f} MB.")
-        # logger.info(f"[rank {self.rank}] Train epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
+        acc_epoch = self.trainer.callback_metrics['train/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['train/loss_epoch'].item()
+        logger.info(f'Train epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')

         # evaluation
         if self.current_epoch < self.supernet_epoch:
@@ -324,17 +320,10 @@ def validation_step(self, batch: Any, batch_idx: int):
         # logger.info(f"Val epoch{self.current_epoch} batch{batch_idx}: loss={loss}, acc={acc}")
         return {"loss": loss, "preds": preds, "targets": targets, 'acc': acc}

-    # def validation_epoch_end(self, outputs: List[Any]):
-    #     if self.current_epoch > self.supernet_epoch:
-    #         return
-    #     acc = np.mean([output['acc'].item() for output in outputs])
-    #     loss = np.mean([output['loss'].item() for output in outputs])
-    #     mflops, size = self.arch_size((1,3,32,32), convert=True)
-    #     sync_dist = not self.is_net_parallel # sync the metrics if all processes train the same sub network
-    #     self.log(f"val/loss", loss, on_step=False, on_epoch=True, sync_dist=sync_dist, prog_bar=True)
-    #     self.log(f"val/acc", acc, on_step=False, on_epoch=True, sync_dist=sync_dist, prog_bar=True)
-    #     logger.info(f"[rank {self.rank}] current model({self.arch}): {mflops:.4f} MFLOPs, {size:.4f} MB.")
-    #     logger.info(f"[rank {self.rank}] Val epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
+    def validation_epoch_end(self, outputs: List[Any]):
+        acc_epoch = self.trainer.callback_metrics['val/acc_epoch'].item()
+        loss_epoch = self.trainer.callback_metrics['val/loss_epoch'].item()
+        logger.info(f'Val epoch{self.trainer.current_epoch} acc={acc_epoch:.4f} loss={loss_epoch:.4f}')

     def test_step(self, batch: Any, batch_idx: int):
         if isinstance(batch, list) and len(batch)==1:
@@ -349,16 +338,14 @@ def test_step(self, batch: Any, batch_idx: int):
         # preds = torch.argmax(output, dim=1)
         preds = torch.softmax(output, -1)
         acc = self.val_metric(preds, targets)
-        self.log(f"val/acc", acc, on_step=True, on_epoch=True, prog_bar=False)
+        self.log("test/acc", acc, on_step=True, on_epoch=True)
+        self.log("test/loss", loss, on_step=False, on_epoch=True)
         return {"loss": loss, "preds": preds, "targets": targets, 'acc': acc}

-    # def test_epoch_end(self, outputs: List[Any]):
-    #     acc = np.mean([output['acc'].item() for output in outputs])
-    #     loss = np.mean([output['loss'].item() for output in outputs])
-    #     if self.trainer.world_size > 1:
-    #         acc = torch.tensor(self.all_gather(acc)).mean()
-    #         loss = torch.tensor(self.all_gather(loss)).mean()
-    #     logger.info(f"Test epoch{self.current_epoch} final result: loss={loss}, acc={acc}")
+    def test_epoch_end(self, outputs: List[Any]):
+        acc = self.trainer.callback_metrics['test/acc'].item()
+        loss = self.trainer.callback_metrics['test/loss'].item()
+        logger.info(f'Test epoch{self.trainer.current_epoch} acc={acc:.4f} loss={loss:.4f}')

     def configure_callbacks(self):
         ofa_callback = OFACallback()
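
The commented-out code deleted above (manual `all_gather`, per-subnet `sync_dist`) points at the distributed-metrics concern that `callback_metrics` now absorbs: a plain mean over step outputs only sees the local rank's batches. A sketch of the two common remedies under assumed PyTorch Lightning 1.x semantics; `is_net_parallel` follows the deleted comment, meaning each process trains a different subnet, so cross-rank averaging would mix different architectures:

import pytorch_lightning as pl
import torch
import torch.nn.functional as F

class DistMetricsSketch(pl.LightningModule):
    def __init__(self, is_net_parallel: bool = False):
        super().__init__()
        self.net = torch.nn.Linear(16, 4)
        self.is_net_parallel = is_net_parallel

    def forward(self, x):
        return self.net(x)

    def validation_step(self, batch, batch_idx):
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        # Remedy 1: let Lightning reduce across ranks, but only when every
        # process evaluates the same architecture.
        self.log("val/loss", loss, on_epoch=True, sync_dist=not self.is_net_parallel)
        return {"loss": loss}

    def validation_epoch_end(self, outputs):
        # Remedy 2: gather per-rank means by hand, as the deleted
        # test_epoch_end draft did with self.all_gather.
        local_mean = torch.stack([o["loss"] for o in outputs]).mean()
        global_mean = self.all_gather(local_mean).mean()
        if self.trainer.is_global_zero:
            print(f"val/loss across ranks: {global_mean.item():.4f}")
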
2 changes: 0 additions & 2 deletions hyperbox/models/random_model.py
@@ -120,5 +120,3 @@ def test_epoch_end(self, outputs: List[Any]):
         acc = self.trainer.callback_metrics['test/acc'].item()
         loss = self.trainer.callback_metrics['test/loss'].item()
         logger.info(f'Test epoch{self.trainer.current_epoch} acc={acc:.4f} loss={loss:.4f}')

