Skip to content

Commit ae4d1bb

Browse files
committed
Ensure final epoch always gets validated even if it doesn't line up with val interval. Add a few comments.
1 parent 50306f2 commit ae4d1bb

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

train.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1015,14 +1015,16 @@ def main():
1015 1015
_logger.info("Distributing BatchNorm running means and vars")
1016 1016
utils.distribute_bn(model, args.world_size, args.dist_bn == 'reduce')
1017 1017

1018-
if (epoch + 1) % args.val_interval != 0:
1018+
epoch_p_1 = epoch + 1
1019+
if epoch_p_1 % args.val_interval != 0 and epoch_p_1 != num_epochs:
1019 1020
if utils.is_primary(args):
1020 1021
_logger.info("Skipping eval and checkpointing ")
1021 1022
if lr_scheduler is not None:
1022-
# step LR for next epoch
1023-
# careful when using metric dependent lr_scheduler
1024-
lr_scheduler.step(epoch + 1, metric=None)
1025-
# skip validation and metric logic
1023+
# step LR for next epoch, take care when using metric dependent lr_scheduler
1024+
lr_scheduler.step(epoch_p_1, metric=None)
1025+
# Skip validation and metric logic
1026+
# FIXME we could make the logic below able to handle no eval metrics more gracefully,
1027+
# but for simplicity opting to just skip for now.
1026 1028
continue
1027 1029

1028 1030
if loader_eval is not None:
@@ -1076,7 +1078,7 @@ def main():
1076 1078

1077 1079
if lr_scheduler is not None:
1078 1080
# step LR for next epoch
1079-
lr_scheduler.step(epoch + 1, latest_metric)
1081+
lr_scheduler.step(epoch_p_1, latest_metric)
1080 1082

1081 1083
latest_results = {
1082 1084
'epoch': epoch,

0 commit comments

Comments
 (0)