@@ -1015,14 +1015,16 @@ def main():
10151015 _logger .info ("Distributing BatchNorm running means and vars" )
10161016 utils .distribute_bn (model , args .world_size , args .dist_bn == 'reduce' )
10171017
1018- if (epoch + 1 ) % args .val_interval != 0 :
1018+ epoch_p_1 = epoch + 1
1019+ if epoch_p_1 % args .val_interval != 0 and epoch_p_1 != num_epochs :
10191020 if utils .is_primary (args ):
10201021 _logger .info ("Skipping eval and checkpointing " )
10211022 if lr_scheduler is not None :
1022- # step LR for next epoch
1023- # careful when using metric dependent lr_scheduler
1024- lr_scheduler .step (epoch + 1 , metric = None )
1025- # skip validation and metric logic
1023+ # step LR for next epoch; take care when using a metric-dependent lr_scheduler
1024+ lr_scheduler .step (epoch_p_1 , metric = None )
1025+ # Skip validation and metric logic
1026+ # FIXME: the logic below could be made to handle missing eval metrics more gracefully,
1027+ # but for simplicity we just skip it for now.
10261028 continue
10271029
10281030 if loader_eval is not None :
@@ -1076,7 +1078,7 @@ def main():
10761078
10771079 if lr_scheduler is not None :
10781080 # step LR for next epoch
1079- lr_scheduler .step (epoch + 1 , latest_metric )
1081+ lr_scheduler .step (epoch_p_1 , latest_metric )
10801082
10811083 latest_results = {
10821084 'epoch' : epoch ,
0 commit comments