| @@ -182,13 +182,19 @@ print("Trainer initialized.") | |||
# [7/7] Training
# ----------------------------
print(f"{80 * '_'}\n[7/7] Starting training...")

# Decide up front whether to resume, instead of attempting a resume inside a
# broad `try/except Exception` and silently falling back to a fresh run: that
# fallback would also hide genuine resume errors (e.g. a corrupted checkpoint)
# and restart training from scratch without the user noticing.
#
# Only sub-directories named "checkpoint-*" count; a stray file with that
# prefix is ignored, and a missing (or non-directory) OUTPUT_DIR simply means
# "no checkpoint".
# NOTE(review): assumes OUTPUT_DIR is the same directory the trainer writes its
# checkpoints to — confirm against the trainer's configuration.
checkpoint_exists = os.path.isdir(OUTPUT_DIR) and any(
    entry.startswith("checkpoint-")
    and os.path.isdir(os.path.join(OUTPUT_DIR, entry))
    for entry in os.listdir(OUTPUT_DIR)
)

if checkpoint_exists:
    print("Checkpoint found → resuming training")
    # Any failure while resuming propagates so it can be diagnosed rather
    # than being masked by an automatic restart.
    train_output = trainer.train(resume_from_checkpoint=True)
else:
    print("No checkpoint found → starting fresh training")
    train_output = trainer.train()

# Report the final step count and loss from the object returned by train().
print("\n=== Training summary ===")
print(f"Global steps: {train_output.global_step}")
print(f"Training loss: {train_output.training_loss}")