diff --git a/Finetunning/finetunning.py b/Finetunning/finetunning.py
index ad806f1..4b9e80f 100644
--- a/Finetunning/finetunning.py
+++ b/Finetunning/finetunning.py
@@ -1,3 +1,4 @@
+import os
 import torch
 from datasets import load_dataset
 from transformers import (
@@ -11,15 +12,18 @@ from peft import (
     prepare_model_for_kbit_training,
 )
 from trl import SFTTrainer
-import os
+
+# ----------------------------
+# Environment safety (Windows)
+# ----------------------------
 os.environ["TORCHDYNAMO_DISABLE"] = "1"
 
 # ----------------------------
 # Model configuration
 # ----------------------------
-MODEL_NAME = "Qwen/Qwen2.5-14B-Instruct"
+MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
 
-print("=== Starting fine-tuning script ===")
+print(f"=== Starting fine-tuning script {MODEL_NAME} ===")
 print(f"{80 * '_'}\n[1/7] Loading tokenizer...")
 
 tokenizer = AutoTokenizer.from_pretrained(
@@ -27,7 +31,7 @@ tokenizer = AutoTokenizer.from_pretrained(
     trust_remote_code=True
 )
 
-# Ensure padding token is defined
+# Ensure padding is defined
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.model_max_length = 1024
 
@@ -38,13 +42,19 @@ model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     load_in_4bit=True,
     device_map="auto",
-    torch_dtype=torch.float16,  # OK for weights
+    torch_dtype=torch.float16,  # weights in fp16, gradients fp32
     trust_remote_code=True,
 )
 print("Model loaded.")
 
 print(f"{80 * '_'}\n[3/7] Preparing model for k-bit training...")
 model = prepare_model_for_kbit_training(model)
+
+# Fix future PyTorch checkpointing behavior
+model.gradient_checkpointing_enable(
+    gradient_checkpointing_kwargs={"use_reentrant": False}
+)
+
 print("Model prepared for k-bit training.")
 
 # ----------------------------
@@ -70,7 +80,7 @@ lora_config = LoraConfig(
 
 model = get_peft_model(model, lora_config)
 model.print_trainable_parameters()
-print("LoRA adapters attached to the model.")
+print("LoRA adapters attached.")
 
 # ----------------------------
 # Dataset loading
@@ -80,6 +90,7 @@ dataset = load_dataset(
     "json",
     data_files="traductions.json"
 )
+
 print(f"Dataset loaded with {len(dataset['train'])} samples.")
 
 print("Formatting dataset for Ukrainian → French translation...")
@@ -92,34 +103,32 @@ def format_prompt(example):
     )
     return {"text": prompt}
 
-dataset = dataset.map(format_prompt, remove_columns=dataset["train"].column_names)
+dataset = dataset.map(
+    format_prompt,
+    remove_columns=dataset["train"].column_names
+)
+
 print("Dataset formatting completed.")
 
 # ----------------------------
-# Training arguments
+# Training arguments (AMP OFF)
 # ----------------------------
 print(f"{80 * '_'}\n[6/7] Initializing training arguments...")
 
 training_args = TrainingArguments(
-    output_dir="./qwen-uk-fr-lora",
+    output_dir="./qwen2.5-7b-uk-fr-lora",
     per_device_train_batch_size=1,
     gradient_accumulation_steps=8,
     learning_rate=2e-4,
-    num_train_epochs=3,
-
+    num_train_epochs=2,  # 2 epochs usually enough for translation
     fp16=False,
     bf16=False,
-
     logging_steps=10,
     save_steps=500,
     save_total_limit=2,
-
-    # Use 32-bit optimizer
     optim="paged_adamw_32bit",
-
    report_to="none",
 )
-
 print("Training arguments ready.")
 
 # ----------------------------
@@ -138,15 +147,15 @@ print("Trainer initialized.")
 
 # Train
 # ----------------------------
 print(f"{80 * '_'}\n[7/7] Starting training...")
-trainer.train()
+trainer.train(resume_from_checkpoint=True)
 print("Training completed successfully.")
 
 # ----------------------------
 # Save LoRA adapter
 # ----------------------------
 print("Saving LoRA adapter and tokenizer...")
-trainer.model.save_pretrained("./qwen-uk-fr-lora")
-tokenizer.save_pretrained("./qwen-uk-fr-lora")
+trainer.model.save_pretrained("./qwen2.5-7b-uk-fr-lora")
+tokenizer.save_pretrained("./qwen2.5-7b-uk-fr-lora")
 print("=== Fine-tuning finished ===")
-print("LoRA adapter saved in ./qwen-uk-fr-lora")
+print("LoRA adapter saved in ./qwen2.5-7b-uk-fr-lora")
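
Note (not part of the patch): a minimal sketch for checking the saved adapter after training. It assumes the adapter directory name used in the diff ("./qwen2.5-7b-uk-fr-lora") and a hypothetical prompt string; the real prompt must match whatever format_prompt() produced, which is only partially visible in this diff.

# inference_check.py -- load the 4-bit base model and attach the saved LoRA adapter
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
ADAPTER_DIR = "./qwen2.5-7b-uk-fr-lora"  # path from the diff

tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR, trust_remote_code=True)

# Load the base model the same way the training script does, then attach LoRA weights.
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, ADAPTER_DIR)
model.eval()

# Hypothetical prompt layout; adjust to the exact template used by format_prompt().
prompt = "Translate the following Ukrainian text into French:\nДобрий день!\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=128)

# Print only the newly generated tokens, not the echoed prompt.
print(tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))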