# python.traduction/Finetunning/mergeLora.py

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# ----------------------------
# Configuration
# ----------------------------
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct"
LORA_DIR = "./qwen2.5-7b-uk-fr-lora"      # adapter directory produced by the fine-tuning run
OUTPUT_DIR = "./qwen2.5-7b-uk-fr-merged"  # final merged model
DTYPE = torch.float16                     # GGUF-friendly
DEVICE = "cpu"                            # merge on CPU (stable and safe)
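# Note (hardware assumption, not from the original script): merging on CPU loads the
# full float16 model into RAM, roughly 15 GB for a 7B-parameter model, so make sure
# enough memory is available before running.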
print("=== LoRA merge script started ===")
# ----------------------------
# Load base model
# ----------------------------
print(f"{80 * '_'}\n[1/4] Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=DTYPE,
    device_map=DEVICE,
    trust_remote_code=True,
)
print("Base model loaded.")
# ----------------------------
# Load tokenizer
# ----------------------------
print(f"{80 * '_'}\n[2/4] Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    BASE_MODEL,
    trust_remote_code=True,
)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the pad token for consistent padding downstream
print("Tokenizer loaded.")
# ----------------------------
# Load LoRA adapter
# ----------------------------
print(f"{80 * '_'}\n[3/4] Loading LoRA adapter...")
model = PeftModel.from_pretrained(
    base_model,
    LORA_DIR,
)
print("LoRA adapter loaded.")
# ----------------------------
# Merge LoRA into base model
# ----------------------------
print(f"{80 * '_'}\n[4/4] Merging LoRA into base model...")
model = model.merge_and_unload()
print("LoRA successfully merged.")
# ----------------------------
# Save merged model
# ----------------------------
print("Saving merged model...")
model.save_pretrained(
    OUTPUT_DIR,
    safe_serialization=True,
)
tokenizer.save_pretrained(OUTPUT_DIR)
print("=== Merge completed successfully ===")
print(f"Merged model saved in: {OUTPUT_DIR}")