|
|
|
@@ -15,9 +15,12 @@ from peft import (
 from trl import SFTTrainer
 
 # ----------------------------
-# Environment safety (Windows)
+# Environment safety (Windows + AMP fixes)
 # ----------------------------
 os.environ["TORCHDYNAMO_DISABLE"] = "1"
+os.environ["ACCELERATE_MIXED_PRECISION"] = "no"  # ✅ disable AMP completely
+os.environ["TORCH_AMP_DISABLE"] = "1"            # ✅ disable GradScaler
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"         # optional: force first GPU
 
 # ----------------------------
 # Global configuration
@@ -25,7 +28,7 @@ os.environ["TORCHDYNAMO_DISABLE"] = "1"
 MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
 OUTPUT_DIR = "./qwen2.5-7b-uk-fr-lora"
 DATA_FILE = "paires_clean.json"
-MAX_SEQ_LENGTH = 1024
+MAX_SEQ_LENGTH = 512  # Reduce for RTX 4080 SUPER VRAM
 
 print(f"\n=== Starting fine-tuning script for {MODEL_NAME} ===\n")
 
@@ -35,54 +38,50 @@ print(f"\n=== Starting fine-tuning script for {MODEL_NAME} ===\n")
 print(f"{80 * '_'}\n[1/7] Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(
     MODEL_NAME,
-    trust_remote_code=True
+    trust_remote_code=True,
 )
 
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.model_max_length = MAX_SEQ_LENGTH
 
 print("Tokenizer loaded.")
 print(f"Pad token id: {tokenizer.pad_token_id}")
 print(f"Max sequence length: {tokenizer.model_max_length}")
 
 # ----------------------------
-# [2/7] Quantization config (QLoRA)
+# [2/7] Load model in 4-bit (QLoRA)
 # ----------------------------
-print(f"{80 * '_'}\n[2/7] Loading model in 4-bit mode (optimized QLoRA)...")
+print(f"{80 * '_'}\n[2/7] Loading model in 4-bit mode (QLoRA)...")
+assert torch.cuda.is_available(), "CUDA GPU not detected!"
+print(f"Using GPU: {torch.cuda.get_device_name(0)}")
 
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_compute_dtype=torch.float16,  # fp16 internally
     bnb_4bit_use_double_quant=True,
 )
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="cuda",  # 🔥 SAFE
+    device_map="auto",
     quantization_config=bnb_config,
     low_cpu_mem_usage=True,
     trust_remote_code=True,
 )
 
 # Align model tokens with tokenizer
 model.config.pad_token_id = tokenizer.pad_token_id
 model.config.bos_token_id = tokenizer.bos_token_id
 model.config.eos_token_id = tokenizer.eos_token_id
 print("Model loaded successfully in 4-bit mode on GPU.")
 
 
 # ----------------------------
 # [3/7] Prepare model for k-bit training
 # ----------------------------
 print(f"{80 * '_'}\n[3/7] Preparing model for k-bit training...")
 model = prepare_model_for_kbit_training(model)
 
-model.gradient_checkpointing_enable(
-    gradient_checkpointing_kwargs={"use_reentrant": False}
-)
-
+model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={"use_reentrant": False})
 model.config.use_cache = False  # Important with gradient checkpointing + QLoRA
 print("Model prepared for k-bit training.")
 print("Gradient checkpointing enabled (non-reentrant).")
 
 # ----------------------------
 # [4/7] LoRA configuration
@@ -104,10 +103,8 @@ lora_config = LoraConfig(
         "down_proj",
     ],
 )
 
 model = get_peft_model(model, lora_config)
 model.print_trainable_parameters()
 
 print("LoRA adapters successfully attached.")
 
 # ----------------------------
@@ -115,18 +112,19 @@ print("LoRA adapters successfully attached.")
 # ----------------------------
 print(f"{80 * '_'}\n[5/7] Loading dataset from JSON file...")
 dataset = load_dataset("json", data_files=DATA_FILE)
 
 print(f"Dataset loaded with {len(dataset['train'])} samples.")
 
 print("Formatting dataset for Ukrainian → French translation...")
 
 def format_prompt(example):
     return {
-        "text": ("<|user|>\n"
+        "text": (
+            "<|im_start|>user\n"
             "Translate the following Ukrainian text into French.\n"
             f"Ukrainian: {example['text']}\n"
-            "<|assistant|>\n"
+            "<|im_end|>\n"
+            "<|im_start|>assistant\n"
             f"{example['translation']}"
+            "<|im_end|>"
         )
     }
 
@@ -134,7 +132,6 @@ dataset = dataset.map(
     format_prompt,
     remove_columns=dataset["train"].column_names
 )
 
 print("Dataset formatting completed.")
 print("Example prompt:\n")
 print(dataset["train"][0]["text"])
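Side note on the prompt change in the hunk above: the <|im_start|>/<|im_end|> markers are the ChatML format used by Qwen2.5-Instruct, so an equivalent training string can also be produced with the tokenizer's built-in chat template instead of hand-writing the special tokens. A minimal sketch, assuming the `tokenizer` loaded earlier in the script and the same example fields; note that Qwen's template may prepend its default system turn, so the result is close to but not byte-identical with format_prompt():

# Hypothetical alternative to format_prompt() (not part of the diff).
# Assumes `tokenizer` is the Qwen2.5 tokenizer loaded above and `example`
# has the same "text" / "translation" fields as paires_clean.json.
def format_prompt_with_template(example):
    messages = [
        {"role": "user", "content": (
            "Translate the following Ukrainian text into French.\n"
            f"Ukrainian: {example['text']}"
        )},
        {"role": "assistant", "content": example["translation"]},
    ]
    # tokenize=False returns the rendered ChatML string rather than token ids.
    return {"text": tokenizer.apply_chat_template(messages, tokenize=False)}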
|
|
|
@@ -146,46 +143,49 @@ print(f"{80 * '_'}\n[6/7] Initializing training arguments...")
 training_args = TrainingArguments(
     output_dir=OUTPUT_DIR,
     per_device_train_batch_size=1,
-    gradient_accumulation_steps=8,
+    gradient_accumulation_steps=16,
     learning_rate=1e-4,
     num_train_epochs=3,
-    fp16=False,
-    bf16=False,
+    max_steps=1000,
+
+    fp16=False,  # ⚠ disable AMP
+    bf16=False,  # ⚠ disable BF16
+
     optim="paged_adamw_32bit",
     logging_steps=10,
     save_steps=500,
     save_total_limit=2,
     report_to="none",
+
+    dataloader_pin_memory=False,
+    max_grad_norm=0.0,  # avoid AMP gradient clipping
 )
 
 print("Training arguments ready.")
 print(f"Output directory: {OUTPUT_DIR}")
 print(f"Epochs: {training_args.num_train_epochs}")
-print(f"Effective batch size: "
-      f"{training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}"
-)
+print(f"Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
 
 # ----------------------------
-# Trainer
+# [7/7] Trainer
 # ----------------------------
-print("Initializing SFTTrainer...")
+print(f"{80 * '_'}\nInitializing SFTTrainer...")
 trainer = SFTTrainer(
     model=model,
     train_dataset=dataset["train"],
     processing_class=tokenizer,
     args=training_args,
 )
 print("Trainer initialized.")
 
 # ----------------------------
-# [7/7] Training
+# Training
 # ----------------------------
-print(f"{80 * '_'}\n[7/7] Starting training...")
-checkpoint_exists = any(
-    d.startswith("checkpoint-")
-    for d in os.listdir(OUTPUT_DIR)
-) if os.path.exists(OUTPUT_DIR) else False
+print(f"{80 * '_'}\nStarting training...")
+checkpoint_exists = False
+if os.path.exists(OUTPUT_DIR):
+    checkpoint_exists = any(
+        d.startswith("checkpoint-")
+        for d in os.listdir(OUTPUT_DIR)
+    )
 
 if checkpoint_exists:
     print("Checkpoint found → resuming training")
@@ -194,7 +194,6 @@ else:
     print("No checkpoint found → starting fresh training")
 train_output = trainer.train()
 
-
 print("\n=== Training summary ===")
 print(f"Global steps: {train_output.global_step}")
 print(f"Training loss: {train_output.training_loss}")
@@ -202,11 +201,10 @@ print(f"Metrics: {train_output.metrics}")
 print("Training completed successfully.")
 
 # ----------------------------
-# Save LoRA adapter
+# Save LoRA adapter and tokenizer
 # ----------------------------
 print(f"{80 * '_'}\nSaving LoRA adapter and tokenizer...")
 trainer.model.save_pretrained(OUTPUT_DIR)
 tokenizer.save_pretrained(OUTPUT_DIR)
 
 print("\n=== Fine-tuning finished ===")
 print(f"LoRA adapter saved in: {OUTPUT_DIR}")