|
|
|
@@ -48,7 +48,11 @@ print(f"Max sequence length: {tokenizer.model_max_length}")
 # ----------------------------
 # [2/7] Quantization config (QLoRA)
 # ----------------------------
-print(f"{80 * '_'}\n[2/7] Configuring 4-bit quantization (BitsAndBytes)...")
+print(f"{80 * '_'}\n[2/7] Loading model in 4-bit mode (optimized QLoRA)...")
+
+assert torch.cuda.is_available(), "CUDA GPU not detected!"
+print(f"Using GPU: {torch.cuda.get_device_name(0)}")
+
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
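# Side note (not part of the diff): the hunk above does not show which compute dtype the
# NF4 weights are dequantized to during forward passes. A minimal sketch of one common way
# to pick it per GPU; bnb_4bit_compute_dtype is assumed to be set elsewhere in the config.
import torch
compute_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
# e.g. BitsAndBytesConfig(..., bnb_4bit_compute_dtype=compute_dtype)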
|
|
|
@@ -56,17 +60,16 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_use_double_quant=True,
 )
 
 print("4-bit NF4 quantization configured.")
 
 print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="auto",
+    device_map="cuda",  # 🔥 SAFE
     quantization_config=bnb_config,
     dtype=torch.float16,
     low_cpu_mem_usage=True,
     trust_remote_code=True,
 )
-print("Model loaded successfully.")
+print("Model loaded successfully in 4-bit mode on GPU.")
 
 
 # ----------------------------
 # [3/7] Prepare model for k-bit training
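# Side note (not part of the diff): a minimal post-load sanity check, assuming `model` is
# the 4-bit model created above. get_memory_footprint() is a standard transformers method;
# hf_device_map only exists when device_map was used, hence the getattr fallback.
print(f"Device map: {getattr(model, 'hf_device_map', 'n/a')}")
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")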
|
|
|
@@ -119,8 +122,7 @@ print("Formatting dataset for Ukrainian → French translation...")
 
 def format_prompt(example):
     return {
-        "text": (
-            "<|user|>\n"
+        "text": ("<|user|>\n"
             "Translate the following Ukrainian text into French.\n"
             f"Ukrainian: {example['text']}\n"
             "<|assistant|>\n"
|
|
|
@@ -154,13 +156,13 @@ training_args = TrainingArguments(
     save_steps=500,
     save_total_limit=2,
     report_to="none",
+    dataloader_pin_memory=False,
 )
 
 print("Training arguments ready.")
 print(f"Output directory: {OUTPUT_DIR}")
 print(f"Epochs: {training_args.num_train_epochs}")
-print(
-    f"Effective batch size: "
+print(f"Effective batch size: "
     f"{training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}"
 )
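# Side note (not part of the diff): rough schedule arithmetic from the same arguments,
# assuming the `dataset` defined earlier in the script; handy for sizing save_steps.
effective_bs = (training_args.per_device_train_batch_size
                * training_args.gradient_accumulation_steps)
steps_per_epoch = len(dataset["train"]) // effective_bs
print(f"~{steps_per_epoch} optimizer steps per epoch, "
      f"~{steps_per_epoch * int(training_args.num_train_epochs)} steps total")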
|
|
|
|
|
|
|
@@ -171,7 +173,7 @@ print("Initializing SFTTrainer...")
 trainer = SFTTrainer(
     model=model,
     train_dataset=dataset["train"],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     args=training_args,
 )
 print("Trainer initialized.")
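# Side note (not part of the diff): `processing_class` is the newer keyword for what TRL's
# SFTTrainer used to accept as `tokenizer`. A version-agnostic sketch, assuming only that
# one of the two names appears in the constructor signature:
import inspect
from trl import SFTTrainer
kw = ("processing_class"
      if "processing_class" in inspect.signature(SFTTrainer.__init__).parameters
      else "tokenizer")
trainer = SFTTrainer(model=model, train_dataset=dataset["train"],
                     args=training_args, **{kw: tokenizer})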
|
|
|
@@ -181,12 +183,16 @@ print("Trainer initialized.")
 # ----------------------------
 print(f"{80 * '_'}\n[7/7] Starting training...")
 try:
-    trainer.train(resume_from_checkpoint=True)
+    train_output = trainer.train(resume_from_checkpoint=True)
 except Exception as e:
     print("No checkpoint found or resume failed, starting fresh training.")
     print(f"Reason: {e}")
-    trainer.train()
+    train_output = trainer.train()
 
+print("\n=== Training summary ===")
+print(f"Global steps: {train_output.global_step}")
+print(f"Training loss: {train_output.training_loss}")
+print(f"Metrics: {train_output.metrics}")
 print("Training completed successfully.")
 
 # ----------------------------
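# Side note (not part of the diff): persisting the result once training finishes; a sketch
# assuming OUTPUT_DIR is the same directory passed to TrainingArguments above.
trainer.save_model(OUTPUT_DIR)          # saves the trained weights (the LoRA adapter when PEFT is active)
tokenizer.save_pretrained(OUTPUT_DIR)   # keep the tokenizer next to the adapter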
|
|
|
|