From fa3ad61dd793e1e186114f9f2cd76463e84ad2ee Mon Sep 17 00:00:00 2001
From: Alex
Date: Thu, 15 Jan 2026 16:59:25 +0100
Subject: [PATCH] use the GPU

---
 Finetunning/finetunning.py | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/Finetunning/finetunning.py b/Finetunning/finetunning.py
index 45a7603..9970578 100644
--- a/Finetunning/finetunning.py
+++ b/Finetunning/finetunning.py
@@ -48,7 +48,11 @@ print(f"Max sequence length: {tokenizer.model_max_length}")
 # ----------------------------
 # [2/7] Quantization config (QLoRA)
 # ----------------------------
-print(f"{80 * '_'}\n[2/7] Configuring 4-bit quantization (BitsAndBytes)...")
+print(f"{80 * '_'}\n[2/7] Loading model in 4-bit mode (optimized QLoRA)...")
+
+assert torch.cuda.is_available(), "CUDA GPU not detected!"
+print(f"Using GPU: {torch.cuda.get_device_name(0)}")
+
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -56,17 +60,16 @@ bnb_config = BitsAndBytesConfig(
     bnb_4bit_use_double_quant=True,
 )
 
-print("4-bit NF4 quantization configured.")
-
-print("Loading model...")
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="auto",
+    device_map="cuda",  # 🔥 SAFE
     quantization_config=bnb_config,
-    dtype=torch.float16,
+    low_cpu_mem_usage=True,
     trust_remote_code=True,
 )
-print("Model loaded successfully.")
+
+print("Model loaded successfully in 4-bit mode on GPU.")
+
 
 # ----------------------------
 # [3/7] Prepare model for k-bit training
@@ -119,8 +122,7 @@ print("Formatting dataset for Ukrainian → French translation...")
 
 def format_prompt(example):
     return {
-        "text": (
-            "<|user|>\n"
+        "text": ("<|user|>\n"
             "Translate the following Ukrainian text into French.\n"
             f"Ukrainian: {example['text']}\n"
             "<|assistant|>\n"
@@ -154,13 +156,13 @@ training_args = TrainingArguments(
     save_steps=500,
     save_total_limit=2,
     report_to="none",
+    dataloader_pin_memory=False,
 )
 
 print("Training arguments ready.")
 print(f"Output directory: {OUTPUT_DIR}")
 print(f"Epochs: {training_args.num_train_epochs}")
-print(
-    f"Effective batch size: "
+print(f"Effective batch size: "
     f"{training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}"
 )
 
@@ -171,7 +173,7 @@ print("Initializing SFTTrainer...")
 trainer = SFTTrainer(
     model=model,
     train_dataset=dataset["train"],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     args=training_args,
 )
 print("Trainer initialized.")
@@ -181,12 +183,16 @@ print("Trainer initialized.")
 # ----------------------------
 print(f"{80 * '_'}\n[7/7] Starting training...")
 try:
-    trainer.train(resume_from_checkpoint=True)
+    train_output = trainer.train(resume_from_checkpoint=True)
 except Exception as e:
     print("No checkpoint found or resume failed, starting fresh training.")
    print(f"Reason: {e}")
-    trainer.train()
+    train_output = trainer.train()
 
+print("\n=== Training summary ===")
+print(f"Global steps: {train_output.global_step}")
+print(f"Training loss: {train_output.training_loss}")
+print(f"Metrics: {train_output.metrics}")
 print("Training completed successfully.")
 
 # ----------------------------
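
For reference, a minimal sketch of how the [2/7] block of Finetunning/finetunning.py reads once this patch is applied, assuming the imports and MODEL_NAME defined earlier in the script; the bnb_4bit_compute_dtype argument falls between the two hunks and is not visible in the diff, so the value used here is an assumption.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

MODEL_NAME = "your-base-model"  # hypothetical placeholder; the real value is set earlier in the script

# The patch now requires a GPU: fail fast if CUDA is unavailable.
assert torch.cuda.is_available(), "CUDA GPU not detected!"
print(f"Using GPU: {torch.cuda.get_device_name(0)}")

# 4-bit NF4 quantization for QLoRA; the compute dtype below is an assumption,
# since that line is not shown in the patch hunks.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # assumption
    bnb_4bit_use_double_quant=True,
)

# Load the quantized model directly onto the CUDA device
# (device_map="cuda" replaces the previous device_map="auto").
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cuda",
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
print("Model loaded successfully in 4-bit mode on GPU.")

Downstream, trainer.train() returns a transformers TrainOutput, whose global_step, training_loss, and metrics fields feed the new summary prints added at the end of the patch.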