Compare commits
4 Commits
d5313fb143
...
8b45028101
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8b45028101 | ||
|
|
ef4515adcc | ||
|
|
7aea840821 | ||
|
|
71e595a966 |
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,2 +1,6 @@
|
|||||||
output_temp.txt
|
output_temp.txt
|
||||||
checkpoint.json
|
checkpoint.json
|
||||||
|
Traduction/Modelfile
|
||||||
|
.env
|
||||||
|
Traduction/*.pdf
|
||||||
|
Traduction/*.txt
|
||||||
|
|||||||
@ -145,7 +145,7 @@ training_args = TrainingArguments(
|
|||||||
per_device_train_batch_size=1,
|
per_device_train_batch_size=1,
|
||||||
gradient_accumulation_steps=16,
|
gradient_accumulation_steps=16,
|
||||||
learning_rate=1e-4,
|
learning_rate=1e-4,
|
||||||
num_train_epochs=3,
|
num_train_epochs=2,
|
||||||
max_steps=1000,
|
max_steps=1000,
|
||||||
|
|
||||||
fp16=False, # ⚠ disable AMP
|
fp16=False, # ⚠ disable AMP
|
||||||
|
|||||||
@ -8,7 +8,7 @@ from nltk.translate.bleu_score import corpus_bleu
|
|||||||
# Configuration
|
# Configuration
|
||||||
# ----------------------------
|
# ----------------------------
|
||||||
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # base model
|
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # base model
|
||||||
LORA_DIR = "./qwen2.5-7b-uk-fr-lora" # fine-tuned LoRA
|
LORA_DIR = "./qwen2.5-7b-uk-fr-lora-2epoch" # fine-tuned LoRA
|
||||||
VALIDATION_FILE = "validation.jsonl" # small validation subset
|
VALIDATION_FILE = "validation.jsonl" # small validation subset
|
||||||
MAX_INPUT_LENGTH = 1024
|
MAX_INPUT_LENGTH = 1024
|
||||||
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|||||||
@ -3,15 +3,13 @@ PARAMETER temperature 0.2
|
|||||||
PARAMETER num_ctx 8192
|
PARAMETER num_ctx 8192
|
||||||
|
|
||||||
SYSTEM """
|
SYSTEM """
|
||||||
|
|
||||||
Tu es un traducteur spécialisé dans les mémoires ukrainiennes des années 1910.
|
Tu es un traducteur spécialisé dans les mémoires ukrainiennes des années 1910.
|
||||||
- Garde le style narratif et les tournures orales de l'auteur.
|
|
||||||
- Respecte les règles de traduction suivantes :
|
|
||||||
Règles strictes :
|
Règles strictes :
|
||||||
1. **Conserve tous les noms de lieux** dans leur forme originale (ex. : Львів → Lviv, mais ajoute une note si nécessaire entre [ ]).
|
1. **Conserve tous les noms de lieux** dans leur forme originale (ex. : Львів → Lviv, mais ajoute une note si nécessaire entre [ ]).
|
||||||
2. **Respecte le style narratif** : garde les tournures orales et les expressions propres à l'auteur.
|
2. **Respecte le style narratif** : garde les tournures orales et les expressions propres à l’auteur.
|
||||||
3. **Pour les termes historiques** (ex. : "powiat"), utilise le terme français standard et ajoute une note explicative.
|
3. Tu dois t'exprimer dans un français avec une syntaxe impeccable et agréable à lire, reformule la phrase traduite si tu la juges grammaticalement incorrecte.
|
||||||
4. **Conserve les citations** russe/allemand/polonais intégrés au texte (mais ajoute une note de fin de paragraphe entre [ ] en la traduisant et en précisant la langue d'origine.
|
4. **Pour les termes historiques** (ex. : "powiat"), utilise le terme français standard et ajoute une note explicative.
|
||||||
5. **Structure** : Garde les sauts de ligne et la mise en page originale.
|
5. **Conserve les citations** russes/allemandes/polonaises intégrées au texte (mais ajoute une note de fin de paragraphe entre [ ] en les traduisant et en précisant la langue d'origine).
|
||||||
6. **Notes du traducteur** : Ajoute entre crochets [ ] les explications contextuelles si un contexte historique exist.
|
6. **Structure** : Garde les sauts de ligne et la mise en page originale.
|
||||||
|
7. **Notes du traducteur** : Ajoute entre crochets [ ] les explications contextuelles si un contexte historique existe (ex. : "[Note : le contexte]").
|
||||||
"""
|
"""
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -11,7 +11,6 @@ from reportlab.pdfbase.ttfonts import TTFont
|
|||||||
import os, time
|
import os, time
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
DEBUG = True
|
|
||||||
PDF_PATH = "Traduction/TaniaBorecMemoir(Ukr).pdf"
|
PDF_PATH = "Traduction/TaniaBorecMemoir(Ukr).pdf"
|
||||||
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
|
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
|
||||||
OLLAMA_URL = "http://localhost:11434/api/generate"
|
OLLAMA_URL = "http://localhost:11434/api/generate"
|
||||||
@ -174,7 +173,6 @@ def load_checkpoint():
|
|||||||
return json.load(f)
|
return json.load(f)
|
||||||
return {"last_processed_index": -1, "results": {}}
|
return {"last_processed_index": -1, "results": {}}
|
||||||
|
|
||||||
# Sauvegarde le checkpoint
|
|
||||||
# Sauvegarde le checkpoint
|
# Sauvegarde le checkpoint
|
||||||
def save_checkpoint(last_index, results):
|
def save_checkpoint(last_index, results):
|
||||||
# Trier les clés du dictionnaire results
|
# Trier les clés du dictionnaire results
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user