Compare commits
No commits in common. "8b450281013c7ac440b05cf6801b7d8389e060b4" and "d5313fb14369e6c83dce4391fbd21fcb9cdaa784" have entirely different histories.
8b45028101
...
d5313fb143
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,6 +1,2 @@
|
|||||||
output_temp.txt
|
output_temp.txt
|
||||||
checkpoint.json
|
checkpoint.json
|
||||||
Traduction/Modelfile
|
|
||||||
.env
|
|
||||||
Traduction/*.pdf
|
|
||||||
Traduction/*.txt
|
|
||||||
|
|||||||
@ -145,7 +145,7 @@ training_args = TrainingArguments(
|
|||||||
per_device_train_batch_size=1,
|
per_device_train_batch_size=1,
|
||||||
gradient_accumulation_steps=16,
|
gradient_accumulation_steps=16,
|
||||||
learning_rate=1e-4,
|
learning_rate=1e-4,
|
||||||
num_train_epochs=2,
|
num_train_epochs=3,
|
||||||
max_steps=1000,
|
max_steps=1000,
|
||||||
|
|
||||||
fp16=False, # ⚠ disable AMP
|
fp16=False, # ⚠ disable AMP
|
||||||
|
|||||||
@ -8,7 +8,7 @@ from nltk.translate.bleu_score import corpus_bleu
|
|||||||
# Configuration
|
# Configuration
|
||||||
# ----------------------------
|
# ----------------------------
|
||||||
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # base model
|
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # base model
|
||||||
LORA_DIR = "./qwen2.5-7b-uk-fr-lora-2epoch" # fine-tuned LoRA
|
LORA_DIR = "./qwen2.5-7b-uk-fr-lora" # fine-tuned LoRA
|
||||||
VALIDATION_FILE = "validation.jsonl" # small validation subset
|
VALIDATION_FILE = "validation.jsonl" # small validation subset
|
||||||
MAX_INPUT_LENGTH = 1024
|
MAX_INPUT_LENGTH = 1024
|
||||||
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|||||||
@ -3,13 +3,15 @@ PARAMETER temperature 0.2
|
|||||||
PARAMETER num_ctx 8192
|
PARAMETER num_ctx 8192
|
||||||
|
|
||||||
SYSTEM """
|
SYSTEM """
|
||||||
|
|
||||||
Tu es un traducteur spécialisé dans les mémoires ukrainiennes des années 1910.
|
Tu es un traducteur spécialisé dans les mémoires ukrainiennes des années 1910.
|
||||||
|
- Garde le style narratif et les tournures orales de l'auteur.
|
||||||
|
- Respecte les règles de traduction suivantes :
|
||||||
Règles strictes :
|
Règles strictes :
|
||||||
1. **Conserve tous les noms de lieux** dans leur forme originale (ex. : Львів → Lviv, mais ajoute une note si nécessaire entre [ ]).
|
1. **Conserve tous les noms de lieux** dans leur forme originale (ex. : Львів → Lviv, mais ajoute une note si nécessaire entre [ ]).
|
||||||
2. **Respecte le style narratif** : garde les tournures orales et les expressions propres à l’auteur.
|
2. **Respecte le style narratif** : garde les tournures orales et les expressions propres à l'auteur.
|
||||||
3. Tu dois t'exprimer dans un français avec une syntaxe impeccable et agréable à lire, reformule la phrase traduite si tu la juges grammaticalement incorrecte.
|
3. **Pour les termes historiques** (ex. : "powiat"), utilise le terme français standard et ajoute une note explicative.
|
||||||
4. **Pour les termes historiques** (ex. : "powiat"), utilise le terme français standard et ajoute une note explicative.
|
4. **Conserve les citations** russes/allemandes/polonaises intégrées au texte (mais ajoute une note de fin de paragraphe entre [ ] en les traduisant et en précisant la langue d'origine).
|
||||||
5. **Conserve les citations** russes/allemandes/polonaises intégrées au texte (mais ajoute une note de fin de paragraphe entre [ ] en les traduisant et en précisant la langue d'origine).
|
5. **Structure** : Garde les sauts de ligne et la mise en page originale.
|
||||||
6. **Structure** : Garde les sauts de ligne et la mise en page originale.
|
6. **Notes du traducteur** : Ajoute entre crochets [ ] les explications contextuelles si un contexte historique existe.
|
||||||
7. **Notes du traducteur** : Ajoute entre crochets [ ] les explications contextuelles si un contexte historique existe (ex. : "[Note : le contexte]").
|
|
||||||
"""
|
"""
|
||||||
1674
Traduction/TaniaBorecMemoir(Ukr) (FR) V1.pdf
Normal file
1674
Traduction/TaniaBorecMemoir(Ukr) (FR) V1.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1169
Traduction/TaniaBorecMemoir(Ukr) (FR).txt
Normal file
1169
Traduction/TaniaBorecMemoir(Ukr) (FR).txt
Normal file
File diff suppressed because it is too large
Load Diff
1637
Traduction/TaniaBorecMemoir(Ukr) (FR)_V1.pdf
Normal file
1637
Traduction/TaniaBorecMemoir(Ukr) (FR)_V1.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1637
Traduction/TaniaBorecMemoir(Ukr) (FR)_V2.pdf
Normal file
1637
Traduction/TaniaBorecMemoir(Ukr) (FR)_V2.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1623
Traduction/TaniaBorecMemoir(Ukr) (FR)_V3.pdf
Normal file
1623
Traduction/TaniaBorecMemoir(Ukr) (FR)_V3.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1032
Traduction/TaniaBorecMemoir(Ukr)(FR)_V1.txt
Normal file
1032
Traduction/TaniaBorecMemoir(Ukr)(FR)_V1.txt
Normal file
File diff suppressed because it is too large
Load Diff
1038
Traduction/TaniaBorecMemoir(Ukr)(FR)_V2.txt
Normal file
1038
Traduction/TaniaBorecMemoir(Ukr)(FR)_V2.txt
Normal file
File diff suppressed because it is too large
Load Diff
1030
Traduction/TaniaBorecMemoir(Ukr)(FR)_V3.txt
Normal file
1030
Traduction/TaniaBorecMemoir(Ukr)(FR)_V3.txt
Normal file
File diff suppressed because it is too large
Load Diff
1664
Traduction/TaniaBorecMemoir(Ukr)(FR)_V4.pdf
Normal file
1664
Traduction/TaniaBorecMemoir(Ukr)(FR)_V4.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1119
Traduction/TaniaBorecMemoir(Ukr)(FR)_V4.txt
Normal file
1119
Traduction/TaniaBorecMemoir(Ukr)(FR)_V4.txt
Normal file
File diff suppressed because it is too large
Load Diff
1628
Traduction/TaniaBorecMemoir(Ukr)(FR)_V5.pdf
Normal file
1628
Traduction/TaniaBorecMemoir(Ukr)(FR)_V5.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1107
Traduction/TaniaBorecMemoir(Ukr)(FR)_V5.txt
Normal file
1107
Traduction/TaniaBorecMemoir(Ukr)(FR)_V5.txt
Normal file
File diff suppressed because it is too large
Load Diff
1704
Traduction/TaniaBorecMemoir(Ukr)(FR)_V6.pdf
Normal file
1704
Traduction/TaniaBorecMemoir(Ukr)(FR)_V6.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1209
Traduction/TaniaBorecMemoir(Ukr)(FR)_V6.txt
Normal file
1209
Traduction/TaniaBorecMemoir(Ukr)(FR)_V6.txt
Normal file
File diff suppressed because it is too large
Load Diff
1702
Traduction/TaniaBorecMemoir(Ukr)(FR)_V7.pdf
Normal file
1702
Traduction/TaniaBorecMemoir(Ukr)(FR)_V7.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1214
Traduction/TaniaBorecMemoir(Ukr)(FR)_V7.txt
Normal file
1214
Traduction/TaniaBorecMemoir(Ukr)(FR)_V7.txt
Normal file
File diff suppressed because it is too large
Load Diff
1702
Traduction/TaniaBorecMemoir(Ukr)(FR)_V8.pdf
Normal file
1702
Traduction/TaniaBorecMemoir(Ukr)(FR)_V8.pdf
Normal file
File diff suppressed because it is too large
Load Diff
1214
Traduction/TaniaBorecMemoir(Ukr)(FR)_V8.txt
Normal file
1214
Traduction/TaniaBorecMemoir(Ukr)(FR)_V8.txt
Normal file
File diff suppressed because it is too large
Load Diff
BIN
Traduction/TaniaBorecMemoir(Ukr).pdf
Normal file
BIN
Traduction/TaniaBorecMemoir(Ukr).pdf
Normal file
Binary file not shown.
@ -11,6 +11,7 @@ from reportlab.pdfbase.ttfonts import TTFont
|
|||||||
import os, time
|
import os, time
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
|
DEBUG = True
|
||||||
PDF_PATH = "Traduction/TaniaBorecMemoir(Ukr).pdf"
|
PDF_PATH = "Traduction/TaniaBorecMemoir(Ukr).pdf"
|
||||||
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
|
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
|
||||||
OLLAMA_URL = "http://localhost:11434/api/generate"
|
OLLAMA_URL = "http://localhost:11434/api/generate"
|
||||||
@ -173,6 +174,7 @@ def load_checkpoint():
|
|||||||
return json.load(f)
|
return json.load(f)
|
||||||
return {"last_processed_index": -1, "results": {}}
|
return {"last_processed_index": -1, "results": {}}
|
||||||
|
|
||||||
|
# Sauvegarde le checkpoint
|
||||||
# Sauvegarde le checkpoint
|
# Sauvegarde le checkpoint
|
||||||
def save_checkpoint(last_index, results):
|
def save_checkpoint(last_index, results):
|
||||||
# Trier les clés du dictionnaire results
|
# Trier les clés du dictionnaire results
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user