Compare commits
5 Commits
d5313fb143
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc66ac9520 | ||
|
|
8b45028101 | ||
|
|
ef4515adcc | ||
|
|
7aea840821 | ||
|
|
71e595a966 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1,2 +1,6 @@
|
|||||||
output_temp.txt
|
output_temp.txt
|
||||||
checkpoint.json
|
checkpoint.json
|
||||||
|
Traduction/Modelfile
|
||||||
|
.env
|
||||||
|
Traduction/*.pdf
|
||||||
|
Traduction/*.txt
|
||||||
|
|||||||
@@ -145,7 +145,7 @@ training_args = TrainingArguments(
|
|||||||
per_device_train_batch_size=1,
|
per_device_train_batch_size=1,
|
||||||
gradient_accumulation_steps=16,
|
gradient_accumulation_steps=16,
|
||||||
learning_rate=1e-4,
|
learning_rate=1e-4,
|
||||||
num_train_epochs=3,
|
num_train_epochs=2,
|
||||||
max_steps=1000,
|
max_steps=1000,
|
||||||
|
|
||||||
fp16=False, # ⚠ disable AMP
|
fp16=False, # ⚠ disable AMP
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from nltk.translate.bleu_score import corpus_bleu
|
|||||||
# Configuration
|
# Configuration
|
||||||
# ----------------------------
|
# ----------------------------
|
||||||
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # base model
|
BASE_MODEL = "Qwen/Qwen2.5-7B-Instruct" # base model
|
||||||
LORA_DIR = "./qwen2.5-7b-uk-fr-lora" # fine-tuned LoRA
|
LORA_DIR = "./qwen2.5-7b-uk-fr-lora-2epoch" # fine-tuned LoRA
|
||||||
VALIDATION_FILE = "validation.jsonl" # small validation subset
|
VALIDATION_FILE = "validation.jsonl" # small validation subset
|
||||||
MAX_INPUT_LENGTH = 1024
|
MAX_INPUT_LENGTH = 1024
|
||||||
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|||||||
@@ -82,6 +82,8 @@ Vous pouvez modifier les paramètres suivants dans `main.py` :
|
|||||||
---
|
---
|
||||||
|
|
||||||
## Fine-tuning
|
## Fine-tuning
|
||||||
|
/!\ Expérimental !!!
|
||||||
|
|
||||||
Le fine-tuning permet d'avoir une meilleure traduction. C'est un processus long en temps de calcul, mais il permet une traduction plus précise.
|
Le fine-tuning permet d'avoir une meilleure traduction. C'est un processus long en temps de calcul, mais il permet une traduction plus précise.
|
||||||
|
|
||||||
Le principe est le suivant :
|
Le principe est le suivant :
|
||||||
|
|||||||
@@ -1,17 +1,19 @@
|
|||||||
FROM qwen2.5:14b
|
FROM translategemma:12b
|
||||||
PARAMETER temperature 0.2
|
PARAMETER temperature 0.2
|
||||||
PARAMETER num_ctx 8192
|
PARAMETER num_ctx 8192
|
||||||
|
|
||||||
SYSTEM """
|
SYSTEM """
|
||||||
|
You are a professional Ukrainian (uk) to French (fr) translator. Your goal is to accurately convey the meaning and nuances of the original Ukrainian text while adhering to French grammar, vocabulary, and cultural sensitivities.
|
||||||
|
Produce only the French translation, without any additional explanations or commentary. Please translate the following Ukrainian text into French:
|
||||||
|
|
||||||
|
|
||||||
|
{TEXT}
|
||||||
|
|
||||||
Tu es un traducteur spécialisé dans les mémoires ukrainiennes des années 1910.
|
|
||||||
- Garde le style narratif et les tournures orales de l'auteur.
|
|
||||||
- Respecte les règles de traduction suivantes :
|
|
||||||
Règles strictes :
|
Règles strictes :
|
||||||
1. **Conserve tous les noms de lieux** dans leur forme originale (ex. : Львів → Lviv, mais ajoute une note si nécessaire entre [ ]).
|
1. **Conserve tous les noms de lieux** dans leur forme originale (ex. : Львів → Lviv, mais ajoute une note si nécessaire entre [ ]).
|
||||||
2. **Respecte le style narratif** : garde les tournures orales et les expressions propres à l'auteur.
|
2. **Respecte le style narratif** : garde les tournures orales et les expressions propres à l’auteur.
|
||||||
3. **Pour les termes historiques** (ex. : "powiat"), utilise le terme français standard et ajoute une note explicative.
|
3. **Pour les termes historiques** (ex. : "powiat"), utilise le terme français standard et ajoute une note explicative.
|
||||||
4. **Conserve les citations** en russe/allemand/polonais intégrées au texte (mais ajoute une note de fin de paragraphe entre [ ] en la traduisant et en précisant la langue d'origine).
|
4. **Conserve les citations** en russe/allemand/polonais intégrées au texte (mais ajoute une note de fin de paragraphe entre [ ] en la traduisant et en précisant la langue d'origine).
|
||||||
5. **Structure** : Garde les sauts de ligne et la mise en page originale.
|
5. **Structure** : Garde les sauts de ligne et la mise en page originale.
|
||||||
6. **Notes du traducteur** : Ajoute entre crochets [ ] les explications contextuelles si un contexte historique exist.
|
6. **Notes du traducteur** : Ajoute entre crochets [ ] les explications contextuelles si un contexte historique existe (ex. : "[Note : le contexte]").
|
||||||
"""
|
"""
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -11,15 +11,14 @@ from reportlab.pdfbase.ttfonts import TTFont
|
|||||||
import os, time
|
import os, time
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
DEBUG = True
|
|
||||||
PDF_PATH = "Traduction/TaniaBorecMemoir(Ukr).pdf"
|
PDF_PATH = "Traduction/TaniaBorecMemoir(Ukr).pdf"
|
||||||
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
|
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
|
||||||
OLLAMA_URL = "http://localhost:11434/api/generate"
|
OLLAMA_URL = "http://localhost:11434/api/generate"
|
||||||
TARGET_LANGUAGE = "français"
|
TARGET_LANGUAGE = "français"
|
||||||
CHECKPOINT_FILE = "Traduction/checkpoint.json"
|
CHECKPOINT_FILE = "Traduction/checkpoint.json"
|
||||||
TEMP_OUTPUT_TXT = "Traduction/output_temp.txt"
|
TEMP_OUTPUT_TXT = "Traduction/output_temp.txt"
|
||||||
FINAL_OUTPUT_PDF = PDF_PATH.replace(".pdf",f"({TARGET_LANGUAGE.upper()[:2]})_V9.pdf")
|
FINAL_OUTPUT_PDF = PDF_PATH.replace(".pdf",f"({TARGET_LANGUAGE.upper()[:2]})_V10.pdf")
|
||||||
FINAL_OUTPUT_TXT = PDF_PATH.replace(".pdf",f"({TARGET_LANGUAGE.upper()[:2]})_V9.txt")
|
FINAL_OUTPUT_TXT = PDF_PATH.replace(".pdf",f"({TARGET_LANGUAGE.upper()[:2]})_V10.txt")
|
||||||
|
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
|
|
||||||
@@ -174,7 +173,6 @@ def load_checkpoint():
|
|||||||
return json.load(f)
|
return json.load(f)
|
||||||
return {"last_processed_index": -1, "results": {}}
|
return {"last_processed_index": -1, "results": {}}
|
||||||
|
|
||||||
# Sauvegarde le checkpoint
|
|
||||||
# Sauvegarde le checkpoint
|
# Sauvegarde le checkpoint
|
||||||
def save_checkpoint(last_index, results):
|
def save_checkpoint(last_index, results):
|
||||||
# Trier les clés du dictionnaire results
|
# Trier les clés du dictionnaire results
|
||||||
|
|||||||
1
llama.cpp
Submodule
1
llama.cpp
Submodule
Submodule llama.cpp added at e463bbdf65
Reference in New Issue
Block a user