affichage des paragraphes en mode debug

2026-01-05 17:13:40 +01:00
parent aa2ab7dbec
commit 89c2197a42
1 changed files with 46 additions and 25 deletions
@@ -3,23 +3,26 @@ import requests
 import json
 from reportlab.lib.pagesizes import letter
 from reportlab.lib.units import inch
-from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Flowable
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.lib.enums import TA_JUSTIFY
 from reportlab.pdfbase import pdfmetrics
 from reportlab.pdfbase.ttfonts import TTFont
 import os
 from datetime import datetime
 # Configuration
 DEBUG = True
 PDF_PATH = "TaniaBorecMemoir(Ukr).pdf"
 OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
 OLLAMA_URL = "http://localhost:11434/api/generate"
 TARGET_LANGUAGE = "français"
 CHECKPOINT_FILE = "checkpoint.json"
 TEMP_OUTPUT_TXT = "output_temp.txt"
-FINAL_OUTPUT_PDF = PDF_PATH.replace(".pdf",f" ({TARGET_LANGUAGE.upper()[:2]})_V4.pdf")
+FINAL_OUTPUT_PDF = PDF_PATH.replace(".pdf",f"({TARGET_LANGUAGE.upper()[:2]})_V4.pdf")
-FINAL_OUTPUT_TXT = PDF_PATH.replace(".pdf",f" ({TARGET_LANGUAGE.upper()[:2]})_V4.txt")
+FINAL_OUTPUT_TXT = PDF_PATH.replace(".pdf",f"({TARGET_LANGUAGE.upper()[:2]})_V4.txt")
 DEBUG = True
 def extract_parameters_from_template(template_str):
    """Extrait les paramètres du modèle à partir du template."""
@@ -144,8 +147,8 @@ def display_llm_info():
        return "Informations du modèle non disponibles"
 def register_unicode_font():
-    """Enregistre une police TrueType qui supporte le cyrilique."""
+    """Enregistre une police TrueType qui supporte le cyrillique."""
-    # Recherche une police système qui supporte le cyrilique
+    # Recherche une police système qui supporte le cyrillique
    font_paths = [
        r"C:\Windows\Fonts\DejaVuSans.ttf",
        r"C:\Windows\Fonts\Calibri.ttf",
@@ -211,16 +214,13 @@ def send_to_ollama(text, target_lang=TARGET_LANGUAGE, model=OLLAMA_MODEL):
    else:
        raise Exception(f"Erreur Ollama: {response.text}")
-# Création du PDF final (inchangée)
+# Création du PDF final (en mode DEBUG, l'identifiant de chaque paragraphe précède son texte)
 def create_pdf_from_results(results, output_path):
-    """Crée un PDF à partir des résultats de traduction."""
+    """Crée un PDF à partir des résultats de traduction ; en mode DEBUG, chaque paragraphe est précédé de son identifiant."""
    doc = SimpleDocTemplate(output_path, pagesize=letter, topMargin=inch, bottomMargin=inch)
    story = []
    # Enregistre une police qui supporte le cyrillique
    font_name = register_unicode_font()
-    # Style personnalisé
+    # Styles personnalisés
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'CustomTitle',
@@ -251,17 +251,34 @@ def create_pdf_from_results(results, output_path):
        fontName=font_name
    )
    note_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=8,
        alignment=TA_JUSTIFY,
        spaceAfter=0,
        fontName=font_name
    )
    # Création du document (aucun callback de notes ni de numérotation de page n'est branché ici)
    doc = SimpleDocTemplate(
        output_path,
        pagesize=letter,
        topMargin=inch,
        bottomMargin=inch,
    )
    # Titre avec la langue cible
    story.append(Paragraph(f"Traduction - Ukrainien vers {TARGET_LANGUAGE.capitalize()}", title_style))
    story.append(Paragraph(f"Document : {PDF_PATH}", title_style))
    story.append(Spacer(1, 0.2*inch))
    # Contenu
-    for page_num, translation in results.items():
+    for paragraph_num, translation in results.items():
        # Préserver la mise en page en convertissant les sauts de ligne
        formatted_text = translation.replace("\n", "<br/>")
        if DEBUG:
            # Ajoute le paragraphe avec sa note
            story.append(Paragraph(paragraph_num, note_style))
        story.append(Paragraph(formatted_text, body_style))
        # story.append(Spacer(1, 0.1*inch))
    # Infos sur le LLM
    story.append(Spacer(1, 0.2*inch))
@@ -271,6 +288,7 @@ def create_pdf_from_results(results, output_path):
    doc.build(story)
    print(f"PDF généré avec succès : {output_path}")
 def create_txt_from_results(results, output_path):
    """Crée un fichier TXT à partir des résultats de traduction."""
    OUTPUT_TXT_PATH = output_path.replace(".pdf", f".txt")  # Chemin du fichier TXT de sortie
@@ -281,7 +299,10 @@ def create_txt_from_results(results, output_path):
        txt_file.write(title_text + "\n\n")
        # Contenu
-        for page_num, translation in results.items():
+        for paragraph_num, translation in results.items():
            # Ajoute les numéros de paragraphe et de chapitre
            if(DEBUG): txt_file.write(f"{paragraph_num}\n")
            # Écrit la traduction telle quelle (sauts de ligne conservés), suivie d'une ligne vide
            txt_file.write(translation + "\n\n")
@@ -301,7 +322,7 @@ def main():
    paragraphs = split_pages_in_paragraphs(pages)
    # Traitement des paragraphes
-    batch_size = 3
+    batch_size = 5
    for i in range(last_index + 1, len(paragraphs), batch_size):
        batch = paragraphs[i:i + batch_size]
        paragraph_cumul = "\n".join(batch)