|
|
|
@@ -0,0 +1,108 @@ |
|
|
|
import PyPDF2 |
|
|
|
import requests |
|
|
|
import json |
|
|
|
from reportlab.lib.pagesizes import letter |
|
|
|
from reportlab.lib.units import inch |
|
|
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer |
|
|
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle |
|
|
|
from reportlab.lib.enums import TA_JUSTIFY |
|
|
|
|
|
|
|
# Configuration
PDF_PATH = "TaniaBorecMemoir(Ukr).pdf"  # Original source document
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
OLLAMA_URL = "http://localhost:11434/api/generate"  # Ollama's default URL

# Path of the generated PDF: the source name with " (FR)" inserted before the
# extension. Only a trailing ".pdf" (any letter case) is rewritten. A plain
# str.replace() would also rewrite ".pdf" occurring earlier in the path, and
# would return the input path unchanged when the extension is missing or
# spelled ".PDF" -- making the output overwrite the source document.
if PDF_PATH.lower().endswith(".pdf"):
    OUTPUT_PDF_PATH = PDF_PATH[:-len(".pdf")] + " (FR).pdf"
else:
    OUTPUT_PDF_PATH = PDF_PATH + " (FR).pdf"
|
|
|
|
|
|
|
def extract_text_from_pdf(pdf_path):
    """Return the extracted text of a PDF, one list entry per page.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A list, in page order, holding the value ``page.extract_text()``
        produced for each page of the document.
    """
    with open(pdf_path, "rb") as pdf_file:
        # The list is fully built before the ``with`` block exits, so every
        # page is read while the underlying file is still open.
        return [page.extract_text() for page in PyPDF2.PdfReader(pdf_file).pages]
|
|
|
|
|
|
|
def send_to_ollama(prompt, model=OLLAMA_MODEL, context_size=128000, timeout=None):
    """Send a non-streaming generation request to Ollama and return its text.

    Args:
        prompt: Prompt text submitted to the model.
        model: Name of the Ollama model to run.
        context_size: Value sent as the ``num_ctx`` generation option.
        timeout: Optional timeout forwarded to ``requests.post`` (seconds, or
            a ``(connect, read)`` tuple). The default ``None`` waits
            indefinitely, which matches the previous behaviour; pass a value
            to keep a stalled server from blocking the whole run.

    Returns:
        The ``"response"`` field of the JSON document returned by the server.

    Raises:
        RuntimeError: If the server answers with an HTTP status other than
            200; the message carries the response body.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {"num_ctx": context_size},
    }
    # ``json=`` lets requests serialise the payload and declare it as
    # application/json; passing a pre-dumped string through ``data=`` sends
    # the same bytes without a JSON Content-Type header.
    response = requests.post(OLLAMA_URL, json=payload, timeout=timeout)
    if response.status_code != 200:
        # RuntimeError is still caught by callers that catch ``Exception``.
        raise RuntimeError(f"Erreur Ollama: {response.text}")
    return response.json()["response"]
|
|
|
|
|
|
|
def create_pdf_from_results(results, output_path):
    """Build a PDF report from the translated pages.

    The document starts with a title, followed by one "Page N" heading and
    one justified body paragraph per entry of ``results``.

    Args:
        results: Mapping of page number to translated text. Entries are
            rendered in the mapping's iteration order.
        output_path: Path of the PDF file to write.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    from xml.sax.saxutils import escape

    doc = SimpleDocTemplate(output_path, pagesize=letter, topMargin=inch, bottomMargin=inch)

    # Custom styles derived from the sample stylesheet.
    styles = getSampleStyleSheet()
    title_style = ParagraphStyle(
        'CustomTitle',
        parent=styles['Heading1'],
        fontSize=16,
        textColor='#1f4788',
        spaceAfter=0.3*inch,
        alignment=TA_JUSTIFY,
    )
    page_style = ParagraphStyle(
        'PageHeading',
        parent=styles['Heading2'],
        fontSize=12,
        textColor='#1f4788',
        spaceAfter=0.2*inch,
        spaceBefore=0.2*inch,
    )
    body_style = ParagraphStyle(
        'CustomBody',
        parent=styles['BodyText'],
        fontSize=11,
        alignment=TA_JUSTIFY,
        spaceAfter=0.2*inch,
    )

    # Title
    story = [
        Paragraph("Traduction - Ukrainien vers Français", title_style),
        Spacer(1, 0.2*inch),
    ]

    # Content
    for page_num, translation in results.items():
        story.append(Paragraph(f"Page {page_num}", page_style))
        # Paragraph interprets its text as XML-like markup, so the free-form
        # translated text is escaped first: a stray "<", ">" or "&" would
        # otherwise be parsed as a tag/entity and fail or render incorrectly.
        story.append(Paragraph(escape(translation), body_style))
        story.append(Spacer(1, 0.1*inch))

    # Build the PDF
    doc.build(story)
    print(f"PDF généré avec succès : {output_path}")
|
|
|
|
|
|
|
def main():
    """Translate the configured PDF page by page and write the result PDF.

    Each page's text is sent to Ollama with a translation prompt. When a page
    fails, the error message is stored in place of its translation so the
    remaining pages are still processed.
    """
    # Text extraction
    source_pages = extract_text_from_pdf(PDF_PATH)
    print(f"Nombre de pages extraites : {len(source_pages)}")

    # Page number -> translated text (or error message)
    translations = {}

    # Page-by-page processing
    for page_number, source_text in enumerate(source_pages, start=1):
        print(f"Traitement de la page {page_number}...")
        prompt = f"Traduis le texte suivant de l'ukrainien vers le français : {source_text}"
        try:
            outcome = send_to_ollama(prompt)
        except Exception as exc:
            # Best effort: record the failure for this page and keep going.
            outcome = f"Erreur lors du traitement de la page {page_number} : {exc}"
        translations[page_number] = outcome

    # Create the PDF holding all results
    create_pdf_from_results(translations, OUTPUT_PDF_PATH)


if __name__ == "__main__":
    main()