import json
import os
from datetime import datetime

import PyPDF2
import requests

# Configuration
PDF_PATH = "TaniaBorecMemoir(Ukr).pdf"
OLLAMA_MODEL = "traductionUkrainienVersFrancais:latest"
OLLAMA_URL = "http://localhost:11434/api/generate"
TARGET_LANGUAGE = "français"
CHECKPOINT_FILE = "checkpoint.json"
TEMP_OUTPUT_TXT = "output_temp.txt"

# Output names are derived from the input name: the ".pdf" suffix is replaced
# by " (<first two letters of the target language, upper-cased>)_V2.<ext>".
_LANGUAGE_TAG = TARGET_LANGUAGE.upper()[:2]
FINAL_OUTPUT_PDF = PDF_PATH.replace(".pdf", f" ({_LANGUAGE_TAG})_V2.pdf")
FINAL_OUTPUT_TXT = PDF_PATH.replace(".pdf", f" ({_LANGUAGE_TAG})_V2.txt")


# Load or initialise the checkpoint
def load_checkpoint():
    """Return the saved checkpoint, or a fresh one if no file exists yet.

    Returns:
        A dict with the keys ``"last_processed_index"`` and ``"results"``.
        The fresh checkpoint is ``{"last_processed_index": -1, "results": {}}``.

    Raises:
        json.JSONDecodeError: if the checkpoint file exists but is not valid
            JSON.
    """
    # NOTE: JSON object keys are always strings, so the keys of "results"
    # come back as str even if they were ints when the checkpoint was saved.
    try:
        with open(CHECKPOINT_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # Opening directly (instead of checking os.path.exists first) avoids
        # a race between the existence check and the open.
        return {"last_processed_index": -1, "results": {}}


# Save the checkpoint
def save_checkpoint(last_index, results):
    """Write the checkpoint to ``CHECKPOINT_FILE``.

    Args:
        last_index: value stored under ``"last_processed_index"``.
        results: value stored under ``"results"``; must be JSON-serialisable.
    """
    # Write to a sibling temp file and swap it in with os.replace, so an
    # interruption mid-write cannot leave a truncated checkpoint behind and
    # destroy the only resume point.
    tmp_path = f"{CHECKPOINT_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump({"last_processed_index": last_index, "results": results}, f)
    os.replace(tmp_path, CHECKPOINT_FILE)


# Save the intermediate results to a TXT file
def save_temp_results(results):
    """Overwrite ``TEMP_OUTPUT_TXT`` with one labelled entry per result.

    Args:
        results: mapping of index to translated text; entries are written in
            the mapping's iteration order.
    """
    with open(TEMP_OUTPUT_TXT, "w", encoding="utf-8") as f:
        f.writelines(
            f"Paragraphe {idx}:\n{translation}\n\n"
            for idx, translation in results.items()
        )


# Extract the text of the PDF (unchanged)
def extract_text_from_pdf(pdf_path):
    """Return a list holding the extracted text of each page, in page order.

    Args:
        pdf_path: path of the PDF file to read.
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # `or ""` keeps every entry a str, so joining the pages later cannot
        # fail on a page for which no text was returned.
        return [page.extract_text() or "" for page in reader.pages]


# Split into paragraphs (unchanged)
# NOTE(review): the source of this function is cut off mid-statement, inside
# the regex literal passed to re.sub. The visible fragment is kept verbatim
# below, commented out so the module still parses; restore the full body from
# the original script before relying on it.
#
# def split_pages_in_paragraphs(pages_text):
#     import re
#     full_text = "\n".join(pages_text)
#     full_text = re.sub(r'(?