{TEXT["no_data"]}
import gradio as gr import google.generativeai as genai import os import json import time import base64 import fitz # Importation correcte pour Hugging Face from PIL import Image import io import tempfile import shutil # Configuration GOOGLE_API_KEY = "AIzaSyA4ma5pE1pPCzHHn-i9tDWuKqQEgSltMtI" genai.configure(api_key=GOOGLE_API_KEY) model = genai.GenerativeModel('gemini-1.5-flash') # Interface text (English only) TEXT = { "title": "Elixir - Document Intelligence", "description": "This demo showcases the capabilities of a generative AI model to interpret, understand, and classify any type of document WITHOUT CUSTOMIZATION. For developing a complete, precise, and defined pipeline, please contact martial@lexiapro.fr.", "instructions": [ "1. Upload a PDF document (1-10 pages) such as an invoice, regulatory document, report...", "2. Processing by Elixir", "3. Transcription of identified sections and elements (without customization)" ], "upload": "📂 Upload your document", "analyze": "🔍 Analyze document", "preview": "📄 Preview", "tabs": { "overview": "📋 Overview", "entities": "👥 Entities", "values": "💰 Values", "dates": "📅 Dates", "tables": "📊 Tables", "keypoints": "🔑 Key Points", "references": "🔗 References", "json": "📄 Complete JSON" }, "no_data": "No information found", "processing": "Processing...", "error": { "file_not_found": "File not found", "pdf_conversion": "Unable to convert PDF to image", "no_info": "No information extracted from PDF pages", "too_many_pages": "The PDF has more than 10 pages. Please upload a document with 10 pages or less." } } # Modern CSS - Style amélioré CSS = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); :root { --primary: #4f46e5; --primary-light: #818cf8; --primary-dark: #3730a3; --secondary: #10b981; --accent: #f59e0b; --dark: #111827; --light: #f9fafb; --gray-50: #f8fafc; --gray-100: #f1f5f9; --gray-200: #e2e8f0; --gray-300: #cbd5e1; --gray-400: #94a3b8; --gray-500: #64748b; --text-primary: #1e293b; --text-secondary: #475569; --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.05); --shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); --shadow-md: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05); --radius-sm: 0.25rem; --radius: 0.5rem; --radius-md: 0.75rem; --radius-lg: 1rem; } body, .gradio-container { font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important; color: var(--text-primary); background-color: var(--light); line-height: 1.6; } /* Layout principal */ .container { max-width: 1300px; margin: 0 auto; padding: 0 1rem; } .main-content { display: flex; gap: 2rem; align-items: flex-start; } .left-panel { flex: 1; } .right-panel { flex: 2; } /* En-tête */ .header { margin-bottom: 2rem; padding: 0.75rem 1.25rem; background: linear-gradient(135deg, var(--primary-light), var(--primary-dark)); border-radius: var(--radius-lg); box-shadow: var(--shadow-md); position: relative; overflow: hidden; color: white; height: 60px; display: flex; align-items: center; justify-content: center; } .header::before { content: ''; position: absolute; top: -50%; left: -50%; width: 200%; height: 200%; background: radial-gradient(circle, rgba(255,255,255,0.1) 0%, rgba(255,255,255,0) 60%); animation: pulse 15s ease-in-out infinite; z-index: 1; } @keyframes pulse { 0% { transform: scale(1); opacity: 0.5; } 50% { transform: scale(1.05); opacity: 0.8; } 100% { transform: scale(1); opacity: 0.5; } } .header img { max-height: 40px !important; object-fit: contain; position: relative; z-index: 2; } /* Intro card */ .intro-card { background: white; border-radius: var(--radius); box-shadow: var(--shadow); border: 1px solid var(--gray-200); overflow: hidden; margin-bottom: 1.5rem; transition: transform 0.3s ease, box-shadow 0.3s ease; } .intro-card:hover { transform: translateY(-3px); box-shadow: var(--shadow-md); } .intro-header { padding: 1.25rem; border-bottom: 1px solid var(--gray-200); background: linear-gradient(135deg, var(--primary-light), var(--primary-dark)); color: white; font-weight: 600; display: flex; align-items: center; gap: 0.5rem; } .intro-header h3 { margin: 0; font-size: 1.25rem; font-weight: 600; text-shadow: 0 1px 2px rgba(0,0,0,0.1); } .intro-body { padding: 1.5rem; } .intro-description { color: var(--text-primary); line-height: 1.7; font-size: 1.05rem; margin-bottom: 1.5rem; } .contact-links { display: flex; flex-wrap: wrap; gap: 1rem; margin-top: 1.5rem; background: linear-gradient(to right, rgba(79, 70, 229, 0.05), rgba(79, 70, 229, 0.1)); padding: 1.25rem; border-radius: var(--radius); border: 1px solid var(--gray-200); } .contact-link { display: flex; align-items: center; gap: 0.5rem; padding: 0.75rem 1rem; background: white; border-radius: var(--radius); color: var(--primary); text-decoration: none; font-weight: 500; transition: all 0.2s ease; box-shadow: var(--shadow-sm); border: 1px solid var(--gray-200); } .contact-link:hover { transform: translateY(-2px); box-shadow: var(--shadow); color: var(--primary-dark); border-color: var(--primary-light); } /* Accordéon pour workflow */ .accordion { border-radius: var(--radius); overflow: hidden; margin-bottom: 1.5rem; } .accordion-header { background: var(--gray-50); padding: 1.25rem; cursor: pointer; display: flex; align-items: center; justify-content: space-between; font-weight: 600; color: var(--primary); border: 1px solid var(--gray-200); border-radius: var(--radius); transition: all 0.3s ease; } .accordion-header:hover { background: var(--gray-100); } .accordion-header::after { content: "↓"; transition: transform 0.3s ease; } .accordion.active .accordion-header::after { transform: rotate(180deg); } .accordion-content { max-height: 0; overflow: hidden; transition: max-height 0.3s ease; background: white; border: 1px solid var(--gray-200); border-top: 0; border-radius: 0 0 var(--radius) var(--radius); padding: 0 1.25rem; } .accordion.active .accordion-content { max-height: 1000px; padding: 1.25rem; } .workflow-container { text-align: center; } .workflow-container img { max-width: 100%; border-radius: var(--radius); box-shadow: var(--shadow); margin-top: 1rem; } /* Instructions */ .instructions { background: white; padding: 1.5rem; border-radius: var(--radius); border: 1px solid var(--gray-200); box-shadow: var(--shadow); margin-bottom: 2rem; } .instructions h3 { color: var(--primary); margin-top: 0; margin-bottom: 1rem; font-weight: 600; font-size: 1.25rem; display: flex; align-items: center; gap: 0.5rem; } .instructions h3::before { content: '📋'; } .instructions ol { margin: 0; padding-left: 1.5rem; } .instructions li { margin-bottom: 0.75rem; position: relative; padding-left: 0.5rem; } .instructions li:last-child { margin-bottom: 0; } /* Upload section */ .upload-section { background: white; border-radius: var(--radius); box-shadow: var(--shadow); border: 1px solid var(--gray-200); padding: 1.5rem; } /* File input styling */ .file-container { border: 2px dashed var(--primary-light) !important; border-radius: var(--radius) !important; padding: 2rem !important; text-align: center !important; transition: all 0.3s ease !important; background-color: rgba(79, 70, 229, 0.05) !important; cursor: pointer !important; position: relative; } .file-container:hover { background-color: rgba(79, 70, 229, 0.1) !important; } .file-container::before { content: "📄"; font-size: 2rem; display: block; margin-bottom: 0.5rem; } button.primary { background: linear-gradient(135deg, var(--primary), var(--primary-dark)) !important; color: white !important; border: none !important; padding: 0.75rem 1.5rem !important; font-weight: 600 !important; border-radius: var(--radius) !important; transition: all 0.3s ease !important; box-shadow: 0 4px 6px rgba(79, 70, 229, 0.25) !important; width: 100% !important; margin-top: 1rem !important; } button.primary:hover { transform: translateY(-2px) !important; box-shadow: 0 7px 14px rgba(79, 70, 229, 0.3) !important; } /* Results tabs */ .tabs .tab-nav { background-color: var(--gray-50) !important; padding: 0.5rem !important; border-radius: var(--radius) var(--radius) 0 0 !important; border: 1px solid var(--gray-200) !important; border-bottom: none !important; } .tabs .tab-nav button { margin: 0 !important; padding: 0.75rem 1rem !important; font-weight: 500 !important; color: var(--text-secondary) !important; position: relative !important; transition: all 0.3s ease !important; } .tabs .tab-nav button.selected { color: var(--primary) !important; font-weight: 600 !important; } .tabs .tab-nav button.selected::after { content: ''; position: absolute; bottom: -0.5rem; left: 0; width: 100%; height: 3px; background: var(--primary); border-radius: 3px 3px 0 0; } .tabs .tabitem { background: white !important; padding: 1.5rem !important; border-radius: 0 0 var(--radius) var(--radius) !important; border: 1px solid var(--gray-200) !important; box-shadow: var(--shadow) !important; } /* Card components */ .info-card { background: white; padding: 0; border-radius: var(--radius); margin-bottom: 1.5rem; border: 1px solid var(--gray-200); box-shadow: var(--shadow); overflow: hidden; transition: transform 0.2s ease, box-shadow 0.2s ease; } .info-card:hover { transform: translateY(-2px); box-shadow: var(--shadow-md); } .info-card h3 { margin: 0; color: white; font-size: 1.1rem; font-weight: 600; padding: 1rem 1.5rem; background: linear-gradient(135deg, var(--primary-light), var(--primary-dark)); position: relative; } .info-card .content { padding: 1.25rem; } /* Formatage des listes dans les cartes */ .list-container { display: flex; flex-direction: column; gap: 1rem; } .list-item { padding: 1rem; background: var(--gray-50); border-radius: var(--radius); border: 1px solid var(--gray-200); transition: all 0.2s ease; } .list-item:hover { background: white; border-color: var(--primary-light); box-shadow: var(--shadow-sm); } .list-item-header { font-weight: 600; color: var(--primary); margin-bottom: 0.5rem; display: flex; align-items: center; gap: 0.5rem; } .list-item-header::before { content: '•'; color: var(--primary); font-size: 1.5rem; line-height: 1; } .list-item-content { color: var(--text-secondary); font-size: 0.95rem; } /* Améliorations tables */ .tables-container { display: flex; flex-direction: column; gap: 2rem; } .table-wrapper { overflow: hidden; border-radius: var(--radius); box-shadow: var(--shadow); background: white; } .table-wrapper h4 { padding: 1rem; margin: 0; background: linear-gradient(to right, var(--primary-light), var(--primary)); color: white; font-weight: 600; } .table-description { margin: 0; padding: 0.75rem 1rem; background: var(--gray-50); color: var(--text-secondary); border-bottom: 1px solid var(--gray-200); font-size: 0.9rem; font-style: italic; } .data-table { width: 100%; border-collapse: collapse; font-size: 0.95rem; } .data-table th { background: var(--gray-100); padding: 0.75rem 1rem; text-align: left; font-weight: 600; color: var(--primary-dark); border-bottom: 2px solid var(--primary-light); } .data-table td { padding: 0.75rem 1rem; border-bottom: 1px solid var(--gray-200); color: var(--text-secondary); } .data-table tr:last-child td { border-bottom: none; } .data-table tr:nth-child(even) { background-color: var(--gray-50); } .data-table tr:hover { background-color: rgba(79, 70, 229, 0.05); } /* Metadata grid */ .metadata-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); gap: 1rem; } .metadata-item { background: var(--gray-50); padding: 1rem; border-radius: var(--radius); border: 1px solid var(--gray-200); transition: all 0.2s ease; } .metadata-item:hover { background: white; border-color: var(--primary-light); box-shadow: var(--shadow-sm); } .metadata-item h4 { margin: 0 0 0.5rem 0; color: var(--primary); font-weight: 600; font-size: 0.9rem; text-transform: uppercase; letter-spacing: 0.5px; } .metadata-item p { margin: 0; color: var(--text-primary); font-weight: 500; } /* JSON viewer */ .json-viewer { background: var(--dark); color: #e2e8f0; padding: 1.25rem; border-radius: var(--radius); overflow: auto; font-family: 'Fira Code', 'Courier New', monospace; font-size: 0.9rem; line-height: 1.5; max-height: 400px; white-space: pre-wrap; } /* Loading animation */ .loading-spinner { display: inline-block; width: 50px; height: 50px; border: 3px solid rgba(79, 70, 229, 0.3); border-radius: 50%; border-top-color: var(--primary); animation: spin 1s ease-in-out infinite; } @keyframes spin { to { transform: rotate(360deg); } } /* Error message */ .error { padding: 1rem; background-color: #fee2e2; border: 1px solid #fecaca; border-radius: var(--radius); color: #b91c1c; font-weight: 500; } /* Responsive design */ @media (max-width: 1024px) { .main-content { flex-direction: column; } .left-panel, .right-panel { flex: none; width: 100%; } } """ # Prompt pour Gemini avec instruction améliorée pour les tableaux GEMINI_PROMPT = """ Analyze this document and extract relevant information in JSON format. Adapt the extraction based on the document type (invoice, contract, report, KID, etc.). Expected response structure: { "metadata": { "title": "Document title", "date": "Document date", "type": "Document type", "author": "Document author or issuer" }, "entities": [ { "name": "Entity name", "type": "Entity type (person, organization, etc.)", "role": "Role in the document" } ], "values": [ { "description": "Value description", "value": "Exact value", "unit": "Unit if applicable" } ], "dates": [ { "description": "Date description", "date": "Exact date", "importance": "Importance (high, medium, low)" } ], "tables": [ { "title": "Table title", "description": "Table description", "data": [ { "column1": "Value in row 1, column 1", "column2": "Value in row 1, column 2", "column3": "Value in row 1, column 3" }, { "column1": "Value in row 2, column 1", "column2": "Value in row 2, column 2", "column3": "Value in row 2, column 3" } ] } ], "key_points": [ { "category": "Key point category", "description": "Detailed description", "importance": "Importance (high, medium, low)" } ], "references": [ { "type": "Reference type", "value": "Reference value" } ] } Important instructions: 1. First identify the document type and adapt the extraction accordingly 2. For tables (this is EXTREMELY important): - Pay special attention to detect and extract ALL tables in the document - Carefully identify tables even if they don't have visible borders or lines - Identify column headers correctly (first row or separate header row) - Extract all rows and all columns with exact cell values - Maintain the same number of columns for each row - Preserve the exact structure of each table - For each table, provide a descriptive title based on content - For each table, include a brief description explaining what the table contains - If a table spans multiple pages, try to reconstruct it as one table - Include ALL data from the table, don't omit any rows or columns 3. For values: - Extract amounts, percentages, numbers - Include units when present 4. For dates: - Extract all important dates - Include the context of each date 5. For entities: - Identify people, organizations, locations - Include their role in the document 6. For references: - Extract reference numbers, codes, identifiers 7. For key points: - Identify important information based on document type - Categorize them appropriately General rules: - Respond only with JSON, without any additional text - Extract only factual and verifiable information - Be precise with values and dates - If a category is not relevant for the document, leave an empty array - Adapt categories based on document type - Do not make assumptions about missing data """ def create_info_card(title, content): """Create a formatted information card""" if not content: return f"""
{TEXT["no_data"]}
{description}
{header} | " html += "
---|
{value} | " html += "
{cell} | " html += "
{json.dumps(result, indent=2, ensure_ascii=False)}" # Initialize all tabs with default values outputs = [ metadata_html, create_info_card(TEXT["tabs"]["entities"], format_list(result.get("entities", []), "name", "role")), create_info_card(TEXT["tabs"]["values"], format_list(result.get("values", []), "description", "value")), create_info_card(TEXT["tabs"]["dates"], format_list(result.get("dates", []), "description", "date")), create_info_card(TEXT["tabs"]["tables"], format_table(result.get("tables", []))), create_info_card(TEXT["tabs"]["keypoints"], format_list(result.get("key_points", []), "category", "description")), create_info_card(TEXT["tabs"]["references"], format_list(result.get("references", []), "type", "value")), json_html ] return outputs # Fonction pour encoder les images en base64 def get_image_base64(file_path): try: with open(file_path, "rb") as image_file: encoded_string = base64.b64encode(image_file.read()).decode('utf-8') return encoded_string except Exception as e: print(f"Erreur lors de l'encodage de l'image {file_path}: {str(e)}") return "" # Chemins vers les images logo_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static", "elixir-logo-typo.png") workflow_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "static", "Editor _ Mermaid Chart-2025-04-15-142548.png") # Encoder les images en base64 logo_base64 = get_image_base64(logo_path) workflow_base64 = get_image_base64(workflow_path) # Logo et workflow HTML logo_html = f"""