rag-insiel / ingest.py
alegio98's picture
Upload 4 files
bc1430f verified
raw
history blame contribute delete
349 Bytes
from docling.document_converter import DocumentConverter
# Convert the source PDF with Docling, print its Markdown rendering, and
# report how many image placeholders the rendering contains.
source = "data/insiel.pdf"
converter = DocumentConverter()
result = converter.convert(source)

# Export once and reuse the text: the document was previously rendered twice,
# once for printing and once for counting.
markdown_text = result.document.export_to_markdown()
print(markdown_text)

# Count the "<!-- image -->" markers present in the exported Markdown.
image_count = markdown_text.count("<!-- image -->")
# The leading emoji had been mis-decoded (UTF-8 bytes F0 9F 94 92 read through
# a single-byte code page); it is written as an escape so it stays intact
# regardless of how this source file is re-encoded.
print(f"\U0001F512 Immagini trovate: {image_count}")