Spaces:
Running
Running
""" | |
Utility function for processing files with OCR in the Historical OCR Workshop app. | |
""" | |
import os | |
import tempfile | |
from pathlib import Path | |
from datetime import datetime | |
def process_file(uploaded_file, use_vision=True, processor=None, custom_prompt=None):
    """Run OCR on an uploaded file and return the results with metadata.

    The upload's bytes are written to a temporary file on disk, that path is
    handed to the processor, and the temporary file is removed afterwards
    whether processing succeeded or failed.

    Args:
        uploaded_file: Upload object exposing a ``name`` attribute and a
            ``getvalue()`` method that returns the file's bytes.
        use_vision: Whether to use the vision model; forwarded to the
            processor and echoed in the result.
        processor: Object with a ``process_file`` method. When ``None``, a
            ``StructuredOCR`` instance is imported from ``structured_ocr``
            and created.
        custom_prompt: Optional additional instructions for the model.

    Returns:
        dict: The processor's result, updated in place with ``file_name``,
        ``processed_at`` (ISO-8601 local time), ``file_size_mb`` (rounded to
        two decimals) and ``use_vision``. If saving the upload or processing
        it raises, a dict containing only ``error`` and ``file_name``.
    """
    # Import lazily so callers that inject their own processor do not need
    # the structured_ocr module to be importable.
    if processor is None:
        from structured_ocr import StructuredOCR
        processor = StructuredOCR()

    temp_path = None
    try:
        suffix = Path(uploaded_file.name).suffix

        # delete=False keeps the file on disk after the handle is closed so
        # the processor can reopen it by path; removal happens in `finally`.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path before writing so a failed write is still
            # cleaned up instead of leaking the temporary file.
            temp_path = tmp.name
            tmp.write(uploaded_file.getvalue())

        # Anything that is not a PDF is treated as an image.
        file_type = "pdf" if suffix.lower() == ".pdf" else "image"

        # The size is passed along so the processor can apply its own
        # page limiting for large files.
        file_size_mb = os.path.getsize(temp_path) / (1024 * 1024)

        result = processor.process_file(
            temp_path,
            file_type=file_type,
            use_vision=use_vision,
            file_size_mb=file_size_mb,
            custom_prompt=custom_prompt,
        )

        # Attach processing metadata to the processor's result.
        result.update({
            "file_name": uploaded_file.name,
            "processed_at": datetime.now().isoformat(),
            "file_size_mb": round(file_size_mb, 2),
            "use_vision": use_vision,
        })
        return result
    except Exception as e:
        # Boundary handler: callers receive an error payload rather than an
        # exception, matching the shape they already check for.
        return {
            "error": str(e),
            "file_name": uploaded_file.name,
        }
    finally:
        # Remove the temporary file; it may already be gone, which is fine.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                pass