historical-ocr / process_file.py
milwright's picture
UI improvements: Remove confidence scores from display, make Document Preview collapsible, simplify HTML output formatting
b5fe18a
"""
Utility function for processing files with OCR in the Historical OCR Workshop app.
"""
import os
import tempfile
from pathlib import Path
from datetime import datetime
def process_file(uploaded_file, use_vision=True, processor=None, custom_prompt=None):
    """Run OCR on an uploaded file and return the results with metadata.

    The upload's bytes are copied to a temporary file on disk (keeping the
    original file extension), that path is handed to the processor, and the
    temporary file is removed before this function returns -- including when
    copying the bytes or the processing itself fails.

    Args:
        uploaded_file: Object exposing a ``name`` attribute (used for the
            extension and echoed back in the result) and a ``getvalue()``
            method returning the file content as bytes.
        use_vision: Whether to use the vision model; forwarded to the
            processor and recorded in the result.
        processor: Object with a ``process_file`` method. If None, a
            ``StructuredOCR`` instance is imported and created.
        custom_prompt: Optional additional instructions, forwarded to the
            processor unchanged.

    Returns:
        dict: On success, the processor's result updated with ``file_name``,
        ``processed_at`` (ISO-8601 local timestamp), ``file_size_mb``
        (rounded to two decimals) and ``use_vision``. If processing raises,
        a dict with ``error`` (the exception message) and ``file_name``.

    Raises:
        Exception: Failures while importing/creating the default processor
            or while writing the temporary file are not converted into an
            error dict; they propagate to the caller.
    """
    # Import the processor lazily so callers that supply their own do not
    # need the structured_ocr module to be importable.
    if processor is None:
        from structured_ocr import StructuredOCR
        processor = StructuredOCR()

    file_name = uploaded_file.name
    suffix = Path(file_name).suffix

    # delete=False: the file must outlive the handle so the processor can
    # reopen it by path; removal is handled explicitly in the finally below.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    temp_path = tmp.name
    try:
        # Writing happens inside the try so that a failed getvalue()/write()
        # does not leave an orphaned temporary file behind.
        with tmp:
            tmp.write(uploaded_file.getvalue())

        try:
            # Anything that is not a PDF is treated as an image.
            file_type = "pdf" if suffix.lower() == ".pdf" else "image"

            # Size in MB is passed along so the processor can limit pages.
            file_size_mb = os.path.getsize(temp_path) / (1024 * 1024)

            result = processor.process_file(
                temp_path,
                file_type=file_type,
                use_vision=use_vision,
                file_size_mb=file_size_mb,
                custom_prompt=custom_prompt,
            )

            # Add processing metadata
            result.update({
                "file_name": file_name,
                "processed_at": datetime.now().isoformat(),
                "file_size_mb": round(file_size_mb, 2),
                "use_vision": use_vision,
            })
            return result
        except Exception as e:
            # Deliberate boundary: processing failures are reported to the
            # caller as data rather than raised.
            return {
                "error": str(e),
                "file_name": file_name,
            }
    finally:
        # Clean up the temporary file; it may already be gone if the
        # processor removed it itself.
        try:
            os.unlink(temp_path)
        except FileNotFoundError:
            pass