# Spaces: abhisheksan/poetica (status: Sleeping)
# File size: 6,039 Bytes

from fastapi import FastAPI, HTTPException, status
from pydantic import BaseModel, ConfigDict, Field
from typing import Optional, List
from ctransformers import AutoModelForCausalLM
import time
import logging
from app.config import MODEL_PATH, MODEL_URL

# Configure logging
# The root logger emits INFO and above; each record is prefixed with the
# timestamp, logger name and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Initialize FastAPI app
# title/description/version are handed to FastAPI as application metadata.
app = FastAPI(
    title="Poetry Generator API",
    description="An API for generating poetry using a local LLM",
    version="1.0.0"
)

# Global model variable
# Assigned by startup_event() from initialize_model(). It stays None when
# loading fails; /generate then answers 503 and /health reports "not_loaded".
model = None

class PoetryRequest(BaseModel):
    # Request body accepted by the /generate endpoint.
    # (Kept as comments rather than a class docstring so the generated
    # schema description is unchanged.)

    # Required, non-empty topic for the poem.
    prompt: str = Field(
        ...,
        min_length=1,
        description="The topic or theme for the poem",
    )

    # Free-form style name; defaults to "free verse".
    style: str = Field(description="Style of the poem to generate", default="free verse")

    # Upper bound passed to the model as max_new_tokens; allowed range 50..500.
    max_length: int = Field(
        description="Maximum length of the generated poem",
        default=200,
        ge=50,
        le=500,
    )

    # Sampling temperature; allowed range 0.1..2.0.
    temperature: float = Field(
        description="Temperature for text generation",
        default=0.7,
        ge=0.1,
        le=2.0,
    )

class PoetryResponse(BaseModel):
    # Response body returned by the /generate endpoint.
    poem: str  # model output with leading/trailing whitespace stripped
    generation_time: float  # elapsed seconds measured around the generation call
    prompt: str  # the prompt exactly as received in the request
    style: str  # the style exactly as received in the request

class ModelInfo(BaseModel):
    # Response body returned by the /health endpoint.

    # NOTE(review): protected_namespaces is emptied presumably because the
    # `model_path` / `model_name` fields below start with pydantic's protected
    # "model_" prefix — confirm against the pydantic version in use.
    model_config = ConfigDict(protected_namespaces=())

    status: str  # "ready" when the global model is loaded, otherwise "not_loaded"
    model_path: str  # string form of MODEL_PATH
    model_name: str  # display name reported by the health check
    supported_styles: List[str]  # style names advertised by the health check
    max_context_length: int  # context length advertised by the health check

def initialize_model():
    """Load the local model file and return the loaded model.

    The model is loaded from ``MODEL_PATH`` via
    ``AutoModelForCausalLM.from_pretrained`` as a CPU-only "llama" model with
    a 512-token context.

    Returns:
        The loaded model object, or ``None`` when the model file is missing
        or loading raises. Failures are logged instead of raised so the
        caller can decide whether to keep running without a model.
    """
    if not MODEL_PATH.exists():
        logger.error(f"Model not found at {MODEL_PATH}")
        return None

    logger.info(f"Loading model from {MODEL_PATH}")
    try:
        return AutoModelForCausalLM.from_pretrained(
            str(MODEL_PATH.parent),
            model_file=MODEL_PATH.name,
            model_type="llama",
            max_new_tokens=512,
            context_length=512,
            gpu_layers=0  # CPU only
        )
    except Exception as e:
        # logger.exception records the traceback as well as the message, so a
        # failed load can be diagnosed from the logs alone.
        logger.exception(f"Error loading model: {str(e)}")
        return None

@app.on_event("startup")
async def startup_event():
    """Load the model once at application startup.

    The result of ``initialize_model()`` is stored in the module-level
    ``model``. A failed load (``None``) is only logged as a warning, so the
    service still starts.
    """
    global model
    model = initialize_model()
    if model is not None:
        return
    logger.warning("Model failed to load but service will start anyway")

@app.get(
    "/health",
    tags=["Health Check"],
    status_code=status.HTTP_200_OK,
    response_model=ModelInfo,
)
async def health_check():
    """Check if the model is loaded and get basic information"""
    # The docstring above is kept verbatim because it is published as the
    # endpoint description.
    styles = ["free verse", "haiku", "sonnet", "limerick", "tanka"]

    # Report readiness from the module-level model set at startup.
    if model is None:
        state = "not_loaded"
    else:
        state = "ready"

    return ModelInfo(
        status=state,
        model_path=str(MODEL_PATH),
        model_name="Llama-2-7B-Chat",
        supported_styles=styles,
        max_context_length=512,
    )

# Prompt templates keyed by lowercase style name. Each template has a single
# ``{prompt}`` placeholder. Built once at import time instead of per request.
_PROMPT_TEMPLATES = {
    "haiku": "Write a haiku about {prompt}. Follow the 5-7-5 syllable pattern:\n\n",
    "sonnet": "Write a Shakespearean sonnet about {prompt}. Follow the traditional 14-line format with rhyme scheme ABAB CDCD EFEF GG:\n\n",
    "limerick": "Write a limerick about {prompt}. Follow the AABBA rhyme scheme:\n\n",
    "free verse": "Write a free verse poem about {prompt}. Make it creative and meaningful:\n\n",
    "tanka": "Write a tanka about {prompt}. Follow the 5-7-5-7-7 syllable pattern:\n\n",
}
# Template used when the requested style has no entry above.
_FALLBACK_STYLE = "free verse"


@app.post(
    "/generate",
    response_model=PoetryResponse,
    status_code=status.HTTP_200_OK,
    tags=["Generation"]
)
async def generate_poem(request: PoetryRequest):
    """Generate a poem based on the provided prompt and parameters"""
    # The docstring above is kept verbatim because it is published as the
    # endpoint description.
    #
    # Raises:
    #   HTTPException(503) when the global model has not been loaded.
    #   HTTPException(500) when generation raises; the original exception is
    #   chained as the cause and logged with its traceback.
    if model is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Model not loaded. Please check /health endpoint for status."
        )

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()

    # Style lookup is case-insensitive; unknown styles use the fallback
    # template. The response still echoes the style as the caller sent it.
    template = _PROMPT_TEMPLATES.get(request.style.lower())
    if template is None:
        logger.warning(
            "Unknown style %r; falling back to %r", request.style, _FALLBACK_STYLE
        )
        template = _PROMPT_TEMPLATES[_FALLBACK_STYLE]
    full_prompt = template.format(prompt=request.prompt)

    try:
        # NOTE(review): model(...) is a synchronous call inside an async
        # handler, so this coroutine does not yield while it runs.
        output = model(
            full_prompt,
            max_new_tokens=request.max_length,
            temperature=request.temperature,
            top_p=0.95,
            repeat_penalty=1.2
        )

        generation_time = time.perf_counter() - start_time

        return PoetryResponse(
            poem=output.strip(),
            generation_time=generation_time,
            prompt=request.prompt,
            style=request.style
        )

    except Exception as e:
        logger.exception(f"Generation error: {str(e)}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to generate poem: {str(e)}"
        ) from e
    
def download_model():
    """Download the model file to ``MODEL_PATH`` unless it already exists.

    The payload is streamed from ``MODEL_URL`` into a temporary ``.part``
    file next to ``MODEL_PATH`` and renamed into place only after the whole
    body has been written. An interrupted download therefore never leaves a
    truncated file at ``MODEL_PATH`` that a later call would mistake for a
    complete model.

    Raises:
        Exception: any error from the HTTP request or the file writes is
            logged, the partial file is removed, and the error is re-raised.
    """
    import requests
    from tqdm import tqdm

    if MODEL_PATH.exists():
        logger.info(f"Model already exists at {MODEL_PATH}")
        return

    logger.info(f"Downloading model to {MODEL_PATH}")
    partial_path = MODEL_PATH.with_name(MODEL_PATH.name + ".part")
    try:
        # Make sure the target directory exists before opening the file.
        MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)

        # timeout=(connect, read): without it a stalled server would block
        # forever. The with-block releases the connection on every path.
        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            with open(partial_path, 'wb') as file, tqdm(
                desc="Downloading",
                # An unknown size (missing header) is passed as None so the
                # bar shows plain counters instead of a 0-byte total.
                total=total_size or None,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as pbar:
                # 1 MiB chunks: far fewer Python-level iterations than 1 KiB
                # for a model-sized file.
                for data in response.iter_content(chunk_size=1024 * 1024):
                    size = file.write(data)
                    pbar.update(size)

        # Publish the finished file under its final name in one step.
        partial_path.replace(MODEL_PATH)
        logger.info("Model downloaded successfully")
    except Exception as e:
        logger.exception(f"Error downloading model: {str(e)}")
        if partial_path.exists():
            partial_path.unlink()
        raise

if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on host 0.0.0.0, port 8000.
    # The app is given as the import string "app.main:app" and reload is
    # switched on (NOTE(review): looks like a development setting — confirm
    # before using this entry point in production).
    import uvicorn
    uvicorn.run("app.main:app", host="0.0.0.0", port=8000, reload=True)