import os

import requests
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from llama_cpp import Llama
from pydantic import BaseModel

# Configuration
MODEL_URL = "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
MODEL_NAME = "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
MODEL_DIR = "model"
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)

# Create model directory if it doesn't exist
os.makedirs(MODEL_DIR, exist_ok=True)

# Download the model if it doesn't exist
if not os.path.exists(MODEL_PATH):
    print(f"Downloading model from {MODEL_URL}...")
    response = requests.get(MODEL_URL, stream=True)
    if response.status_code == 200:
        with open(MODEL_PATH, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print("Model downloaded successfully!")
    else:
        raise RuntimeError(f"Failed to download model: HTTP {response.status_code}")
else:
    print("Model already exists. Skipping download.")

# Initialize FastAPI
app = FastAPI(
    title="DeepSeek-R1 OpenAI-Compatible API",
    description="OpenAI-compatible API for DeepSeek-R1-Distill-Qwen-1.5B",
    version="1.0.0",
)

# CORS Configuration
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the model
print("Loading model...")
try:
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=2048,
        n_threads=4,
        n_gpu_layers=0,
        verbose=False,
    )
    print("Model loaded successfully!")
except Exception as e:
    raise RuntimeError(f"Failed to load model: {str(e)}")

# Root endpoint with documentation
@app.get("/", response_class=HTMLResponse)
async def root():
    return f"""