Spaces: Running on Zero
File size: 1,658 Bytes
import gradio as gr
from PIL import Image
import torch
from transformers import AutoProcessor, LlavaNextForConditionalGeneration
import spaces
# Load the processor and model
model_id = "llava-hf/llava-v1.6-mistral-7b-hf"
processor = AutoProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True
)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# ZeroGPU: a GPU is attached only while this function runs
@spaces.GPU()
def llava_inference(image: Image.Image, prompt: str):
    # Format the input as a single-turn conversation with one image and one text prompt
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ],
        },
    ]
    formatted_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
    inputs = processor(images=image, text=formatted_prompt, return_tensors="pt").to(device)
    # Generate a response with a cap on the number of new tokens
    output_ids = model.generate(**inputs, max_new_tokens=100)
    output_text = processor.decode(output_ids[0], skip_special_tokens=True)
    return output_text
# Gradio interface: image + text prompt in, generated text out
demo = gr.Interface(
    fn=llava_inference,
    inputs=[
        gr.Image(type="pil", label="Input Image"),
        gr.Textbox(lines=2, placeholder="Enter your prompt here...", label="Prompt"),
    ],
    outputs=gr.Text(label="Output Response"),
    title="LLaVA-1.6 Gradio Demo",
    description="Upload an image and enter a prompt. The model will generate a response using LLaVA-1.6.",
)
if __name__ == "__main__":
    demo.launch()
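
Once the Space is running, the same endpoint can also be queried programmatically. Below is a minimal sketch using gradio_client; the Space id and image path are placeholders you would replace with your own:

from gradio_client import Client, handle_file

# Hypothetical Space id; replace with the actual <username>/<space-name>
client = Client("your-username/llava-1-6-demo")

# gr.Interface exposes its prediction function under the default "/predict" endpoint,
# with arguments in the same order as the interface inputs (image, then prompt)
result = client.predict(
    handle_file("example.jpg"),        # placeholder path to a local image
    "What is shown in this image?",    # text prompt
    api_name="/predict",
)
print(result)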