Hugging Face Spaces — app status: Sleeping.
# Standard library.
from threading import Thread

# Third-party dependencies.
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Fine-tuned GPT-SW3 126M question-answering checkpoint on the Hub;
# a single constant keeps model and tokenizer pointed at the same repo.
MODEL_ID = "Sigurdur/gpt-sw3-126m-nqii-ruqad"

# Load once at startup so every chat request reuses the same instances.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
def streaming_respond(question, history):
    """Stream a generated answer to *question*, chunk by chunk.

    Parameters
    ----------
    question : str
        The user's latest message from the chat box.
    history : list
        Prior conversation turns supplied by ``gr.ChatInterface``.
        Unused: the model is prompted with the current question only.

    Yields
    ------
    str
        The answer text accumulated so far, re-yielded after each new
        chunk so the UI renders a live "typing" effect.
    """
    # Prompt template must match the format the model was fine-tuned on.
    input_ids = tokenizer.encode(
        f"### Question: {question} ### Answer:\n", return_tensors="pt"
    )
    # skip_prompt=True so the echoed prompt is not streamed back to the
    # user; timeout guards against a hung generation thread.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=100,
        # Fix: the original passed temperature while do_sample defaulted
        # to False, so decoding was greedy and temperature was silently
        # ignored. Sampling must be enabled for temperature to apply.
        do_sample=True,
        temperature=0.7,
        num_beams=1,
    )
    # generate() blocks until completion, so run it on a background
    # thread and consume tokens from the streamer as they arrive.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    chunks = []
    for text in streamer:
        chunks.append(text)
        yield "".join(chunks)
# Start the Gradio chat UI only when run as a script, not on import.
if __name__ == "__main__":
    gr.ChatInterface(streaming_respond).launch()