Alysha Creelman
committed
changing stream in API from True to False app.py
app.py CHANGED
@@ -14,7 +14,7 @@ print(token)
 
 # Inference client setup with token from environment
 # token = os.getenv('HF_TOKEN')
-client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token
+client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token)
 # pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
 pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
 
@@ -54,7 +54,6 @@ def respond(
         temperature=temperature,
         do_sample=True,
         top_p=top_p,
-        stream=False,
     ):
         if stop_inference:
             response = "Inference cancelled."
@@ -78,7 +77,7 @@ def respond(
     for message_chunk in client.chat_completion(
         messages,
         max_tokens=max_tokens,
-        stream=True,
+        stream=False,
         temperature=temperature,
         top_p=top_p,
     ):