Alysha Creelman
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -14,9 +14,9 @@ print(token)
|
|
14 |
|
15 |
# Inference client setup with token from environment
|
16 |
# token = os.getenv('HF_TOKEN')
|
17 |
-
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token)
|
18 |
-
pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
|
19 |
-
|
20 |
|
21 |
# Global flag to handle cancellation
|
22 |
stop_inference = False
|
@@ -54,6 +54,7 @@ def respond(
|
|
54 |
temperature=temperature,
|
55 |
do_sample=True,
|
56 |
top_p=top_p,
|
|
|
57 |
):
|
58 |
if stop_inference:
|
59 |
response = "Inference cancelled."
|
|
|
14 |
|
15 |
# Inference client setup with token from environment
|
16 |
# token = os.getenv('HF_TOKEN')
|
17 |
+
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token, stream=False)
|
18 |
+
# pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
|
19 |
+
pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
|
20 |
|
21 |
# Global flag to handle cancellation
|
22 |
stop_inference = False
|
|
|
54 |
temperature=temperature,
|
55 |
do_sample=True,
|
56 |
top_p=top_p,
|
57 |
+
stream=False,
|
58 |
):
|
59 |
if stop_inference:
|
60 |
response = "Inference cancelled."
|