Alysha Creelman committed on
Commit fef6b7e · unverified · 1 Parent(s): 8629508

Update app.py

Files changed (1)
app.py +4 -3
app.py CHANGED
@@ -14,9 +14,9 @@ print(token)
 
 # Inference client setup with token from environment
 # token = os.getenv('HF_TOKEN')
-client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token)
-pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
-# pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
+client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token, stream=False)
+# pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
+pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
 
 # Global flag to handle cancellation
 stop_inference = False
@@ -54,6 +54,7 @@ def respond(
 temperature=temperature,
 do_sample=True,
 top_p=top_p,
+stream=False,
 ):
 if stop_inference:
 response = "Inference cancelled."
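
A minimal sketch (not part of this commit) of how the text-generation pipeline configured above can be exercised; the prompt text and generation arguments here are illustrative assumptions, not taken from app.py.

# Minimal sketch, assuming the pipeline setup from this commit.
# Prompt text and generation kwargs below are assumptions, not from app.py.
import torch
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    "microsoft/Phi-3-mini-4k-instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",  # requires the accelerate package
)

# The pipeline returns a list of dicts, each with a "generated_text" field.
output = pipe(
    "Explain what an inference client does.",
    max_new_tokens=64,
    do_sample=True,
    top_p=0.95,
)
print(output[0]["generated_text"])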