Alysha Creelman committed
Commit f9dbf68 (unverified) · 1 parent: fef6b7e

Changing stream in API from True to False in app.py

Files changed (1):
  app.py  +2 -3
app.py CHANGED
@@ -14,7 +14,7 @@ print(token)
 
 # Inference client setup with token from environment
 # token = os.getenv('HF_TOKEN')
-client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token, stream=False)
+client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token)
 # pipe = pipeline("text-generation", "TinyLlama/TinyLlama_v1.1", torch_dtype=torch.bfloat16, device_map="auto")
 pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
 
@@ -54,7 +54,6 @@ def respond(
             temperature=temperature,
             do_sample=True,
             top_p=top_p,
-            stream=False,
         ):
             if stop_inference:
                 response = "Inference cancelled."
@@ -78,7 +77,7 @@ def respond(
         for message_chunk in client.chat_completion(
             messages,
             max_tokens=max_tokens,
-            stream=True,
+            stream=False,
             temperature=temperature,
             top_p=top_p,
         ):
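For context, a minimal sketch (not part of the commit) of how the two stream modes of huggingface_hub's InferenceClient.chat_completion behave; the prompt and max_tokens value here are placeholders. Read this way, streaming is selected per call via the stream argument to chat_completion rather than at client construction, which is consistent with the first hunk dropping stream=False from the InferenceClient(...) constructor.

import os
from huggingface_hub import InferenceClient

token = os.getenv("HF_TOKEN")  # assumes the token is provided via the environment
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-alpha", token=token)
messages = [{"role": "user", "content": "Hello!"}]  # placeholder prompt

# stream=False: a single ChatCompletionOutput is returned and the full
# reply is read from the first choice's message.
output = client.chat_completion(messages, max_tokens=64, stream=False)
print(output.choices[0].message.content)

# stream=True: the call returns an iterator of chunks; partial text arrives
# in each chunk's delta and is concatenated by the caller.
reply = ""
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    reply += chunk.choices[0].delta.content or ""
print(reply)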