ErenalpCet committed
Commit e9f3084 · verified · 1 parent: 1dec77f

Update app.py

Files changed (1): app.py (+4 -7)
app.py CHANGED

@@ -142,7 +142,7 @@ def parse_llm_output(full_output, input_prompt_list):
     cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
     cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()
     if not cleaned_text and generated_text:
-        print("Warning: Parsing resulted in empty string, returning original generation.")
+        print("Wireturning original generation.")
         return generated_text
     if last_input_content and last_occurrence_index == -1:
         print("Warning: Could not find last input prompt in LLM output. Returning cleaned full output.")
@@ -165,10 +165,9 @@ def generate_enhanced_persona(name, bio_text, context=""):
         add_generation_prompt=True,
         enable_thinking=False # Disable thinking mode
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(pipe.model.device)
     with torch.amp.autocast('cuda', dtype=torch.bfloat16):
         outputs = pipe(
-            model_inputs,
+            text,
             max_new_tokens=512,
             do_sample=True,
             temperature=0.7, # Recommended for non-thinking mode
@@ -201,10 +200,9 @@ def generate_system_prompt_with_llm(name, enhanced_profile, context=""):
         add_generation_prompt=True,
         enable_thinking=False # Disable thinking mode
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(pipe.model.device)
     with torch.amp.autocast('cuda', dtype=torch.bfloat16):
         outputs = pipe(
-            model_inputs,
+            text,
             max_new_tokens=300,
             do_sample=True,
             temperature=0.7, # Recommended for non-thinking mode
@@ -234,10 +232,9 @@ def generate_response(messages):
         add_generation_prompt=True,
         enable_thinking=False # Disable thinking mode
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(pipe.model.device)
     with torch.amp.autocast('cuda', dtype=torch.bfloat16):
         outputs = pipe(
-            model_inputs,
+            text,
             max_new_tokens=512,
             do_sample=True,
             top_p=0.8, # Recommended for non-thinking mode
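
The net change in all three generation helpers is the same: the manually tokenized tensors (model_inputs) are dropped and the chat-templated string is handed straight to the pipeline. Below is a minimal sketch of that calling pattern, assuming a Qwen3-style chat model behind the transformers text-generation pipeline; the model name and the example message are placeholders, not values from app.py. The text-generation pipeline tokenizes string input internally, so the tokenizer([text], return_tensors="pt").to(pipe.model.device) step belongs to the lower-level model.generate() workflow; calling pipe(model_inputs, ...) mixes the two interfaces, which is presumably what this commit corrects.

import torch
from transformers import pipeline

# Placeholder model; app.py's actual checkpoint is not shown in this diff.
pipe = pipeline("text-generation", model="Qwen/Qwen3-4B", torch_dtype=torch.bfloat16)
tokenizer = pipe.tokenizer

messages = [{"role": "user", "content": "Write a short persona bio."}]  # example input
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # disable thinking mode, as in the diff
)

# Pass the raw prompt string; the pipeline handles tokenization and device placement.
with torch.amp.autocast('cuda', dtype=torch.bfloat16):
    outputs = pipe(
        text,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,  # recommended for non-thinking mode
    )

print(outputs[0]["generated_text"])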