Update app.py

app.py CHANGED
@@ -142,7 +142,7 @@ def parse_llm_output(full_output, input_prompt_list):
     cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
     cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()
     if not cleaned_text and generated_text:
-        print("
+        print("Wireturning original generation.")
         return generated_text
     if last_input_content and last_occurrence_index == -1:
         print("Warning: Could not find last input prompt in LLM output. Returning cleaned full output.")
@@ -165,10 +165,9 @@ def generate_enhanced_persona(name, bio_text, context=""):
         add_generation_prompt=True,
         enable_thinking=False # Disable thinking mode
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(pipe.model.device)
     with torch.amp.autocast('cuda', dtype=torch.bfloat16):
         outputs = pipe(
-
+            text,
             max_new_tokens=512,
             do_sample=True,
             temperature=0.7, # Recommended for non-thinking mode
@@ -201,10 +200,9 @@ def generate_system_prompt_with_llm(name, enhanced_profile, context=""):
         add_generation_prompt=True,
         enable_thinking=False # Disable thinking mode
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(pipe.model.device)
     with torch.amp.autocast('cuda', dtype=torch.bfloat16):
         outputs = pipe(
-
+            text,
             max_new_tokens=300,
             do_sample=True,
             temperature=0.7, # Recommended for non-thinking mode
@@ -234,10 +232,9 @@ def generate_response(messages):
         add_generation_prompt=True,
         enable_thinking=False # Disable thinking mode
     )
-    model_inputs = tokenizer([text], return_tensors="pt").to(pipe.model.device)
     with torch.amp.autocast('cuda', dtype=torch.bfloat16):
         outputs = pipe(
-
+            text,
             max_new_tokens=512,
             do_sample=True,
             top_p=0.8, # Recommended for non-thinking mode
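Note: the three generation hunks above converge on the same calling pattern, so a minimal runnable sketch of it is included here for reference. The model id, example messages, and pipeline/tokenizer setup are illustrative assumptions, not taken from app.py; only the apply_chat_template arguments, the autocast block, and the sampling parameters mirror what the diff shows.

# Hedged sketch of the post-change generation path. Model id, messages and
# setup below are assumptions for illustration, not copied from app.py.
import torch
from transformers import pipeline

model_id = "Qwen/Qwen3-8B"  # assumption: a chat model whose template accepts enable_thinking
pipe = pipeline("text-generation", model=model_id, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = pipe.tokenizer

messages = [{"role": "user", "content": "Describe this persona in two sentences."}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # Disable thinking mode, as in the diff
)

# The templated string is passed straight to the pipeline; the removed
# manual tokenizer([text], return_tensors="pt") step is no longer needed,
# because the pipeline tokenizes internally.
with torch.amp.autocast('cuda', dtype=torch.bfloat16):
    outputs = pipe(
        text,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,  # Recommended for non-thinking mode
        top_p=0.8,        # Recommended for non-thinking mode
        return_full_text=False,  # assumption: let the pipeline strip the prompt itself
    )

print(outputs[0]["generated_text"])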
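For completeness, a sketch of the cleanup-and-fallback logic that the first hunk's print statement sits in, reconstructed only from the lines visible above. The exact warning text is truncated in the diff, so the message below is approximated, and everything outside the visible lines of parse_llm_output is an assumption.

import re

def clean_generated_text(generated_text):
    # Sketch of the cleanup step around the edited print; only the regexes
    # and the fallback condition come from the hunk, the rest is assumed.
    cleaned_text = generated_text
    cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()  # drop a leading <s>/</s> token
    cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()  # drop a leading role prefix
    if not cleaned_text and generated_text:
        # Cleaning stripped everything away, so fall back to the raw output.
        # The warning text is approximated; it is truncated in the diff view.
        print("Warning: cleaning produced an empty string, returning original generation.")
        return generated_text
    return cleaned_text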