Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -18,14 +18,14 @@ from duckduckgo_search import DDGS
 import re
 import time
 from huggingface_hub import HfApi
-from spaces import GPU
+from spaces import GPU  # Directly import GPU from spaces - Crucial for HF Spaces

 # --- Constants and Configuration ---
 MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
 MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation

 # --- Model Loading ---
-@GPU(memory=40)
+@GPU(memory=40)  # ****** THIS DECORATOR IS ESSENTIAL FOR SPACES STARTUP ******
 def load_model():
     """Load the LLM model optimized for A100 GPU using 4-bit quantization."""
     print(f"Attempting to load model: {MODEL_ID} with 4-bit quantization")
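The `from spaces import GPU` import and the `@GPU(memory=40)` decorator are what let this Space run on ZeroGPU hardware: GPU access is only guaranteed inside decorated functions. A minimal sketch of the pattern, assuming the Hugging Face `spaces` package and mirroring the `memory=40` argument used in this commit (the helper function below is hypothetical, not part of app.py):

from spaces import GPU

@GPU(memory=40)  # mirrors this repo's usage; a GPU is attached only while this runs
def gpu_smoke_test():
    import torch
    # Inside a decorated function, CUDA should be visible on ZeroGPU hardware.
    return torch.cuda.is_available()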
@@ -39,39 +39,23 @@ def load_model():
         )

         # Device map will handle placing layers, relying on accelerate
-        # No need to explicitly set max_memory when using device_map="auto" typically
         pipe = pipeline(
             "text-generation",
             model=MODEL_ID,
-            # Note: torch_dtype is sometimes ignored when quantization_config is used,
-            # but specifying compute_dtype in BitsAndBytesConfig is key.
-            # Keep torch_dtype=torch.bfloat16 here for consistency if needed by other parts.
             torch_dtype=torch.bfloat16,
-            device_map="auto",
+            device_map="auto",  # Let accelerate handle layer placement
             model_kwargs={
                 "quantization_config": quantization_config,
                 "use_cache": True,
-                # "trust_remote_code=True" # Add if model requires it (check model card)
             }
         )
         print(f"Model {MODEL_ID} loaded successfully on device: {pipe.device} (using 4-bit quantization)")
         return pipe
     except Exception as e:
         print(f"FATAL Error loading model '{MODEL_ID}' (check memory/config): {e}")
-        # Raise the error to ensure it's visible in Spaces logs
         raise e

-# --- REST OF THE CODE REMAINS THE SAME ---
-# (search_person, create_synthetic_profile, extract_text_from_search_results,
-# parse_llm_output, generate_enhanced_persona, generate_system_prompt_with_llm,
-# generate_response, PersonaChat class, create_interface function, __main__ block)
-# ... include the rest of the Python code from the previous correct version here ...
-
-# Make sure the rest of your app.py file follows this modified load_model function.
-# Keep all other functions and the Gradio interface definition as they were.
-
 # --- Web Search ---
-# (Keep search_person, create_synthetic_profile, extract_text_from_search_results as before)
 def search_person(name, context=""):
     """Search for information about a person using DuckDuckGo."""
     print(f"Searching for: {name} with context: {context}")
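The `quantization_config` passed through `model_kwargs` is built earlier in `load_model()` and is outside this hunk. A minimal sketch of a typical 4-bit setup consistent with the bfloat16 compute dtype used above, assuming the standard `transformers` BitsAndBytesConfig API (the exact options in app.py are not shown in the diff):

import torch
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # 4-bit weights to fit the 8B model in memory
    bnb_4bit_compute_dtype=torch.bfloat16,  # matches torch_dtype above
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)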
@@ -128,7 +112,7 @@ def create_synthetic_profile(name, context):
             profile["body"] += f"Based on being in {grade}th grade, {name} is likely around {age} years old. "
             profile["body"] += f"Typical interests for this age might include friends, hobbies, school subjects, and developing independence. "
         except ValueError:
-
+            profile["body"] += f"The grade mentioned ('{grade_match.group(1)}') could not be parsed to estimate age. "
     profile["body"] += "Since no public information was found, this profile is based solely on the provided context."
     return [profile]

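For context, `grade`, `age`, and `grade_match` come from earlier lines of `create_synthetic_profile` that are outside this hunk; a hypothetical sketch of that surrounding logic (the regex and the grade-to-age offset are illustrative assumptions, not taken from the diff):

import re

def estimate_age_sentence(name, context):
    body = ""
    grade_match = re.search(r"(\d+)(?:st|nd|rd|th)?\s*grade", context, re.IGNORECASE)  # assumed pattern
    if grade_match:
        try:
            grade = int(grade_match.group(1))
            age = grade + 5  # rough offset, purely illustrative
            body += f"Based on being in {grade}th grade, {name} is likely around {age} years old. "
        except ValueError:
            body += f"The grade mentioned ('{grade_match.group(1)}') could not be parsed to estimate age. "
    return body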
@@ -166,14 +150,15 @@ def parse_llm_output(full_output, input_prompt_list):
     if isinstance(full_output, list) and len(full_output) > 0:
         if isinstance(full_output[0], dict) and "generated_text" in full_output[0]:
             generated_text = full_output[0]["generated_text"]
-        else:
-
-
+        else:
+            return str(full_output)
+    elif isinstance(full_output, str):
+        generated_text = full_output
+    else:
+        return str(full_output)

     last_input_content = ""
     if isinstance(input_prompt_list, list) and input_prompt_list:
-        # Find the last message with 'user' or 'system' role potentially?
-        # Let's stick to finding the last message content for simplicity
         last_input_content = input_prompt_list[-1].get("content", "")

     if last_input_content:
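The new branches make `parse_llm_output` tolerant of the two shapes the text-generation pipeline is commonly seen returning, a list of dicts or a plain string. An illustrative call (the literal strings below are made up):

sample_prompt = [{"role": "user", "content": "Say hi"}]
list_shaped = [{"generated_text": "Say hi Hello there!"}]  # usual list-of-dict output
str_shaped = "Say hi Hello there!"                         # plain-string fallback

print(parse_llm_output(list_shaped, sample_prompt))
print(parse_llm_output(str_shaped, sample_prompt))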
@@ -181,52 +166,41 @@ def parse_llm_output(full_output, input_prompt_list):
         if last_occurrence_index != -1:
             potential_response = generated_text[last_occurrence_index + len(last_input_content):].strip()
             if potential_response:
-                # Basic cleanup
                 potential_response = re.sub(r'^<\/?s?>', '', potential_response).strip()
                 potential_response = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', potential_response).strip()
-                # Check if the response is just whitespace or seems empty after cleanup
                 if potential_response:
                     return potential_response

-    # Fallback or if model correctly outputted only the response
     cleaned_text = generated_text
     if isinstance(input_prompt_list, list) and input_prompt_list:
-
-
-
-        pass # Let's rely more on the end-stripping heuristic above
+        first_prompt_content = input_prompt_list[0].get("content", "")
+        if first_prompt_content and cleaned_text.startswith(first_prompt_content):
+            pass  # Rely on end-stripping heuristic

-    # General cleanup
     cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
     cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()

-    # If after all this, it's empty, maybe return original generated_text?
-    # Or log a warning and return the cleaned version.
     if not cleaned_text and generated_text:
-
-
+        print("Warning: Parsing resulted in empty string, returning original generation.")
+        return generated_text

-    # If input prompt wasn't found, assume the model outputted only the response (ideal case)
-    # or the whole thing (fallback case). The cleaning helps for the latter.
     if last_input_content and last_occurrence_index == -1:
         print("Warning: Could not find last input prompt in LLM output. Returning cleaned full output.")

     return cleaned_text

-@GPU(memory=40)
-def generate_enhanced_persona(model, name, bio_text, context=""):
+@GPU(memory=40)  # Decorator needed for Spaces resource allocation during calls
+def generate_enhanced_persona(name, bio_text, context=""):
     """Use the LLM to enhance the persona profile."""
+    pipe = load_model()  # Load model within GPU context
     print(f"Generating enhanced persona for {name}...")
-    if model is None: raise ValueError("Model is not loaded.")
-
     enhancement_prompt = [
         {"role": "system", "content": """You are an expert AI character developer. Your task is to synthesize information into a detailed and coherent character profile. Focus on personality, potential interests, speaking style, and mannerisms based ONLY on the provided text. If the text indicates the character is a child, ensure the profile reflects age-appropriate traits. Output ONLY the enhanced character profile description. Do not include conversational introductions, explanations, apologies for limited info, or markdown formatting like headers (e.g., ### Personality). Start directly with the profile text."""},
         {"role": "user", "content": f"""Synthesize the following information about '{name}' into a character profile. Context: {context} Information Found:\n{bio_text}\n\nCreate the profile based *only* on the text above."""}
     ]
-
     try:
         with torch.amp.autocast('cuda', dtype=torch.bfloat16):
-            outputs =
+            outputs = pipe(enhancement_prompt, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.9)
             parsed_output = parse_llm_output(outputs, enhancement_prompt)
             print("Enhanced persona generated.")
             return parsed_output if parsed_output else f"Could not generate profile based on:\n{bio_text}"
@@ -235,22 +209,19 @@ def generate_enhanced_persona(model, name, bio_text, context=""):
         print(error_msg)
         return f"Error enhancing profile: {str(e)}\n\nUsing basic info:\n{bio_text}"

-@GPU(memory=40)
-def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
+@GPU(memory=40)  # Decorator needed for Spaces resource allocation during calls
+def generate_system_prompt_with_llm(name, enhanced_profile, context=""):
     """Generate an optimized system prompt for the persona."""
+    pipe = load_model()
     print(f"Generating system prompt for {name}...")
-    if model is None: raise ValueError("Model is not loaded.")
-
     fallback_prompt = f"""You are simulating the character '{name}'. Act and respond according to this profile:\n{enhanced_profile}\nAdditional context for the simulation: {context}\n---\nMaintain this persona consistently. Respond naturally based on the profile. Do not mention that you are an AI or a simulation. If asked about details not in the profile, you can be evasive or state you don't know/remember, consistent with the persona."""
-
     prompt = [
         {"role": "system", "content": """You are an expert AI prompt engineer specializing in character simulation. Your task is to create a concise and effective system prompt for an LLM that will simulate a character based on a provided profile. The system prompt should instruct the LLM to embody the character, covering: 1. Core personality, attitude, and speaking style (based on the profile). 2. Key interests or knowledge areas (if mentioned in the profile). 3. How to handle questions outside its knowledge (e.g., be evasive, admit ignorance naturally). 4. Explicitly state it should *not* break character or mention being an AI. 5. Incorporate age-appropriateness if the profile suggests a specific age group. Output ONLY the system prompt itself. Do not add any explanation or introductory text."""},
         {"role": "user", "content": f"""Create a system prompt for an AI to simulate the character '{name}'. Context for simulation: {context} Character Profile:\n{enhanced_profile}\n\nGenerate the system prompt based *only* on the profile and context provided."""}
     ]
-
     try:
         with torch.amp.autocast('cuda', dtype=torch.bfloat16):
-            outputs =
+            outputs = pipe(prompt, max_new_tokens=300, do_sample=True, temperature=0.6)
             parsed_output = parse_llm_output(outputs, prompt)
             print("System prompt generated.")
             return parsed_output if parsed_output else fallback_prompt
@@ -259,25 +230,22 @@ def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
         print(error_msg)
         return fallback_prompt

-@GPU(memory=40)
-def generate_response(model, messages):
+@GPU(memory=40)  # Decorator needed for Spaces resource allocation during calls
+def generate_response(messages):
     """Generate a response using the LLM."""
+    pipe = load_model()
     print("Generating response...")
-    if
-
-
+    if not messages:
+        return "Error: No message history provided."
     try:
         with torch.amp.autocast('cuda', dtype=torch.bfloat16):
-
-            outputs = model(
+            outputs = pipe(
                 messages,
                 max_new_tokens=512,
                 do_sample=True,
                 top_p=0.9,
                 temperature=0.7,
-
-                # Check if EOS token is needed for this model/pipeline setup
-                pad_token_id=model.tokenizer.eos_token_id if model.tokenizer.eos_token_id else None
+                pad_token_id=pipe.tokenizer.eos_token_id if pipe.tokenizer.eos_token_id else None
             )
             parsed_output = parse_llm_output(outputs, messages)
             print("Response generated.")
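With this change the generation helpers no longer take a `model` argument; each `@GPU`-decorated function calls `load_model()` itself when it runs. Whether `load_model()` caches the pipeline between calls is not visible in this diff, so repeated loads may be the accepted cost. A sketch of a call under the new signature:

messages = [
    {"role": "system", "content": "You are simulating the character 'Ada'."},
    {"role": "user", "content": "Hello!"},
]
reply = generate_response(messages)  # no pipeline handle is passed in anymore
print(reply)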
@@ -285,50 +253,27 @@ def generate_response(model, messages):
     except Exception as e:
         error_msg = f"Error during response generation: {str(e)}"
         print(error_msg)
-        # Consider if the specific error should be shown to the user
         return f"Sorry, I encountered an error trying to respond."

-
 # --- Persona Chat Class ---
 class PersonaChat:
     def __init__(self):
-        self.model = None
         self.system_prompt = "You are a helpful assistant."
         self.persona_name = "Assistant"
         self.persona_context = ""
         self.messages = []
         self.enhanced_profile = ""
-        self.model_loaded = False
-
-    # No @GPU decorator needed here typically, as it calls functions that ARE decorated
-    def load_model_if_needed(self):
-        """Loads the model if it hasn't been loaded successfully."""
-        if not self.model_loaded or self.model is None: # Check self.model too
-            print("Model not loaded or instance lost. Attempting to load...")
-            # Call the @GPU decorated load_model function
-            self.model = load_model() # This function IS decorated
-            if self.model is None:
-                # load_model now raises error, but double-check here
-                raise RuntimeError("Failed to load the language model. Cannot proceed.")
-            else:
-                self.model_loaded = True
-                print("Model loaded successfully within PersonaChat instance.")
-        # else: print("Model already loaded.") # Reduce log noise

-    # No @GPU decorator needed here typically
     def set_persona(self, name, context=""):
         """Orchestrates persona creation: search, enhance, generate prompt."""
-        # This method calls other functions that have @GPU decorators
         try:
-            self.load_model_if_needed() # Ensures model is ready
-
             self.persona_name = name
             self.persona_context = context
             self.messages = []
             self.enhanced_profile = ""

             status = f"Searching for information about {name}..."
-            yield status, "", "", [{"role": "system", "content": "Initializing persona creation..."}]
+            yield status, "", "", [{"role": "system", "content": "Initializing persona creation..."}]

             search_results = search_person(name, context)
             if isinstance(search_results, str) and search_results.startswith("Error"):
@@ -338,82 +283,58 @@ class PersonaChat:

             bio_text = extract_text_from_search_results(search_results)
             if bio_text.startswith("Could not extract text"):
-
+                yield f"Warning: {bio_text}", "", "", [{"role": "system", "content": bio_text}]

             status = f"Creating enhanced profile for {name}..."
-            yield status, "", bio_text, [{"role": "system", "content": status}]
+            yield status, "", bio_text, [{"role": "system", "content": status}]

-
-            self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)
+            self.enhanced_profile = generate_enhanced_persona(name, bio_text, context)
             profile_for_prompt = self.enhanced_profile
             if self.enhanced_profile.startswith("Error enhancing profile"):
-
-
+                yield f"Warning: Could not enhance profile. Using basic info.", "", self.enhanced_profile, [{"role": "system", "content": self.enhanced_profile}]
+                profile_for_prompt = bio_text

             status = f"Generating optimal system prompt for {name}..."
-            # Yield the enhanced profile while generating prompt
             yield status, self.enhanced_profile, self.enhanced_profile, [{"role": "system", "content": status}]

-
-            self.system_prompt = generate_system_prompt_with_llm(self.model, name, profile_for_prompt, context)
+            self.system_prompt = generate_system_prompt_with_llm(name, profile_for_prompt, context)
             self.messages = [{"role": "system", "content": self.system_prompt}]

             yield f"Persona set to '{name}'. Ready to chat!", self.system_prompt, self.enhanced_profile, self.messages

-        except RuntimeError as e:
-            error_msg = f"Critical Error: {str(e)}"
-            print(error_msg)
-            yield error_msg, "", "", [{"role": "system", "content": error_msg}]
         except Exception as e:
             error_msg = f"An unexpected error occurred during persona setup: {str(e)}"
             print(error_msg)
-            # Attempt to yield current state even on error
             yield error_msg, self.system_prompt, self.enhanced_profile, [{"role": "system", "content": error_msg}]

-    # No @GPU decorator needed here typically
     def chat(self, user_message):
         """Processes a user message and returns the AI's response."""
-        # This method calls generate_response which has the @GPU decorator
         try:
-            self.load_model_if_needed()
-
             if not self.messages:
-
-
+                print("Error: Chat called before persona was set.")
+                return "Please set a persona first using the controls above."

             print(f"User message: {user_message}")
-
-            # Keep internal history, pass copy to model if needed, but pipeline usually handles state
-            self.messages.append(formatted_message)
+            self.messages.append({"role": "user", "content": user_message})

-
-            response = generate_response(self.model, self.messages)
+            response = generate_response(self.messages)

-            # Append assistant response IF generation succeeded
             if not response.startswith("Sorry, I encountered an error"):
-
-
-                print(f"Assistant response: {response}")
+                self.messages.append({"role": "assistant", "content": response})
+                print(f"Assistant response: {response}")
             else:
-
-                # Do not add the error message itself to the persistent history
-                # Let the UI show the error, but don't make the bot repeat it next turn.
+                print(f"Assistant error response: {response}")

             return response

-        except RuntimeError as e:
-            error_msg = f"Critical Error: {str(e)}. Cannot generate response."
-            print(error_msg)
-            return error_msg
         except Exception as e:
             error_msg = f"Error generating response: {str(e)}"
             print(error_msg)
             return f"Sorry, I encountered an error: {str(e)}"

-
 # --- Gradio Interface ---
 def create_interface():
-    persona_chat = PersonaChat()
+    persona_chat = PersonaChat()  # Instantiate the handler class

     css = """
     .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
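The slimmed-down PersonaChat no longer holds a model reference or a `load_model_if_needed` helper; it only tracks persona state and delegates generation to the decorated module-level functions. An illustrative driver using the method names from this diff (the persona name and context are made up):

chat = PersonaChat()
for status, system_prompt, profile, history in chat.set_persona("Ada Lovelace", context="Victorian mathematician"):
    print(status)  # progress updates that the UI streams as they are yielded
print(chat.chat("What are you working on?"))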
@@ -428,7 +349,6 @@ def create_interface():
     .persona-button { background-color: #4ca1af !important; color: white !important; }
     .system-prompt-display { background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-top: 15px; border: 1px solid #e0e0e0; font-family: monospace; white-space: pre-wrap; word-wrap: break-word; }
     .footer { text-align: center; margin-top: 20px; font-size: 0.9rem; color: #666; }
-    /* Use default chatbot message styling provided by type='messages' */
     .typing-indicator { color: #aaa; font-style: italic; }
     """

@@ -451,18 +371,15 @@ def create_interface():
                 enhanced_profile_display = gr.TextArea(label="Enhanced Profile (Generated by AI)", interactive=False, lines=10, elem_classes="system-prompt-display")
                 system_prompt_display = gr.TextArea(label="System Prompt (Instructions for the AI)", interactive=False, lines=10, elem_classes="system-prompt-display")

-
             with gr.Column(elem_classes="chat-section"):
                 gr.Markdown("### 2. Chat with Your Character")
                 character_name_display = gr.Markdown(value="*No persona created yet*", elem_id="character-name-display")
-                # ***** FIX GRADIO WARNINGS *****
                 chatbot = gr.Chatbot(
                     label="Conversation",
                     height=450,
                     elem_classes="chat-container",
-
-
-                    type="messages" # ***** USE RECOMMENDED TYPE *****
+                    avatar_images=(None, "🤖"),  # User default, Bot emoji
+                    type="messages"  # Use recommended type
                 )
                 with gr.Row():
                     msg_input = gr.Textbox(label="Your message", placeholder="Type your message here and press Enter...", elem_classes="message-input", scale=4)
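With type="messages", the Chatbot history is a list of role/content dicts rather than the legacy list-of-pairs format, which is exactly what send_message_flow builds further down. For example:

history = [
    {"role": "user", "content": "Hi there"},
    {"role": "assistant", "content": None},  # None is used as the typing placeholder in this app
]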
@@ -481,7 +398,6 @@ def create_interface():
             initial_character_display = f"### Preparing to chat with {name}..."
             initial_prompt = "System prompt will appear here..."
             initial_profile = "Enhanced profile will appear here..."
-            # Start with empty history for messages type
             initial_history = []

             yield initial_status, initial_prompt, initial_profile, initial_character_display, initial_history
@@ -489,55 +405,46 @@ def create_interface():
            final_status, final_prompt, final_profile = "Error", "", ""
            final_history = initial_history
            try:
-                # Use the PersonaChat instance's method generator
-                # Expected yield order: status, system_prompt, enhanced_profile, messages_list
                for status_update, prompt_update, profile_update, history_update in persona_chat.set_persona(name, context):
                    final_status, final_prompt, final_profile = status_update, prompt_update, profile_update
-                    if isinstance(history_update, list):
+                    if isinstance(history_update, list):
+                        final_history = history_update

                    character_display = f"### Preparing chat with {name}..."
                    if "Ready to chat" in status_update:
                        character_display = f"### Chatting with {name}"
                    elif "Error" in status_update:
-
+                        character_display = f"### Error creating {name}"

                    yield status_update, final_prompt, final_profile, character_display, final_history
-                    time.sleep(0.1)
+                    time.sleep(0.1)  # Small delay for UI update visibility

            except Exception as e:
-
-
-
-                yield error_msg, final_prompt, final_profile, f"### Error creating {name}", final_history
-
+                error_msg = f"Failed to set persona (interface error): {str(e)}"
+                print(error_msg)
+                yield error_msg, final_prompt, final_profile, f"### Error creating {name}", final_history

        def send_message_flow(message, history):
-
-
+            if history is None:
+                history = []
            if not message.strip():
                return "", history

-            # Check if persona is ready (looks for system message in internal state)
            if not persona_chat.messages or persona_chat.messages[0]['role'] != 'system':
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": "Error: Please create a valid persona first."})
                return "", history

-            # Append user message to UI history
            history.append({"role": "user", "content": message})
-
-            history.append({"role": "assistant", "content": None}) # Use None for typing indicator with type='messages'
+            history.append({"role": "assistant", "content": None})  # Typing indicator

-            yield "", history
+            yield "", history  # Show user msg + typing

-            # Call chat method (uses internal state, returns string response)
            response_text = persona_chat.chat(message)

-            # Update the placeholder in UI history with the actual response
            history[-1]["content"] = response_text

-            yield "", history
-
+            yield "", history  # Show final response

        set_persona_button.click(
            set_persona_flow,
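Because send_message_flow is a generator that yields twice (once with the typing placeholder, once with the final reply), it has to be wired to the message events as a streaming handler. A hypothetical wiring, since the actual submit/click lines for the message box are outside the hunks shown here (component names other than msg_input and chatbot are assumptions):

msg_input.submit(send_message_flow, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])
send_button.click(send_message_flow, inputs=[msg_input, chatbot], outputs=[msg_input, chatbot])  # send_button is hypothetical

The demo.queue() call at launch is what allows these intermediate yields to stream to the browser.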
@@ -561,10 +468,9 @@ def create_interface():
 if __name__ == "__main__":
     print("Starting Gradio application for Hugging Face Spaces...")
     demo = create_interface()
-    demo.queue().launch(
+    demo.queue().launch(
         server_name="0.0.0.0",
         server_port=7860,
-
-
-        debug=True # More verbose logging
+        show_error=True,
+        debug=True
     )