Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -1,37 +1,31 @@
 # --- Required Installs ---
-#

 import gradio as gr
 import transformers
 import torch
-from transformers import pipeline, BitsAndBytesConfig
 from duckduckgo_search import DDGS
 import re
 import time
 from huggingface_hub import HfApi
-

 # --- Constants and Configuration ---
 MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
 MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation

-# --- GPU Decorator (Placeholder if not using HF Spaces GPU class) ---
-# If not running on Hugging Face Spaces with their specific @GPU decorator,
-# remove or comment out the @GPU decorators below.
-# The resource allocation might need to be handled differently depending on your environment.
-# For simplicity, assuming the decorator exists or is not strictly needed for function.
-try:
-    from spaces import GPU
-except ImportError:
-    print("Warning: 'spaces.GPU' not found. Assuming standard environment.")
-    # Define a dummy decorator if 'spaces' is not available
-    def GPU(memory=None):
-        def decorator(func):
-            return func
-        return decorator
-
 # --- Model Loading ---
-
 def load_model():
     """Load the LLM model optimized for A100 GPU."""
     print(f"Attempting to load model: {MODEL_ID}")
@@ -43,47 +37,41 @@ def load_model():
             "text-generation",
             model=MODEL_ID,
             torch_dtype=torch.bfloat16,
-            device_map="auto",
             model_kwargs={
-                # Use quantization_config instead of load_in_8bit directly
                 "quantization_config": quantization_config,
                 "use_cache": True,
-                # max_memory
-                # but explicitly setting can be safer. Adjust if needed.
-                # "max_memory": {0: MAX_GPU_MEMORY} # Keep if necessary for your setup
             }
         )
-        print(f"Model {MODEL_ID} loaded successfully.")
         return pipe
     except Exception as e:
-        print(f"Error loading model '{MODEL_ID}': {e}")
-        #
-
-        return None #

 # --- Web Search ---
 def search_person(name, context=""):
     """Search for information about a person using DuckDuckGo."""
     print(f"Searching for: {name} with context: {context}")
     results = []
     search_terms = []

-    # Prioritize context-specific search
     if context:
         search_terms.append(f"{name} {context}")
-    # Add grade-specific search if applicable
     grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
     if grade_match:
         grade = grade_match.group(1)
         search_terms.append(f"{name} student {grade} grade")

-
-    search_terms.append(f"{name}")  # Just the name
     search_terms.append(f"{name} biography")
     search_terms.append(f"{name} interests")
     search_terms.append(f"{name} personality")

-    # Remove duplicates
     search_terms = list(dict.fromkeys(search_terms))
     print(f"Using search terms: {search_terms}")
@@ -91,14 +79,13 @@ def search_person(name, context=""):
         with DDGS() as ddgs:
             for term in search_terms:
                 print(f"Searching DDG for: '{term}'")
-                # Fetch fewer results per term to keep context concise
                 search_results = list(ddgs.text(term, max_results=2))
                 results.extend(search_results)
-                time.sleep(0.2)
     except Exception as e:
         error_msg = f"Error during DuckDuckGo search: {str(e)}"
         print(error_msg)
-        return error_msg

     if not results:
         print(f"No search results found for {name}. Creating synthetic profile.")
@@ -114,39 +101,34 @@ def create_synthetic_profile(name, context):
         "href": "",
         "body": f"{name} is a person described with the context: '{context}'. "
     }
-
-    # Try to infer age from grade
     if "grade" in context.lower():
         grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
             try:
                 grade = int(grade_match.group(1))
-                age = 5 + grade
                 profile["body"] += f"Based on being in {grade}th grade, {name} is likely around {age} years old. "
                 profile["body"] += f"Typical interests for this age might include friends, hobbies, school subjects, and developing independence. "
             except ValueError:
                 profile["body"] += f"The grade mentioned ('{grade_match.group(1)}') could not be parsed to estimate age. "
-
     profile["body"] += "Since no public information was found, this profile is based solely on the provided context."
-    # Return as a list containing the dictionary, matching search_person's format
     return [profile]

 def extract_text_from_search_results(search_results):
     """Extract relevant text from search results."""
-    if isinstance(search_results, str):
         return f"Could not extract text due to search error: {search_results}"

     combined_text = ""
     seen_bodies = set()
     count = 0
-    max_results_to_process = 5

     for result in search_results:
         if count >= max_results_to_process:
             break
         if isinstance(result, dict) and 'body' in result and result['body']:
             body = result['body'].strip()
-            # Avoid adding duplicate snippets
             if body not in seen_bodies:
                 combined_text += body + "\n\n"
                 seen_bodies.add(body)
@@ -155,184 +137,138 @@ def extract_text_from_search_results(search_results):
|
|
155 |
if not combined_text:
|
156 |
return "No relevant text found in search results."
|
157 |
|
158 |
-
# Basic cleaning
|
159 |
combined_text = re.sub(r'\s+', ' ', combined_text).strip()
|
160 |
-
|
161 |
-
max_length = 2000 # Characters
|
162 |
return combined_text[:max_length] + "..." if len(combined_text) > max_length else combined_text
|
163 |
|
164 |
-
|
165 |
# --- LLM Generation Functions ---
|
166 |
|
167 |
def parse_llm_output(full_output, input_prompt_list):
|
168 |
-
"""
|
169 |
-
Attempts to parse only the newly generated text from the LLM output,
|
170 |
-
assuming the output might contain the input prompt messages.
|
171 |
-
"""
|
172 |
-
# If the output is a list of dicts (as expected from pipeline), get the text
|
173 |
if isinstance(full_output, list) and len(full_output) > 0:
|
174 |
if isinstance(full_output[0], dict) and "generated_text" in full_output[0]:
|
175 |
generated_text = full_output[0]["generated_text"]
|
176 |
-
else:
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
else:
|
181 |
-
return str(full_output) # Unexpected format
|
182 |
-
|
183 |
-
# Heuristic: Find the last message's content from the input prompt
|
184 |
-
# The actual formatting depends on the tokenizer's chat template.
|
185 |
-
# This is a simplified approach.
|
186 |
last_input_content = ""
|
187 |
if isinstance(input_prompt_list, list) and input_prompt_list:
|
|
|
|
|
188 |
last_input_content = input_prompt_list[-1].get("content", "")
|
189 |
|
190 |
-
# Try to find the last input message content in the generated text
|
191 |
-
# If found, take the text after it. This might fail if formatting differs.
|
192 |
if last_input_content:
|
193 |
last_occurrence_index = generated_text.rfind(last_input_content)
|
194 |
if last_occurrence_index != -1:
|
195 |
potential_response = generated_text[last_occurrence_index + len(last_input_content):].strip()
|
196 |
-
|
197 |
-
|
198 |
-
# Simple cleanup for potential role markers if model adds them
|
199 |
potential_response = re.sub(r'^<\/?s?>', '', potential_response).strip()
|
200 |
potential_response = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', potential_response).strip()
|
201 |
-
|
|
|
|
|
202 |
|
203 |
-
# Fallback
|
204 |
-
# Or, if the prompt asked for ONLY the output, the model might have behaved correctly.
|
205 |
-
# Let's clean up potential boilerplate often added by models
|
206 |
cleaned_text = generated_text
|
207 |
if isinstance(input_prompt_list, list) and input_prompt_list:
|
208 |
-
# Remove potential initial prompt remnants if possible (very basic)
|
209 |
first_prompt_content = input_prompt_list[0].get("content", "")
|
210 |
if first_prompt_content and cleaned_text.startswith(first_prompt_content):
|
211 |
-
|
|
|
212 |
|
213 |
-
#
|
214 |
cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
|
215 |
cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()
|
216 |
|
217 |
-
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
-
|
|
|
|
|
221 |
def generate_enhanced_persona(model, name, bio_text, context=""):
|
222 |
"""Use the LLM to enhance the persona profile."""
|
223 |
print(f"Generating enhanced persona for {name}...")
|
224 |
-
if model is None:
|
225 |
-
raise ValueError("Model is not loaded.")
|
226 |
|
227 |
enhancement_prompt = [
|
228 |
-
{"role": "system", "content": """You are an expert AI character developer. Your task is to synthesize information into a detailed and coherent character profile. Focus on personality, potential interests, speaking style, and mannerisms based ONLY on the provided text. If the text indicates the character is a child, ensure the profile reflects age-appropriate traits.
|
229 |
-
|
230 |
-
Output ONLY the enhanced character profile description. Do not include conversational introductions, explanations, apologies for limited info, or markdown formatting like headers (e.g., ### Personality). Start directly with the profile text."""},
|
231 |
-
{"role": "user", "content": f"""Synthesize the following information about '{name}' into a character profile.
|
232 |
-
Context: {context}
|
233 |
-
Information Found:
|
234 |
-
{bio_text}
|
235 |
-
|
236 |
-
Create the profile based *only* on the text above."""}
|
237 |
]
|
238 |
|
239 |
try:
|
240 |
-
# Use torch.amp.autocast instead of torch.cuda.amp.autocast
|
241 |
with torch.amp.autocast('cuda', dtype=torch.bfloat16):
|
242 |
outputs = model(enhancement_prompt, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.9)
|
243 |
-
|
244 |
-
# Parse the output
|
245 |
parsed_output = parse_llm_output(outputs, enhancement_prompt)
|
246 |
print("Enhanced persona generated.")
|
247 |
-
|
248 |
-
return parsed_output if parsed_output else bio_text
|
249 |
-
|
250 |
except Exception as e:
|
251 |
error_msg = f"Error generating enhanced persona: {str(e)}"
|
252 |
print(error_msg)
|
253 |
-
# Fallback to the original bio text in case of error
|
254 |
return f"Error enhancing profile: {str(e)}\n\nUsing basic info:\n{bio_text}"
|
255 |
|
256 |
-
|
257 |
def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
|
258 |
"""Generate an optimized system prompt for the persona."""
|
259 |
print(f"Generating system prompt for {name}...")
|
260 |
-
if model is None:
|
261 |
-
raise ValueError("Model is not loaded.")
|
262 |
|
263 |
-
fallback_prompt = f"""You are simulating the character '{name}'. Act and respond according to this profile:
|
264 |
-
{enhanced_profile}
|
265 |
-
Additional context for the simulation: {context}
|
266 |
-
---
|
267 |
-
Maintain this persona consistently. Respond naturally based on the profile. Do not mention that you are an AI or a simulation. If asked about details not in the profile, you can be evasive or state you don't know/remember, consistent with the persona."""
|
268 |
|
269 |
prompt = [
|
270 |
-
{"role": "system", "content": """You are an expert AI prompt engineer specializing in character simulation. Your task is to create a concise and effective system prompt for an LLM that will simulate a character based on a provided profile.
|
271 |
-
|
272 |
-
The system prompt should instruct the LLM to embody the character, covering:
|
273 |
-
1. Core personality, attitude, and speaking style (based on the profile).
|
274 |
-
2. Key interests or knowledge areas (if mentioned in the profile).
|
275 |
-
3. How to handle questions outside its knowledge (e.g., be evasive, admit ignorance naturally).
|
276 |
-
4. Explicitly state it should *not* break character or mention being an AI.
|
277 |
-
5. Incorporate age-appropriateness if the profile suggests a specific age group.
|
278 |
-
|
279 |
-
Output ONLY the system prompt itself. Do not add any explanation or introductory text."""},
|
280 |
-
{"role": "user", "content": f"""Create a system prompt for an AI to simulate the character '{name}'.
|
281 |
-
Context for simulation: {context}
|
282 |
-
Character Profile:
|
283 |
-
{enhanced_profile}
|
284 |
-
|
285 |
-
Generate the system prompt based *only* on the profile and context provided."""}
|
286 |
]
|
287 |
|
288 |
try:
|
289 |
-
# Use torch.amp.autocast instead of torch.cuda.amp.autocast
|
290 |
with torch.amp.autocast('cuda', dtype=torch.bfloat16):
|
291 |
-
outputs = model(prompt, max_new_tokens=300, do_sample=True, temperature=0.6)
|
292 |
-
|
293 |
-
# Parse the output
|
294 |
parsed_output = parse_llm_output(outputs, prompt)
|
295 |
print("System prompt generated.")
|
296 |
-
# Return parsed output or fallback
|
297 |
return parsed_output if parsed_output else fallback_prompt
|
298 |
-
|
299 |
except Exception as e:
|
300 |
error_msg = f"Error generating system prompt: {str(e)}"
|
301 |
print(error_msg)
|
302 |
-
# Fallback to a basic system prompt in case of error
|
303 |
return fallback_prompt
|
304 |
|
305 |
-
|
306 |
def generate_response(model, messages):
|
307 |
"""Generate a response using the LLM."""
|
308 |
print("Generating response...")
|
309 |
-
if model is None:
|
310 |
-
|
311 |
-
if not messages:
|
312 |
-
return "Error: No message history provided."
|
313 |
|
314 |
try:
|
315 |
-
# Use torch.amp.autocast instead of torch.cuda.amp.autocast
|
316 |
with torch.amp.autocast('cuda', dtype=torch.bfloat16):
|
|
|
317 |
outputs = model(
|
318 |
messages,
|
319 |
-
max_new_tokens=512,
|
320 |
do_sample=True,
|
321 |
top_p=0.9,
|
322 |
temperature=0.7,
|
323 |
use_cache=True,
|
324 |
-
|
|
|
325 |
)
|
326 |
-
|
327 |
-
# Parse the output - expecting only the assistant's new reply
|
328 |
parsed_output = parse_llm_output(outputs, messages)
|
329 |
print("Response generated.")
|
330 |
-
return parsed_output if parsed_output else "..."
|
331 |
-
|
332 |
except Exception as e:
|
333 |
error_msg = f"Error during response generation: {str(e)}"
|
334 |
print(error_msg)
|
335 |
-
|
|
|
336 |
|
337 |
|
338 |
# --- Persona Chat Class ---
|
@@ -344,140 +280,123 @@ class PersonaChat:
|
|
344 |
self.persona_context = ""
|
345 |
self.messages = []
|
346 |
self.enhanced_profile = ""
|
347 |
-
self.model_loaded = False
|
348 |
|
349 |
-
# @GPU
|
350 |
def load_model_if_needed(self):
|
351 |
"""Loads the model if it hasn't been loaded successfully."""
|
352 |
-
if not self.model_loaded:
|
353 |
-
print("Model not loaded
|
354 |
-
#
|
355 |
-
self.model = load_model()
|
356 |
if self.model is None:
|
357 |
-
#
|
358 |
raise RuntimeError("Failed to load the language model. Cannot proceed.")
|
359 |
else:
|
360 |
self.model_loaded = True
|
361 |
print("Model loaded successfully within PersonaChat instance.")
|
362 |
-
else:
|
363 |
-
print("Model already loaded.")
|
364 |
|
365 |
-
#
|
366 |
-
# @GPU(memory=40)
|
367 |
def set_persona(self, name, context=""):
|
368 |
"""Orchestrates persona creation: search, enhance, generate prompt."""
|
|
|
369 |
try:
|
370 |
-
#
|
371 |
-
self.load_model_if_needed() # This will raise RuntimeError if it fails
|
372 |
|
373 |
self.persona_name = name
|
374 |
self.persona_context = context
|
375 |
-
self.messages = []
|
376 |
-
self.enhanced_profile = ""
|
377 |
|
378 |
status = f"Searching for information about {name}..."
|
379 |
-
yield status, "", [{"role": "system", "content": "Initializing persona creation..."}]
|
380 |
|
381 |
search_results = search_person(name, context)
|
382 |
-
|
383 |
-
# Check if search returned an error string
|
384 |
if isinstance(search_results, str) and search_results.startswith("Error"):
|
385 |
error_msg = f"Failed to set persona: {search_results}"
|
386 |
-
yield error_msg, "", [{"role": "system", "content": error_msg}]
|
387 |
-
return
|
388 |
|
389 |
bio_text = extract_text_from_search_results(search_results)
|
390 |
if bio_text.startswith("Could not extract text"):
|
391 |
-
yield f"Warning: {bio_text}", "", [{"role": "system", "content": bio_text}]
|
392 |
-
# Continue with potentially limited info
|
393 |
|
394 |
status = f"Creating enhanced profile for {name}..."
|
395 |
-
yield status, "", [{"role": "system", "content": status}]
|
396 |
|
397 |
-
#
|
398 |
self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)
|
399 |
-
|
400 |
if self.enhanced_profile.startswith("Error enhancing profile"):
|
401 |
-
yield f"Warning: Could not enhance profile. Using basic info.", "", [{"role": "system", "content": self.enhanced_profile}]
|
402 |
-
|
403 |
-
profile_for_prompt = bio_text
|
404 |
-
else:
|
405 |
-
profile_for_prompt = self.enhanced_profile
|
406 |
-
|
407 |
|
408 |
status = f"Generating optimal system prompt for {name}..."
|
409 |
-
|
|
|
410 |
|
411 |
-
#
|
412 |
self.system_prompt = generate_system_prompt_with_llm(self.model, name, profile_for_prompt, context)
|
413 |
-
|
414 |
-
# Set the initial system message for the chat history
|
415 |
self.messages = [{"role": "system", "content": self.system_prompt}]
|
416 |
|
417 |
-
yield f"Persona set to '{name}'. Ready to chat!", self.system_prompt, self.messages
|
418 |
|
419 |
except RuntimeError as e:
|
420 |
-
# Catch model loading errors from load_model_if_needed
|
421 |
error_msg = f"Critical Error: {str(e)}"
|
422 |
print(error_msg)
|
423 |
-
yield error_msg, "", [{"role": "system", "content": error_msg}]
|
424 |
except Exception as e:
|
425 |
-
# Catch other unexpected errors during persona setting
|
426 |
error_msg = f"An unexpected error occurred during persona setup: {str(e)}"
|
427 |
print(error_msg)
|
428 |
-
yield
|
429 |
-
|
430 |
|
431 |
-
#
|
432 |
-
# @GPU(memory=40)
|
433 |
def chat(self, user_message):
|
434 |
"""Processes a user message and returns the AI's response."""
|
|
|
435 |
try:
|
436 |
-
|
437 |
-
self.load_model_if_needed() # Raises RuntimeError if model failed to load initially
|
438 |
|
439 |
if not self.messages:
|
440 |
-
# This case should ideally be prevented by UI logic
|
441 |
-
# but handle it defensively.
|
442 |
print("Error: Chat called before persona was set.")
|
443 |
return "Please set a persona first using the controls above."
|
444 |
|
445 |
print(f"User message: {user_message}")
|
446 |
-
# Append user message (ensure correct format)
|
447 |
formatted_message = {"role": "user", "content": user_message}
|
|
|
448 |
self.messages.append(formatted_message)
|
449 |
|
450 |
-
#
|
451 |
response = generate_response(self.model, self.messages)
|
452 |
|
453 |
-
# Append assistant response
|
454 |
-
|
455 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
|
457 |
-
print(f"Assistant response: {response}")
|
458 |
return response
|
459 |
|
460 |
except RuntimeError as e:
|
461 |
-
# Catch model loading errors
|
462 |
error_msg = f"Critical Error: {str(e)}. Cannot generate response."
|
463 |
print(error_msg)
|
464 |
return error_msg
|
465 |
except Exception as e:
|
466 |
-
# Catch errors during generation itself
|
467 |
error_msg = f"Error generating response: {str(e)}"
|
468 |
print(error_msg)
|
469 |
-
|
470 |
-
# Let's return the error string directly.
|
471 |
-
# We might want to avoid adding the error to self.messages history
|
472 |
-
return error_msg
|
473 |
|
474 |
|
475 |
# --- Gradio Interface ---
|
476 |
def create_interface():
|
477 |
-
# Instantiate the
|
478 |
-
persona_chat = PersonaChat()
|
479 |
|
480 |
-
# Custom CSS (minor adjustments possible)
|
481 |
css = """
|
482 |
.gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
|
483 |
.main-container { max-width: 1200px; margin: auto; padding: 0; }
|
@@ -491,105 +410,50 @@ def create_interface():
|
|
491 |
.persona-button { background-color: #4ca1af !important; color: white !important; }
|
492 |
.system-prompt-display { background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-top: 15px; border: 1px solid #e0e0e0; font-family: monospace; white-space: pre-wrap; word-wrap: break-word; }
|
493 |
.footer { text-align: center; margin-top: 20px; font-size: 0.9rem; color: #666; }
|
494 |
-
|
495 |
-
.bot-message > .message { background-color: #f1f3f5; border-radius: 15px 15px 15px 0 !important; padding: 10px 15px !important; margin: 8px auto 8px 0 !important; max-width: 80%; float: left; clear: both; color: #333; }
|
496 |
-
.message p { margin: 0 !important; padding: 0 !important; } /* Prevent extra margins in chatbot messages */
|
497 |
.typing-indicator { color: #aaa; font-style: italic; }
|
498 |
"""
|
499 |
|
500 |
with gr.Blocks(css=css, title="AI Persona Simulator") as interface:
|
501 |
with gr.Row(elem_classes="main-container"):
|
502 |
with gr.Column():
|
503 |
-
# Header
|
504 |
with gr.Column(elem_classes="header"):
|
505 |
gr.Markdown("# AI Persona Simulator")
|
506 |
gr.Markdown("Create and interact with AI-driven character simulations")
|
507 |
|
508 |
-
# Setup Section
|
509 |
with gr.Column(elem_classes="setup-section"):
|
510 |
gr.Markdown("### 1. Create Your Persona")
|
511 |
-
gr.Markdown("Enter a name and
|
512 |
-
|
513 |
with gr.Row():
|
514 |
-
name_input = gr.Textbox(
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
|
520 |
-
|
521 |
-
placeholder="e.g., Living in 221B Baker Street, London. OR 7th grade, loves math and video games, has a pet cat named Luna. OR A spaceship captain exploring Alpha Centauri.",
|
522 |
-
lines=2,
|
523 |
-
elem_id="context_input"
|
524 |
-
)
|
525 |
-
|
526 |
-
set_persona_button = gr.Button(
|
527 |
-
"Create Persona & Start Chat",
|
528 |
-
variant="primary",
|
529 |
-
elem_classes="persona-button"
|
530 |
-
)
|
531 |
|
532 |
-
status_output = gr.Textbox(
|
533 |
-
label="Status",
|
534 |
-
value="Enter details above and click 'Create Persona'.",
|
535 |
-
interactive=False,
|
536 |
-
elem_classes="status-bar"
|
537 |
-
)
|
538 |
|
539 |
-
with gr.Accordion("View Generated System Prompt", open=False):
|
540 |
-
system_prompt_display = gr.TextArea(
|
541 |
-
label="System Prompt (Instructions for the AI)",
|
542 |
-
interactive=False,
|
543 |
-
lines=10,
|
544 |
-
elem_classes="system-prompt-display" # Use dedicated class
|
545 |
-
)
|
546 |
-
enhanced_profile_display = gr.TextArea(
|
547 |
-
label="Enhanced Profile (Generated by AI)",
|
548 |
-
interactive=False,
|
549 |
-
lines=10,
|
550 |
-
elem_classes="system-prompt-display" # Reuse style or create new
|
551 |
-
)
|
552 |
-
|
553 |
-
|
554 |
-
# Chat Section
|
555 |
with gr.Column(elem_classes="chat-section"):
|
556 |
gr.Markdown("### 2. Chat with Your Character")
|
557 |
-
|
558 |
-
|
559 |
-
value="*No persona created yet*",
|
560 |
-
elem_id="character-name-display"
|
561 |
-
)
|
562 |
-
|
563 |
chatbot = gr.Chatbot(
|
564 |
label="Conversation",
|
565 |
height=450,
|
566 |
elem_classes="chat-container",
|
567 |
-
bubble_full_width=False, #
|
568 |
-
avatar_images=(None, "🤖") # User
|
|
|
569 |
)
|
570 |
-
|
571 |
with gr.Row():
|
572 |
-
msg_input = gr.Textbox(
|
573 |
-
|
574 |
-
|
575 |
-
elem_classes="message-input",
|
576 |
-
scale=4 # Make input wider
|
577 |
-
)
|
578 |
-
send_button = gr.Button(
|
579 |
-
"Send",
|
580 |
-
variant="primary",
|
581 |
-
elem_classes="send-button",
|
582 |
-
scale=1
|
583 |
-
)
|
584 |
-
|
585 |
-
# Footer
|
586 |
with gr.Column(elem_classes="footer"):
|
587 |
gr.Markdown(f"Powered by {MODEL_ID}")
|
588 |
|
589 |
-
|
590 |
# --- Event Handlers ---
|
591 |
-
|
592 |
-
# Generator function for smoother UI updates during persona creation
|
593 |
def set_persona_flow(name, context):
|
594 |
if not name:
|
595 |
yield "Status: Please enter a character name.", "", "", "*No persona created yet*", []
|
@@ -599,89 +463,74 @@ def create_interface():
|
|
599 |
initial_character_display = f"### Preparing to chat with {name}..."
|
600 |
initial_prompt = "System prompt will appear here..."
|
601 |
initial_profile = "Enhanced profile will appear here..."
|
602 |
-
|
|
|
603 |
|
604 |
-
# Initial yield to show activity starting
|
605 |
yield initial_status, initial_prompt, initial_profile, initial_character_display, initial_history
|
606 |
|
607 |
-
# Use the PersonaChat instance's method, which is a generator
|
608 |
final_status, final_prompt, final_profile = "Error", "", ""
|
609 |
final_history = initial_history
|
610 |
try:
|
611 |
-
#
|
612 |
-
|
613 |
-
|
614 |
-
|
615 |
-
if isinstance(history_update, list):
|
616 |
-
|
617 |
-
|
618 |
-
# Determine character display based on status
|
619 |
character_display = f"### Preparing chat with {name}..."
|
620 |
-
if "Ready to chat" in
|
621 |
character_display = f"### Chatting with {name}"
|
622 |
-
elif "Error" in
|
623 |
character_display = f"### Error creating {name}"
|
624 |
|
625 |
-
yield
|
626 |
-
#
|
627 |
-
# time.sleep(0.1)
|
628 |
|
629 |
except Exception as e:
|
630 |
-
|
631 |
-
error_msg = f"Failed to set persona due to an unexpected error: {str(e)}"
|
632 |
print(error_msg)
|
633 |
-
|
|
|
634 |
|
635 |
|
636 |
-
# Function to handle sending messages
|
637 |
def send_message_flow(message, history):
|
|
|
|
|
638 |
if not message.strip():
|
639 |
-
|
640 |
-
return "", history # Return unchanged history and clear input box
|
641 |
|
642 |
-
# Check if persona is ready (
|
643 |
if not persona_chat.messages or persona_chat.messages[0]['role'] != 'system':
|
644 |
-
# Persona not set or history is corrupted
|
645 |
history.append({"role": "user", "content": message})
|
646 |
-
history.append({"role": "assistant", "content": "Error: Please create a valid persona first
|
647 |
-
return "", history
|
648 |
|
649 |
-
# Append user message to
|
650 |
history.append({"role": "user", "content": message})
|
651 |
-
#
|
652 |
-
history.append({"role": "assistant", "content": None}) # Use None for
|
653 |
-
|
654 |
-
# Yield the updated history to show user message and typing indicator
|
655 |
-
yield "", history
|
656 |
|
657 |
-
|
658 |
-
response = persona_chat.chat(message) # This now uses the internal self.messages
|
659 |
|
660 |
-
#
|
661 |
-
|
662 |
|
663 |
-
#
|
664 |
-
|
665 |
-
yield "", history
|
666 |
|
|
|
667 |
|
668 |
-
# Connect Gradio components to functions
|
669 |
|
670 |
-
# Use the generator for persona setting
|
671 |
set_persona_button.click(
|
672 |
set_persona_flow,
|
673 |
inputs=[name_input, context_input],
|
674 |
outputs=[status_output, system_prompt_display, enhanced_profile_display, character_name_display, chatbot]
|
675 |
)
|
676 |
-
|
677 |
-
# Use the generator for sending messages
|
678 |
send_button.click(
|
679 |
send_message_flow,
|
680 |
inputs=[msg_input, chatbot],
|
681 |
outputs=[msg_input, chatbot]
|
682 |
)
|
683 |
-
|
684 |
-
# Allow submitting message with Enter key
|
685 |
msg_input.submit(
|
686 |
send_message_flow,
|
687 |
inputs=[msg_input, chatbot],
|
@@ -692,14 +541,12 @@ def create_interface():

 # --- Main Execution ---
 if __name__ == "__main__":
-    print("Starting Gradio application...")
-    # Ensure necessary packages are installed:
-    # pip install gradio transformers torch duckduckgo_search huggingface_hub accelerate bitsandbytes sentencepiece
     demo = create_interface()
-    demo.queue().launch( #
-        server_name="0.0.0.0",
         server_port=7860,
-        share=False
-        show_error=True, #
-        debug=True #
     )
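Note: the load_model hunks on both sides of this diff reference a quantization_config that is built in an unchanged part of the file the diff does not show. For reference only, a minimal 8-bit BitsAndBytesConfig setup for this kind of pipeline typically looks like the following sketch; it is an assumption, not code from this commit.

# Hypothetical sketch of the kind of quantization setup load_model() relies on.
import torch
from transformers import BitsAndBytesConfig, pipeline

MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"

# 8-bit loading via bitsandbytes keeps the 8B model within a single GPU's memory budget.
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    model_kwargs={"quantization_config": quantization_config},
)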
# --- Required Installs ---
+# Ensure these are in your requirements.txt for Hugging Face Spaces
+# gradio
+# transformers
+# torch
+# duckduckgo_search
+# huggingface_hub
+# accelerate
+# bitsandbytes
+# sentencepiece
+# spaces <--- Provided by the Spaces environment

import gradio as gr
import transformers
import torch
+from transformers import pipeline, BitsAndBytesConfig
from duckduckgo_search import DDGS
import re
import time
from huggingface_hub import HfApi
+from spaces import GPU  # Directly import GPU from spaces - Crucial for HF Spaces

# --- Constants and Configuration ---
MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation

# --- Model Loading ---
+@GPU(memory=40)  # ****** THIS DECORATOR IS ESSENTIAL FOR SPACES STARTUP ******
def load_model():
    """Load the LLM model optimized for A100 GPU."""
    print(f"Attempting to load model: {MODEL_ID}")
            "text-generation",
            model=MODEL_ID,
            torch_dtype=torch.bfloat16,
+           device_map="auto",  # Relies on accelerate
            model_kwargs={
                "quantization_config": quantization_config,
                "use_cache": True,
+               # "max_memory": {0: MAX_GPU_MEMORY} # Often handled by device_map="auto"
            }
        )
+       print(f"Model {MODEL_ID} loaded successfully on device: {pipe.device}")
        return pipe
    except Exception as e:
+       print(f"FATAL Error loading model '{MODEL_ID}': {e}")
+       # Raise the error to potentially get more detailed logs in Spaces
+       raise e
+       # return None # Returning None might hide the root cause in Spaces logs

# --- Web Search ---
+# (Keep search_person, create_synthetic_profile, extract_text_from_search_results as before)
def search_person(name, context=""):
    """Search for information about a person using DuckDuckGo."""
    print(f"Searching for: {name} with context: {context}")
    results = []
    search_terms = []

    if context:
        search_terms.append(f"{name} {context}")
    grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
    if grade_match:
        grade = grade_match.group(1)
        search_terms.append(f"{name} student {grade} grade")

+   search_terms.append(f"{name}")
    search_terms.append(f"{name} biography")
    search_terms.append(f"{name} interests")
    search_terms.append(f"{name} personality")

    search_terms = list(dict.fromkeys(search_terms))
    print(f"Using search terms: {search_terms}")

        with DDGS() as ddgs:
            for term in search_terms:
                print(f"Searching DDG for: '{term}'")
                search_results = list(ddgs.text(term, max_results=2))
                results.extend(search_results)
+               time.sleep(0.2)
    except Exception as e:
        error_msg = f"Error during DuckDuckGo search: {str(e)}"
        print(error_msg)
+       return error_msg

    if not results:
        print(f"No search results found for {name}. Creating synthetic profile.")
        "href": "",
        "body": f"{name} is a person described with the context: '{context}'. "
    }
    if "grade" in context.lower():
        grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
        if grade_match:
            try:
                grade = int(grade_match.group(1))
+               age = 5 + grade
                profile["body"] += f"Based on being in {grade}th grade, {name} is likely around {age} years old. "
                profile["body"] += f"Typical interests for this age might include friends, hobbies, school subjects, and developing independence. "
            except ValueError:
                profile["body"] += f"The grade mentioned ('{grade_match.group(1)}') could not be parsed to estimate age. "
    profile["body"] += "Since no public information was found, this profile is based solely on the provided context."
    return [profile]

def extract_text_from_search_results(search_results):
    """Extract relevant text from search results."""
+   if isinstance(search_results, str):
        return f"Could not extract text due to search error: {search_results}"

    combined_text = ""
    seen_bodies = set()
    count = 0
+   max_results_to_process = 5

    for result in search_results:
        if count >= max_results_to_process:
            break
        if isinstance(result, dict) and 'body' in result and result['body']:
            body = result['body'].strip()
            if body not in seen_bodies:
                combined_text += body + "\n\n"
                seen_bodies.add(body)

    if not combined_text:
        return "No relevant text found in search results."

    combined_text = re.sub(r'\s+', ' ', combined_text).strip()
+   max_length = 2000
    return combined_text[:max_length] + "..." if len(combined_text) > max_length else combined_text

# --- LLM Generation Functions ---

def parse_llm_output(full_output, input_prompt_list):
+   """Attempts to parse only the newly generated text from the LLM output."""
    if isinstance(full_output, list) and len(full_output) > 0:
        if isinstance(full_output[0], dict) and "generated_text" in full_output[0]:
            generated_text = full_output[0]["generated_text"]
+       else: return str(full_output)
+   elif isinstance(full_output, str): generated_text = full_output
+   else: return str(full_output)
+
    last_input_content = ""
    if isinstance(input_prompt_list, list) and input_prompt_list:
+       # Find the last message with 'user' or 'system' role potentially?
+       # Let's stick to finding the last message content for simplicity
        last_input_content = input_prompt_list[-1].get("content", "")

    if last_input_content:
        last_occurrence_index = generated_text.rfind(last_input_content)
        if last_occurrence_index != -1:
            potential_response = generated_text[last_occurrence_index + len(last_input_content):].strip()
+           if potential_response:
+               # Basic cleanup
                potential_response = re.sub(r'^<\/?s?>', '', potential_response).strip()
                potential_response = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', potential_response).strip()
+               # Check if the response is just whitespace or seems empty after cleanup
+               if potential_response:
+                   return potential_response

+   # Fallback or if model correctly outputted only the response
    cleaned_text = generated_text
    if isinstance(input_prompt_list, list) and input_prompt_list:
        first_prompt_content = input_prompt_list[0].get("content", "")
        if first_prompt_content and cleaned_text.startswith(first_prompt_content):
+           # Be careful not to strip if the response happens to start the same way
+           pass  # Let's rely more on the end-stripping heuristic above

+   # General cleanup
    cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
    cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()

+   # If after all this, it's empty, maybe return original generated_text?
+   # Or log a warning and return the cleaned version.
+   if not cleaned_text and generated_text:
+       print("Warning: Parsing resulted in empty string, returning original generation.")
+       return generated_text  # Return original if cleaning failed
+
+   # If input prompt wasn't found, assume the model outputted only the response (ideal case)
+   # or the whole thing (fallback case). The cleaning helps for the latter.
+   if last_input_content and last_occurrence_index == -1:
+       print("Warning: Could not find last input prompt in LLM output. Returning cleaned full output.")

+   return cleaned_text
+
+@GPU(memory=40)  # Decorator needed for Spaces resource allocation during calls
def generate_enhanced_persona(model, name, bio_text, context=""):
    """Use the LLM to enhance the persona profile."""
    print(f"Generating enhanced persona for {name}...")
+   if model is None: raise ValueError("Model is not loaded.")

    enhancement_prompt = [
+       {"role": "system", "content": """You are an expert AI character developer. Your task is to synthesize information into a detailed and coherent character profile. Focus on personality, potential interests, speaking style, and mannerisms based ONLY on the provided text. If the text indicates the character is a child, ensure the profile reflects age-appropriate traits. Output ONLY the enhanced character profile description. Do not include conversational introductions, explanations, apologies for limited info, or markdown formatting like headers (e.g., ### Personality). Start directly with the profile text."""},
+       {"role": "user", "content": f"""Synthesize the following information about '{name}' into a character profile. Context: {context} Information Found:\n{bio_text}\n\nCreate the profile based *only* on the text above."""}
    ]

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
            outputs = model(enhancement_prompt, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.9)
        parsed_output = parse_llm_output(outputs, enhancement_prompt)
        print("Enhanced persona generated.")
+       return parsed_output if parsed_output else f"Could not generate profile based on:\n{bio_text}"
    except Exception as e:
        error_msg = f"Error generating enhanced persona: {str(e)}"
        print(error_msg)
        return f"Error enhancing profile: {str(e)}\n\nUsing basic info:\n{bio_text}"

+@GPU(memory=40)  # Decorator needed for Spaces resource allocation during calls
def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
    """Generate an optimized system prompt for the persona."""
    print(f"Generating system prompt for {name}...")
+   if model is None: raise ValueError("Model is not loaded.")

+   fallback_prompt = f"""You are simulating the character '{name}'. Act and respond according to this profile:\n{enhanced_profile}\nAdditional context for the simulation: {context}\n---\nMaintain this persona consistently. Respond naturally based on the profile. Do not mention that you are an AI or a simulation. If asked about details not in the profile, you can be evasive or state you don't know/remember, consistent with the persona."""

    prompt = [
+       {"role": "system", "content": """You are an expert AI prompt engineer specializing in character simulation. Your task is to create a concise and effective system prompt for an LLM that will simulate a character based on a provided profile. The system prompt should instruct the LLM to embody the character, covering: 1. Core personality, attitude, and speaking style (based on the profile). 2. Key interests or knowledge areas (if mentioned in the profile). 3. How to handle questions outside its knowledge (e.g., be evasive, admit ignorance naturally). 4. Explicitly state it should *not* break character or mention being an AI. 5. Incorporate age-appropriateness if the profile suggests a specific age group. Output ONLY the system prompt itself. Do not add any explanation or introductory text."""},
+       {"role": "user", "content": f"""Create a system prompt for an AI to simulate the character '{name}'. Context for simulation: {context} Character Profile:\n{enhanced_profile}\n\nGenerate the system prompt based *only* on the profile and context provided."""}
    ]

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
+           outputs = model(prompt, max_new_tokens=300, do_sample=True, temperature=0.6)
        parsed_output = parse_llm_output(outputs, prompt)
        print("System prompt generated.")
        return parsed_output if parsed_output else fallback_prompt
    except Exception as e:
        error_msg = f"Error generating system prompt: {str(e)}"
        print(error_msg)
        return fallback_prompt

+@GPU(memory=40)  # Decorator needed for Spaces resource allocation during calls
def generate_response(model, messages):
    """Generate a response using the LLM."""
    print("Generating response...")
+   if model is None: raise ValueError("Model is not loaded.")
+   if not messages: return "Error: No message history provided."

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
+           # Ensure pad_token_id is set correctly if needed, especially for batching or specific models
            outputs = model(
                messages,
+               max_new_tokens=512,
                do_sample=True,
                top_p=0.9,
                temperature=0.7,
                use_cache=True,
+               # Check if EOS token is needed for this model/pipeline setup
+               pad_token_id=model.tokenizer.eos_token_id if model.tokenizer.eos_token_id else None
            )
        parsed_output = parse_llm_output(outputs, messages)
        print("Response generated.")
+       return parsed_output if parsed_output else "..."
    except Exception as e:
        error_msg = f"Error during response generation: {str(e)}"
        print(error_msg)
+       # Consider if the specific error should be shown to the user
+       return f"Sorry, I encountered an error trying to respond."

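For reference, parse_llm_output above assumes the text-generation pipeline returns a list whose first element carries the full "generated_text" (prompt content plus continuation), and it recovers the reply by slicing off everything up to the last input message. A small self-contained illustration of that assumption, with the pipeline output mocked rather than produced by the real model:

# Hypothetical, self-contained illustration; the pipeline output is mocked and the values are invented.
messages = [
    {"role": "system", "content": "You are simulating the character 'Ada'."},
    {"role": "user", "content": "What do you enjoy doing?"},
]

# Shape this code expects from a text-generation pipeline call (mocked here):
mock_outputs = [{"generated_text": messages[-1]["content"] + " I love tinkering with little machines."}]

generated = mock_outputs[0]["generated_text"]
# Strip everything up to and including the last input message, as the parser does.
last = messages[-1]["content"]
reply = generated[generated.rfind(last) + len(last):].strip()
print(reply)  # -> "I love tinkering with little machines."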
# --- Persona Chat Class ---
        self.persona_context = ""
        self.messages = []
        self.enhanced_profile = ""
+       self.model_loaded = False

+   # No @GPU decorator needed here typically, as it calls functions that ARE decorated
    def load_model_if_needed(self):
        """Loads the model if it hasn't been loaded successfully."""
+       if not self.model_loaded or self.model is None:  # Check self.model too
+           print("Model not loaded or instance lost. Attempting to load...")
+           # Call the @GPU decorated load_model function
+           self.model = load_model()  # This function IS decorated
            if self.model is None:
+               # load_model now raises error, but double-check here
                raise RuntimeError("Failed to load the language model. Cannot proceed.")
            else:
                self.model_loaded = True
                print("Model loaded successfully within PersonaChat instance.")
+       # else: print("Model already loaded.") # Reduce log noise

+   # No @GPU decorator needed here typically
    def set_persona(self, name, context=""):
        """Orchestrates persona creation: search, enhance, generate prompt."""
+       # This method calls other functions that have @GPU decorators
        try:
+           self.load_model_if_needed()  # Ensures model is ready

            self.persona_name = name
            self.persona_context = context
+           self.messages = []
+           self.enhanced_profile = ""

            status = f"Searching for information about {name}..."
+           yield status, "", "", [{"role": "system", "content": "Initializing persona creation..."}]  # Added empty profile yield

            search_results = search_person(name, context)
            if isinstance(search_results, str) and search_results.startswith("Error"):
                error_msg = f"Failed to set persona: {search_results}"
+               yield error_msg, "", "", [{"role": "system", "content": error_msg}]
+               return

            bio_text = extract_text_from_search_results(search_results)
            if bio_text.startswith("Could not extract text"):
+               yield f"Warning: {bio_text}", "", "", [{"role": "system", "content": bio_text}]

            status = f"Creating enhanced profile for {name}..."
+           yield status, "", bio_text, [{"role": "system", "content": status}]  # Show basic bio while enhancing

+           # Call the @GPU decorated function
            self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)
+           profile_for_prompt = self.enhanced_profile
            if self.enhanced_profile.startswith("Error enhancing profile"):
+               yield f"Warning: Could not enhance profile. Using basic info.", "", self.enhanced_profile, [{"role": "system", "content": self.enhanced_profile}]
+               profile_for_prompt = bio_text  # Fallback

            status = f"Generating optimal system prompt for {name}..."
+           # Yield the enhanced profile while generating prompt
+           yield status, self.enhanced_profile, self.enhanced_profile, [{"role": "system", "content": status}]

+           # Call the @GPU decorated function
            self.system_prompt = generate_system_prompt_with_llm(self.model, name, profile_for_prompt, context)
            self.messages = [{"role": "system", "content": self.system_prompt}]

+           yield f"Persona set to '{name}'. Ready to chat!", self.system_prompt, self.enhanced_profile, self.messages

        except RuntimeError as e:
            error_msg = f"Critical Error: {str(e)}"
            print(error_msg)
+           yield error_msg, "", "", [{"role": "system", "content": error_msg}]
        except Exception as e:
            error_msg = f"An unexpected error occurred during persona setup: {str(e)}"
            print(error_msg)
+           # Attempt to yield current state even on error
+           yield error_msg, self.system_prompt, self.enhanced_profile, [{"role": "system", "content": error_msg}]

+   # No @GPU decorator needed here typically
    def chat(self, user_message):
        """Processes a user message and returns the AI's response."""
+       # This method calls generate_response which has the @GPU decorator
        try:
+           self.load_model_if_needed()

            if not self.messages:
                print("Error: Chat called before persona was set.")
                return "Please set a persona first using the controls above."

            print(f"User message: {user_message}")
            formatted_message = {"role": "user", "content": user_message}
+           # Keep internal history, pass copy to model if needed, but pipeline usually handles state
            self.messages.append(formatted_message)

+           # Call the @GPU decorated function
            response = generate_response(self.model, self.messages)

+           # Append assistant response IF generation succeeded
+           if not response.startswith("Sorry, I encountered an error"):
+               assistant_message = {"role": "assistant", "content": response}
+               self.messages.append(assistant_message)
+               print(f"Assistant response: {response}")
+           else:
+               print(f"Assistant error response: {response}")
+               # Do not add the error message itself to the persistent history
+               # Let the UI show the error, but don't make the bot repeat it next turn.

            return response

        except RuntimeError as e:
            error_msg = f"Critical Error: {str(e)}. Cannot generate response."
            print(error_msg)
            return error_msg
        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"
            print(error_msg)
+           return f"Sorry, I encountered an error: {str(e)}"

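A minimal sketch of driving this class outside the Gradio UI, assuming the Spaces GPU environment is available and the model loads; the sample inputs are invented and the yield order follows the comments in set_persona_flow below (status, system prompt, enhanced profile, message list):

# Hypothetical headless usage of PersonaChat; requires the GPU/Spaces setup shown above.
chat_session = PersonaChat()

# set_persona is a generator that streams progress updates.
for status, system_prompt, profile, messages in chat_session.set_persona("Ada Lovelace", "Victorian mathematician"):
    print(status)

# Once the persona is set, chat() returns the assistant's reply as a string.
print(chat_session.chat("What are you working on these days?"))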

# --- Gradio Interface ---
def create_interface():
+   persona_chat = PersonaChat()  # Instantiate the handler class

    css = """
    .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
    .main-container { max-width: 1200px; margin: auto; padding: 0; }
    .persona-button { background-color: #4ca1af !important; color: white !important; }
    .system-prompt-display { background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-top: 15px; border: 1px solid #e0e0e0; font-family: monospace; white-space: pre-wrap; word-wrap: break-word; }
    .footer { text-align: center; margin-top: 20px; font-size: 0.9rem; color: #666; }
+   /* Use default chatbot message styling provided by type='messages' */
    .typing-indicator { color: #aaa; font-style: italic; }
    """

    with gr.Blocks(css=css, title="AI Persona Simulator") as interface:
        with gr.Row(elem_classes="main-container"):
            with gr.Column():
                with gr.Column(elem_classes="header"):
                    gr.Markdown("# AI Persona Simulator")
                    gr.Markdown("Create and interact with AI-driven character simulations")

                with gr.Column(elem_classes="setup-section"):
                    gr.Markdown("### 1. Create Your Persona")
+                   gr.Markdown("Enter a name and context. The AI will search, build a profile, and prepare for chat.")
                    with gr.Row():
+                       name_input = gr.Textbox(label="Character Name", placeholder="e.g., Sherlock Holmes, Erenalp, A curious 7th grader", elem_id="name_input")
+                       context_input = gr.Textbox(label="Character Context / Description", placeholder="e.g., Living in 221B Baker Street, London. OR 7th grade, loves math...", lines=2, elem_id="context_input")
+                   set_persona_button = gr.Button("Create Persona & Start Chat", variant="primary", elem_classes="persona-button")
+                   status_output = gr.Textbox(label="Status", value="Enter details above and click 'Create Persona'.", interactive=False, elem_classes="status-bar")
+                   with gr.Accordion("View Generated Details", open=False):
+                       enhanced_profile_display = gr.TextArea(label="Enhanced Profile (Generated by AI)", interactive=False, lines=10, elem_classes="system-prompt-display")
+                       system_prompt_display = gr.TextArea(label="System Prompt (Instructions for the AI)", interactive=False, lines=10, elem_classes="system-prompt-display")

                with gr.Column(elem_classes="chat-section"):
                    gr.Markdown("### 2. Chat with Your Character")
+                   character_name_display = gr.Markdown(value="*No persona created yet*", elem_id="character-name-display")
+                   # ***** FIX GRADIO WARNINGS *****
                    chatbot = gr.Chatbot(
                        label="Conversation",
                        height=450,
                        elem_classes="chat-container",
+                       # bubble_full_width=False, # Deprecated
+                       avatar_images=(None, "🤖"),  # User default, Bot emoji
+                       type="messages"  # ***** USE RECOMMENDED TYPE *****
                    )
                    with gr.Row():
+                       msg_input = gr.Textbox(label="Your message", placeholder="Type your message here and press Enter...", elem_classes="message-input", scale=4)
+                       send_button = gr.Button("Send", variant="primary", elem_classes="send-button", scale=1)
+
                with gr.Column(elem_classes="footer"):
                    gr.Markdown(f"Powered by {MODEL_ID}")

        # --- Event Handlers ---
        def set_persona_flow(name, context):
            if not name:
                yield "Status: Please enter a character name.", "", "", "*No persona created yet*", []

            initial_character_display = f"### Preparing to chat with {name}..."
            initial_prompt = "System prompt will appear here..."
            initial_profile = "Enhanced profile will appear here..."
+           # Start with empty history for messages type
+           initial_history = []

            yield initial_status, initial_prompt, initial_profile, initial_character_display, initial_history

            final_status, final_prompt, final_profile = "Error", "", ""
            final_history = initial_history
            try:
+               # Use the PersonaChat instance's method generator
+               # Expected yield order: status, system_prompt, enhanced_profile, messages_list
+               for status_update, prompt_update, profile_update, history_update in persona_chat.set_persona(name, context):
+                   final_status, final_prompt, final_profile = status_update, prompt_update, profile_update
+                   if isinstance(history_update, list): final_history = history_update
+
                    character_display = f"### Preparing chat with {name}..."
+                   if "Ready to chat" in status_update:
                        character_display = f"### Chatting with {name}"
+                   elif "Error" in status_update:
                        character_display = f"### Error creating {name}"

+                   yield status_update, final_prompt, final_profile, character_display, final_history
+                   time.sleep(0.1)  # Small delay for UI update visibility

            except Exception as e:
+               error_msg = f"Failed to set persona (interface error): {str(e)}"
                print(error_msg)
+               # Try to yield error state
+               yield error_msg, final_prompt, final_profile, f"### Error creating {name}", final_history

        def send_message_flow(message, history):
+           # Ensure history is a list (for messages type)
+           if history is None: history = []
            if not message.strip():
+               return "", history

+           # Check if persona is ready (looks for system message in internal state)
            if not persona_chat.messages or persona_chat.messages[0]['role'] != 'system':
                history.append({"role": "user", "content": message})
+               history.append({"role": "assistant", "content": "Error: Please create a valid persona first."})
+               return "", history

+           # Append user message to UI history
            history.append({"role": "user", "content": message})
+           # Append placeholder for bot response (typing indicator)
+           history.append({"role": "assistant", "content": None})  # Use None for typing indicator with type='messages'

+           yield "", history  # Update UI to show user msg + typing

+           # Call chat method (uses internal state, returns string response)
+           response_text = persona_chat.chat(message)

+           # Update the placeholder in UI history with the actual response
+           history[-1]["content"] = response_text

+           yield "", history  # Update UI with final response

        set_persona_button.click(
            set_persona_flow,
            inputs=[name_input, context_input],
            outputs=[status_output, system_prompt_display, enhanced_profile_display, character_name_display, chatbot]
        )
        send_button.click(
            send_message_flow,
            inputs=[msg_input, chatbot],
            outputs=[msg_input, chatbot]
        )
        msg_input.submit(
            send_message_flow,
            inputs=[msg_input, chatbot],

# --- Main Execution ---
if __name__ == "__main__":
+   print("Starting Gradio application for Hugging Face Spaces...")
    demo = create_interface()
+   demo.queue().launch(  # queue() is recommended for Spaces
+       server_name="0.0.0.0",
        server_port=7860,
+       # share=False is default and usually needed for Spaces deployment structure
+       show_error=True,  # Good for debugging in Spaces logs
+       debug=True  # More verbose logging
    )
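Based on the package list in the new header comments, a requirements.txt for this Space would look roughly like the following (unpinned; the spaces package itself is provided by the Spaces runtime and does not need to be listed):

gradio
transformers
torch
duckduckgo_search
huggingface_hub
accelerate
bitsandbytes
sentencepiece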