Spaces:

ErenalpCet
/

AI-Persona-Simulator

Running on Zero

App Files Files Community

ErenalpCet committed 14 days ago

Commit

bdf9a64

verified ·

1 Parent(s): a84a428

Update app.py

Browse files

Files changed (1) hide show

app.py +536 -360

app.py CHANGED Viewed

@@ -1,183 +1,341 @@
 import gradio as gr
 import transformers
 import torch
-from transformers import pipeline
 from duckduckgo_search import DDGS
 import re
 import time
 from huggingface_hub import HfApi
-from spaces import GPU
 # --- Constants and Configuration ---
 MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
 MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation
@GPU(memory=40)
def load_model():
    """Build the text-generation pipeline for ``MODEL_ID``.

    The model is requested in bfloat16 with 8-bit loading, automatic device
    placement and a GPU-0 memory cap of ``MAX_GPU_MEMORY``.

    Returns:
        The pipeline object, or ``None`` when construction fails (the error
        is printed rather than raised).
    """
    extra_model_args = {
        "load_in_8bit": True,
        "use_cache": True,
        "max_memory": {0: MAX_GPU_MEMORY},
    }
    try:
        return pipeline(
            "text-generation",
            model=MODEL_ID,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            model_kwargs=extra_model_args,
        )
    except Exception as e:
        print(f"Error loading model: {e}")
        return None
 def search_person(name, context=""):
     """Search for information about a person using DuckDuckGo."""
     results = []
     search_terms = []
-    if "grade" in context.lower():
-        grade_match = re.search(r'(\d+)(st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
             grade = grade_match.group(1)
             search_terms.append(f"{name} student {grade} grade")
-    search_terms.extend([
-        f"{name} {context}" if context else name,
-        f"{name} interests",
-        f"{name} personality"
-    ])
     try:
         with DDGS() as ddgs:
             for term in search_terms:
-                search_results = list(ddgs.text(term, max_results=3))
                 results.extend(search_results)
     except Exception as e:
-        return f"Error during search: {str(e)}"
-    if not results and context:
         return create_synthetic_profile(name, context)
     return results
 def create_synthetic_profile(name, context):
     """Create a synthetic profile when search returns no results."""
     profile = {
-        "body": f"{name} is a person described as: {context}."
     }
     if "grade" in context.lower():
-        grade_match = re.search(r'(\d+)(st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
-            grade = grade_match.group(1)
-            age = 5 + int(grade)
-            profile["body"] += f" {name} is approximately {age} years old and in {grade}th grade."
-            profile["body"] += f" Like most {grade}th graders, {name} is likely interested in friends, learning new things, and developing their own identity."
     return [profile]
 def extract_text_from_search_results(search_results):
     """Extract relevant text from search results."""
     combined_text = ""
     for result in search_results:
-        if isinstance(result, dict) and 'body' in result:
-            combined_text += result['body'] + "\n\n"
-    combined_text = re.sub(r'\s+', ' ', combined_text)
-    return combined_text
@GPU(memory=40)
def generate_enhanced_persona(model, name, bio_text, context=""):
    """Use the LLM to enhance the persona profile with GPU optimization.

    Args:
        model: Callable text-generation pipeline; invoked with a list of chat
            messages plus generation keyword arguments.
        name: Name of the persona.
        bio_text: Text gathered about the persona; also the fallback result.
        context: Optional extra description supplied by the user.

    Returns:
        The generated profile text, or ``bio_text`` when generation fails or
        the output has an unexpected shape.
    """
    enhancement_prompt = [
        {"role": "system", "content": """You are an expert AI character developer.
Your task is to create a detailed character profile based on limited information.
Output ONLY the enhanced profile with no additional explanations or formatting."""},
        {"role": "user", "content": f"""Here's some information I found about {name}:
{bio_text}
Additional context: {context}
Based on this information, create a detailed, rich character profile for {name}.
Include personality traits, speaking style, interests, and mannerisms.
If this is a child, include age-appropriate details."""}
    ]
    try:
        with torch.cuda.amp.autocast():
            outputs = model(enhancement_prompt, max_new_tokens=1024, do_sample=True)
        if isinstance(outputs, list) and outputs and isinstance(outputs[0], dict):
            generated = outputs[0].get("generated_text")
            # For chat-style input the pipeline returns the whole conversation
            # as a list of messages; the new reply is the last entry.
            if isinstance(generated, list) and generated and isinstance(generated[-1], dict):
                generated = generated[-1].get("content")
            if isinstance(generated, str):
                return generated
        return bio_text
    except Exception as e:
        print(f"Error generating enhanced persona: {str(e)}")
        return bio_text
@GPU(memory=40)
def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
    """Generate an optimized system prompt for the persona.

    Args:
        model: Callable text-generation pipeline; invoked with a list of chat
            messages plus generation keyword arguments.
        name: Name of the persona.
        enhanced_profile: Profile text the prompt should be based on.
        context: Optional extra description supplied by the user.

    Returns:
        The generated system prompt, or a basic template built from the
        profile and context when generation fails or the output has an
        unexpected shape.
    """
    # Single definition of the fallback, used by both failure paths.
    fallback_prompt = f"""You are {name}. Use this information to respond:
{enhanced_profile}
{context}
Always stay in character and respond naturally."""
    prompt = [
        {"role": "system", "content": """You are an expert AI prompt engineer.
Your task is to create an optimal system prompt for character simulation."""},
        {"role": "user", "content": f"""Create a system prompt for {name} based on:
Profile: {enhanced_profile}
Context: {context}
The prompt should define the character's:
1. Personality and speaking style
2. Knowledge boundaries
3. Typical responses
4. Age-appropriate behavior if applicable"""}
    ]
    try:
        with torch.cuda.amp.autocast():
            outputs = model(prompt, max_new_tokens=1024, do_sample=True)
        if isinstance(outputs, list) and outputs and isinstance(outputs[0], dict):
            generated = outputs[0].get("generated_text")
            # For chat-style input the pipeline returns the whole conversation
            # as a list of messages; the new reply is the last entry.
            if isinstance(generated, list) and generated and isinstance(generated[-1], dict):
                generated = generated[-1].get("content")
            if isinstance(generated, str):
                return generated
        return fallback_prompt
    except Exception as e:
        print(f"Error generating system prompt: {str(e)}")
        return fallback_prompt
@GPU(memory=40)
def generate_response(model, messages):
    """Generate a response using the LLM with GPU optimization.

    Args:
        model: Callable text-generation pipeline; invoked with the chat
            history plus sampling keyword arguments.
        messages: Chat history as a list of ``{"role", "content"}`` dicts.

    Returns:
        The assistant's reply text, a generic apology when the output has an
        unexpected shape, or ``"Error: ..."`` when generation raises.
    """
    try:
        with torch.cuda.amp.autocast():
            outputs = model(
                messages,
                max_new_tokens=512,
                do_sample=True,
                top_p=0.9,
                temperature=0.7,
                use_cache=True
            )
        if isinstance(outputs, list) and outputs and isinstance(outputs[0], dict):
            generated = outputs[0].get("generated_text")
            # For chat-style input the pipeline returns the whole conversation
            # as a list of messages; the new reply is the last entry.
            if isinstance(generated, list) and generated and isinstance(generated[-1], dict):
                generated = generated[-1].get("content")
            if isinstance(generated, str):
                return generated
        return "I couldn't generate a proper response. Please try again."
    except Exception as e:
        print(f"Error generating response: {str(e)}")
        return f"Error: {str(e)}"
 class PersonaChat:
     def __init__(self):
         self.model = None
@@ -186,344 +344,362 @@ class PersonaChat:
         self.persona_context = ""
         self.messages = []
         self.enhanced_profile = ""
-    @GPU(memory=40)
     def load_model_if_needed(self):
-        if self.model is None:
             self.model = load_model()
     def set_persona(self, name, context=""):
-        self.load_model_if_needed()
-        self.persona_name = name
-        self.persona_context = context
-        status = f"Searching for information about {name}..."
-        yield status, "", [{"role": "system", "content": "Starting persona creation..."}]
-        search_results = search_person(name, context)
-        if isinstance(search_results, str) and search_results.startswith("Error"):
-            yield f"Error: {search_results}", "", [{"role": "system", "content": f"Error: {search_results}"}]
-            return
-        bio_text = extract_text_from_search_results(search_results)
-        status = f"Creating enhanced profile for {name}..."
-        yield status, "", [{"role": "system", "content": status}]
-        self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)
-        status = f"Generating optimal system prompt for {name}..."
-        yield status, "", [{"role": "system", "content": status}]
-        self.system_prompt = generate_system_prompt_with_llm(self.model, name, self.enhanced_profile, context)
-        self.messages = [{"role": "system", "content": self.system_prompt}]
-        yield f"Persona set to {name}. Ready to chat!", self.system_prompt, self.messages
     def chat(self, user_message):
-        """Process a chat message and return the response."""
-        self.load_model_if_needed()
         try:
-            formatted_message = {"role": "user", "content": user_message} if isinstance(user_message, str) else user_message
             self.messages.append(formatted_message)
             response = generate_response(self.model, self.messages)
             assistant_message = {"role": "assistant", "content": response}
             self.messages.append(assistant_message)
             return response
         except Exception as e:
             error_msg = f"Error generating response: {str(e)}"
             print(error_msg)
             return error_msg
 def create_interface():
     persona_chat = PersonaChat()
-    # Custom CSS for better UI
     css = """
-    .gradio-container {
-        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-    }
-    .main-container {
-        max-width: 1200px;
-        margin: auto;
-        padding: 0;
-    }
-    .header {
-        background: linear-gradient(90deg, #2c3e50, #4ca1af);
-        color: white;
-        padding: 20px;
-        border-radius: 10px 10px 0 0;
-        margin-bottom: 20px;
-        text-align: center;
-    }
-    .setup-section {
-        background-color: #f9f9f9;
-        border-radius: 10px;
-        padding: 20px;
-        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-        margin-bottom: 20px;
-    }
-    .chat-section {
-        background-color: white;
-        border-radius: 10px;
-        padding: 20px;
-        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-    }
-    .status-bar {
-        background: #f0f0f0;
-        padding: 10px 15px;
-        border-radius: 5px;
-        margin: 15px 0;
-        font-weight: 500;
-    }
-    .chat-container {
-        border: 1px solid #eaeaea;
-        border-radius: 10px;
-        height: 500px !important;
-        overflow-y: auto;
-        background-color: #f9f9f9;
-    }
-    .message-input {
-        margin-top: 10px;
-    }
-    .send-button {
-        background-color: #2c3e50 !important;
-    }
-    .persona-button {
-        background-color: #4ca1af !important;
-    }
-    .system-prompt {
-        background-color: #f5f5f5;
-        border-radius: 8px;
-        padding: 10px;
-        margin-top: 15px;
-        border: 1px solid #e0e0e0;
-    }
-    .footer {
-        text-align: center;
-        margin-top: 20px;
-        font-size: 0.9rem;
-        color: #666;
-    }
-    /* Avatar styling */
-    .user-message {
-        background-color: #e1f5fe;
-        border-radius: 15px 15px 0 15px;
-        padding: 10px 15px;
-        margin: 8px 0;
-        max-width: 80%;
-        float: right;
-        clear: both;
-    }
-    .bot-message {
-        background-color: #f0f0f0;
-        border-radius: 15px 15px 15px 0;
-        padding: 10px 15px;
-        margin: 8px 0;
-        max-width: 80%;
-        float: left;
-        clear: both;
-    }
-    /* Loading animation */
-    @keyframes pulse {
-        0% { opacity: 0.6; }
-        50% { opacity: 1; }
-        100% { opacity: 0.6; }
-    }
-    .loading {
-        animation: pulse 1.5s infinite;
-        padding: 10px;
-        background-color: #eee;
-        border-radius: 5px;
-        display: inline-block;
-    }
     """
     with gr.Blocks(css=css, title="AI Persona Simulator") as interface:
         with gr.Row(elem_classes="main-container"):
             with gr.Column():
                 # Header
                 with gr.Column(elem_classes="header"):
                     gr.Markdown("# AI Persona Simulator")
-                    gr.Markdown("Create lifelike character simulations with advanced AI")
                 # Setup Section
                 with gr.Column(elem_classes="setup-section"):
-                    gr.Markdown("### Create Your Persona")
-                    gr.Markdown("Enter details about the character you want to simulate")
                     with gr.Row():
                         name_input = gr.Textbox(
                             label="Character Name",
-                            placeholder="e.g. Erenalp",
-                            elem_classes="input-field"
                         )
-                    with gr.Row():
                         context_input = gr.Textbox(
-                            label="Character Context",
-                            placeholder="e.g. in 7th grade, loves math and video games, has a pet cat",
                             lines=2,
-                            elem_classes="input-field"
-                        )
-                    with gr.Row():
-                        set_persona_button = gr.Button(
-                            "Create Persona",
-                            variant="primary",
-                            elem_classes="persona-button"
                         )
                     status_output = gr.Textbox(
-                        label="Status",
                         interactive=False,
                         elem_classes="status-bar"
                     )
-                    with gr.Accordion("Character System Prompt", open=False, elem_classes="system-prompt-section"):
                         system_prompt_display = gr.TextArea(
-                            label="",
                             interactive=False,
                             lines=10,
-                            elem_classes="system-prompt"
                         )
                 # Chat Section
                 with gr.Column(elem_classes="chat-section"):
-                    gr.Markdown("### Chat with Your Character")
-                    # Display character name dynamically
                     character_name_display = gr.Markdown(
-                        elem_id="character-name",
-                        value="Start by creating a persona above"
                     )
                     chatbot = gr.Chatbot(
-                        label="",
                         height=450,
                         elem_classes="chat-container",
-                        avatar_images=("👤", "🤖"),
-                        type="messages"
                     )
                     with gr.Row():
                         msg_input = gr.Textbox(
                             label="Your message",
-                            placeholder="Type your message here...",
-                            elem_classes="message-input"
                         )
                         send_button = gr.Button(
-                            "Send",
                             variant="primary",
-                            elem_classes="send-button"
                         )
                 # Footer
                 with gr.Column(elem_classes="footer"):
-                    gr.Markdown("Powered by Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct")
-        # Functions
-        def update_character_name(name):
-            if name:
-                return f"### Chatting with {name}"
-            return "### Start by creating a persona above"
-        def set_persona_generator(name, context):
-            initial_status = f"Creating persona for {name}..."
-            initial_character_display = f"### Creating persona for {name}..."
-            initial_prompt = ""
-            initial_history = [{"role": "system", "content": "Initializing..."}]
-            # Initial yield
-            yield initial_status, initial_prompt, initial_history, initial_character_display
-            # Process persona creation
-            for status, prompt, history in persona_chat.set_persona(name, context):
-                character_display = f"### Creating persona for {name}..."
-                if "Ready to chat" in status:
-                    character_display = f"### Chatting with {name}"
-                yield status, prompt, history, character_display
-        def send_message(message, history):
-            if not message.strip():
-                return "", history
-            if not persona_chat.messages:
-                new_history = list(history) if history else []
-                new_history.append({"role": "user", "content": message})
-                new_history.append({"role": "assistant", "content": "Please create a persona first using the form above."})
-                return "", new_history
             try:
-                # Show typing indicator
-                new_history = list(history) if history else []
-                new_history.append({"role": "user", "content": message})
-                new_history.append({"role": "assistant", "content": "..."})
-                yield "", new_history
-                # Generate actual response
-                response = persona_chat.chat(message)
-                new_history[-1]["content"] = response
-                yield "", new_history
             except Exception as e:
-                print(f"Error in send_message: {str(e)}")
-                new_history[-1]["content"] = "Sorry, there was an error processing your message."
-                yield "", new_history
-        # Event handlers
         set_persona_button.click(
-            set_persona_generator,
             inputs=[name_input, context_input],
-            outputs=[status_output, system_prompt_display, chatbot, character_name_display]
         )
-        name_input.change(
-            update_character_name,
-            inputs=[name_input],
-            outputs=[character_name_display]
-        )
         send_button.click(
-            send_message,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
         )
         msg_input.submit(
-            send_message,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
         )
     return interface
 if __name__ == "__main__":
     demo = create_interface()
-    demo.queue(max_size=5).launch(
-        server_name="0.0.0.0",
         server_port=7860,
-        share=False,
-        show_error=True,
-        debug=True
     )

+# --- Required Installs ---
+# pip install gradio transformers torch duckduckgo_search huggingface_hub accelerate bitsandbytes sentencepiece
 import gradio as gr
 import transformers
 import torch
+from transformers import pipeline, BitsAndBytesConfig # Added BitsAndBytesConfig
 from duckduckgo_search import DDGS
 import re
 import time
 from huggingface_hub import HfApi
+# from spaces import GPU # Assuming this is specific to Hugging Face Spaces deployment environment
 # --- Constants and Configuration ---
 MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
 MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation
# --- GPU decorator ---
# Use the `GPU` decorator from the `spaces` package when it is installed.
# Otherwise fall back to a no-op stand-in so decorated functions still run.
try:
    from spaces import GPU
except ImportError:
    print("Warning: 'spaces.GPU' not found. Assuming standard environment.")

    def GPU(func=None, **_options):
        """No-op replacement for ``spaces.GPU``.

        Works both as a bare decorator (``@GPU``) and as a decorator factory
        with arbitrary keyword options (``@GPU(memory=40)``); the options are
        ignored and the decorated function is returned unchanged.
        """
        if callable(func):
            # Bare form: @GPU
            return func

        def decorator(wrapped):
            return wrapped

        # Factory form: @GPU(...)
        return decorator
# --- Model Loading ---
# The @GPU(memory=40) decorator is left disabled here.
def load_model():
    """Create the text-generation pipeline for ``MODEL_ID``.

    The model is requested in bfloat16 with an 8-bit ``BitsAndBytesConfig``
    and automatic device placement. Progress and failures are printed.

    Returns:
        The pipeline object, or ``None`` if anything raised while loading.
    """
    print(f"Attempting to load model: {MODEL_ID}")
    try:
        # Quantization goes through quantization_config; no explicit
        # max_memory cap is passed.
        extra_model_args = {
            "quantization_config": BitsAndBytesConfig(load_in_8bit=True),
            "use_cache": True,
        }
        generator = pipeline(
            "text-generation",
            model=MODEL_ID,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            model_kwargs=extra_model_args,
        )
        print(f"Model {MODEL_ID} loaded successfully.")
        return generator
    except Exception as e:
        print(f"Error loading model '{MODEL_ID}': {e}")
        # None signals failure to the caller.
        return None
# --- Web Search ---
def search_person(name, context=""):
    """Search for information about a person using DuckDuckGo.

    Several queries are derived from ``name`` and ``context`` and run one
    after another. A query that fails is logged and skipped, so results
    already collected from other queries are kept.

    Args:
        name: Name to search for.
        context: Optional free-text description; it is searched together
            with the name, and a school grade mentioned in it adds a
            grade-specific query.

    Returns:
        A list of result dicts when at least one query returned something;
        an ``"Error during DuckDuckGo search: ..."`` string when nothing was
        found and a search error occurred; otherwise the synthetic profile
        list from ``create_synthetic_profile``.
    """
    print(f"Searching for: {name} with context: {context}")
    search_terms = []
    # Prioritize context-specific search
    if context:
        search_terms.append(f"{name} {context}")
        # Add grade-specific search if applicable
        grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
        if grade_match:
            grade = grade_match.group(1)
            search_terms.append(f"{name} student {grade} grade")
    # Add general search terms
    search_terms.extend([
        f"{name}",
        f"{name} biography",
        f"{name} interests",
        f"{name} personality",
    ])
    # Remove duplicates while keeping the priority order
    search_terms = list(dict.fromkeys(search_terms))
    print(f"Using search terms: {search_terms}")

    results = []
    last_error = None
    try:
        with DDGS() as ddgs:
            for index, term in enumerate(search_terms):
                if index:
                    time.sleep(0.2)  # Small delay between searches only
                print(f"Searching DDG for: '{term}'")
                try:
                    # Fetch fewer results per term to keep context concise
                    results.extend(ddgs.text(term, max_results=2))
                except Exception as e:
                    # One failing query must not discard the others.
                    last_error = f"Error during DuckDuckGo search: {str(e)}"
                    print(last_error)
    except Exception as e:
        # Failure to create or close the search client itself.
        last_error = f"Error during DuckDuckGo search: {str(e)}"
        print(last_error)

    if results:
        print(f"Found {len(results)} potential search results.")
        return results
    if last_error is not None:
        return last_error  # Nothing usable: report the error string
    print(f"No search results found for {name}. Creating synthetic profile.")
    return create_synthetic_profile(name, context)
 def create_synthetic_profile(name, context):
     """Create a synthetic profile when search returns no results."""
     profile = {
+        "title": f"Synthetic Profile for {name}",
+        "href": "",
+        "body": f"{name} is a person described with the context: '{context}'. "
     }
+    # Try to infer age from grade
     if "grade" in context.lower():
+        grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
+            try:
+                grade = int(grade_match.group(1))
+                age = 5 + grade # Approximate age based on US school system
+                profile["body"] += f"Based on being in {grade}th grade, {name} is likely around {age} years old. "
+                profile["body"] += f"Typical interests for this age might include friends, hobbies, school subjects, and developing independence. "
+            except ValueError:
+                 profile["body"] += f"The grade mentioned ('{grade_match.group(1)}') could not be parsed to estimate age. "
+    profile["body"] += "Since no public information was found, this profile is based solely on the provided context."
+    # Return as a list containing the dictionary, matching search_person's format
     return [profile]
 def extract_text_from_search_results(search_results):
     """Extract relevant text from search results."""
+    if isinstance(search_results, str): # Handle case where search_person returned an error string
+        return f"Could not extract text due to search error: {search_results}"
     combined_text = ""
+    seen_bodies = set()
+    count = 0
+    max_results_to_process = 5 # Limit the number of results processed
     for result in search_results:
+        if count >= max_results_to_process:
+            break
+        if isinstance(result, dict) and 'body' in result and result['body']:
+            body = result['body'].strip()
+            # Avoid adding duplicate snippets
+            if body not in seen_bodies:
+                combined_text += body + "\n\n"
+                seen_bodies.add(body)
+                count += 1
+    if not combined_text:
+        return "No relevant text found in search results."
+    # Basic cleaning
+    combined_text = re.sub(r'\s+', ' ', combined_text).strip()
+    # Limit length to avoid excessive prompt size
+    max_length = 2000 # Characters
+    return combined_text[:max_length] + "..." if len(combined_text) > max_length else combined_text
+# --- LLM Generation Functions ---
+def parse_llm_output(full_output, input_prompt_list):
+    """
+    Attempts to parse only the newly generated text from the LLM output,
+    assuming the output might contain the input prompt messages.
+    """
+    # If the output is a list of dicts (as expected from pipeline), get the text
+    if isinstance(full_output, list) and len(full_output) > 0:
+        if isinstance(full_output[0], dict) and "generated_text" in full_output[0]:
+            generated_text = full_output[0]["generated_text"]
+        else:
+            return str(full_output) # Unexpected format, return raw output
+    elif isinstance(full_output, str):
+         generated_text = full_output # If it's already a string
+    else:
+        return str(full_output) # Unexpected format
+    # Heuristic: Find the last message's content from the input prompt
+    # The actual formatting depends on the tokenizer's chat template.
+    # This is a simplified approach.
+    last_input_content = ""
+    if isinstance(input_prompt_list, list) and input_prompt_list:
+        last_input_content = input_prompt_list[-1].get("content", "")
+    # Try to find the last input message content in the generated text
+    # If found, take the text after it. This might fail if formatting differs.
+    if last_input_content:
+        last_occurrence_index = generated_text.rfind(last_input_content)
+        if last_occurrence_index != -1:
+            potential_response = generated_text[last_occurrence_index + len(last_input_content):].strip()
+            # Further heuristics could be added (e.g., look for assistant role markers)
+            if potential_response: # Check if we got something after the input
+                 # Simple cleanup for potential role markers if model adds them
+                potential_response = re.sub(r'^<\/?s?>', '', potential_response).strip()
+                potential_response = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', potential_response).strip()
+                return potential_response
+    # Fallback: If parsing fails, return the whole generated text, possibly with a warning
+    # Or, if the prompt asked for ONLY the output, the model might have behaved correctly.
+    # Let's clean up potential boilerplate often added by models
+    cleaned_text = generated_text
+    if isinstance(input_prompt_list, list) and input_prompt_list:
+         # Remove potential initial prompt remnants if possible (very basic)
+         first_prompt_content = input_prompt_list[0].get("content", "")
+         if first_prompt_content and cleaned_text.startswith(first_prompt_content):
+              cleaned_text = cleaned_text[len(first_prompt_content):].strip()
+    # Simple cleanup for common markers
+    cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
+    cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()
+    print("Warning: Could not reliably parse LLM response. Returning cleaned full output.")
+    return cleaned_text # Return the potentially full (but cleaned) text as a fallback
# The @GPU(memory=40) decorator is left disabled here.
def generate_enhanced_persona(model, name, bio_text, context=""):
    """Ask the LLM to turn the gathered text into a character profile.

    Raises:
        ValueError: If ``model`` is ``None``.

    Returns:
        The parsed model output, ``bio_text`` when that output is empty, or
        an error notice followed by ``bio_text`` when generation raises.
    """
    print(f"Generating enhanced persona for {name}...")
    if model is None:
        raise ValueError("Model is not loaded.")

    system_message = {"role": "system", "content": """You are an expert AI character developer. Your task is to synthesize information into a detailed and coherent character profile. Focus on personality, potential interests, speaking style, and mannerisms based ONLY on the provided text. If the text indicates the character is a child, ensure the profile reflects age-appropriate traits.
Output ONLY the enhanced character profile description. Do not include conversational introductions, explanations, apologies for limited info, or markdown formatting like headers (e.g., ### Personality). Start directly with the profile text."""}
    user_message = {"role": "user", "content": f"""Synthesize the following information about '{name}' into a character profile.
Context: {context}
Information Found:
{bio_text}
Create the profile based *only* on the text above."""}
    enhancement_prompt = [system_message, user_message]
    sampling_options = {
        "max_new_tokens": 512,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
    }

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
            outputs = model(enhancement_prompt, **sampling_options)
        profile_text = parse_llm_output(outputs, enhancement_prompt)
        print("Enhanced persona generated.")
        # An empty parse result falls back to the raw bio text.
        return profile_text or bio_text
    except Exception as e:
        error_msg = f"Error generating enhanced persona: {str(e)}"
        print(error_msg)
        return f"Error enhancing profile: {str(e)}\n\nUsing basic info:\n{bio_text}"
# The @GPU(memory=40) decorator is left disabled here.
def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
    """Ask the LLM to write the system prompt used to simulate the persona.

    Raises:
        ValueError: If ``model`` is ``None``.

    Returns:
        The parsed model output, or a template prompt built from the profile
        and context when the output is empty or generation raises.
    """
    print(f"Generating system prompt for {name}...")
    if model is None:
        raise ValueError("Model is not loaded.")

    # Template used whenever the model cannot supply a prompt of its own.
    fallback_prompt = f"""You are simulating the character '{name}'. Act and respond according to this profile:
{enhanced_profile}
Additional context for the simulation: {context}
---
Maintain this persona consistently. Respond naturally based on the profile. Do not mention that you are an AI or a simulation. If asked about details not in the profile, you can be evasive or state you don't know/remember, consistent with the persona."""

    system_message = {"role": "system", "content": """You are an expert AI prompt engineer specializing in character simulation. Your task is to create a concise and effective system prompt for an LLM that will simulate a character based on a provided profile.
The system prompt should instruct the LLM to embody the character, covering:
1. Core personality, attitude, and speaking style (based on the profile).
2. Key interests or knowledge areas (if mentioned in the profile).
3. How to handle questions outside its knowledge (e.g., be evasive, admit ignorance naturally).
4. Explicitly state it should *not* break character or mention being an AI.
5. Incorporate age-appropriateness if the profile suggests a specific age group.
Output ONLY the system prompt itself. Do not add any explanation or introductory text."""}
    user_message = {"role": "user", "content": f"""Create a system prompt for an AI to simulate the character '{name}'.
Context for simulation: {context}
Character Profile:
{enhanced_profile}
Generate the system prompt based *only* on the profile and context provided."""}
    prompt = [system_message, user_message]
    # Shorter, lower-temperature generation than for the profile itself.
    sampling_options = {"max_new_tokens": 300, "do_sample": True, "temperature": 0.6}

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
            outputs = model(prompt, **sampling_options)
        generated_prompt = parse_llm_output(outputs, prompt)
        print("System prompt generated.")
        return generated_prompt or fallback_prompt
    except Exception as e:
        error_msg = f"Error generating system prompt: {str(e)}"
        print(error_msg)
        return fallback_prompt
+# @GPU(memory=40) # Decorator might be specific to HF Spaces
 def generate_response(model, messages):
     """Produce the next assistant reply for a chat-style message history.

     Args:
         model: Callable text-generation pipeline exposing a ``tokenizer``
             attribute with an ``eos_token_id``.
         messages: Conversation so far, as a list of role/content dicts.

     Returns:
         The parsed reply text, ``"..."`` when parsing yields nothing, or a
         human-readable error string when history is empty or generation fails.

     Raises:
         ValueError: If ``model`` is None.
     """
     print("Generating response...")
     if model is None:
         raise ValueError("Model is not loaded.")
     if not messages:
         return "Error: No message history provided."

     # Sampling configuration shared by every chat turn.
     sampling_options = {
         "max_new_tokens": 512,  # reasonable length for a chat reply
         "do_sample": True,
         "top_p": 0.9,
         "temperature": 0.7,
         "use_cache": True,
     }

     try:
         # torch.amp.autocast is used in place of torch.cuda.amp.autocast.
         with torch.amp.autocast('cuda', dtype=torch.bfloat16):
             raw_outputs = model(
                 messages,
                 # EOS doubles as the padding token so generation can stop.
                 pad_token_id=model.tokenizer.eos_token_id,
                 **sampling_options,
             )
         # Only the assistant's new reply is expected back from the parser.
         reply = parse_llm_output(raw_outputs, messages)
         print("Response generated.")
         return reply or "..."
     except Exception as e:
         failure = f"Error during response generation: {str(e)}"
         print(failure)
         return f"Sorry, I encountered an error: {str(e)}"
+# --- Persona Chat Class ---
 class PersonaChat:
     """Holds the language model and the conversation state of one persona.

     Attributes are plain instance fields: ``model`` / ``model_loaded`` track
     the lazily loaded pipeline, ``persona_name`` / ``persona_context`` /
     ``enhanced_profile`` / ``system_prompt`` describe the active persona, and
     ``messages`` is the role/content history sent to the model.
     """

     def __init__(self):
         self.model = None
         self.model_loaded = False  # set once load_model() returned a model
         self.persona_name = ""
         self.persona_context = ""
         self.enhanced_profile = ""
         self.system_prompt = ""
         self.messages = []

     # NOTE: a @GPU(memory=40) decorator may be needed on the methods below
     # when running on HF Spaces; it is intentionally left off here.
     def load_model_if_needed(self):
         """Load the model on first use.

         Raises:
             RuntimeError: If ``load_model()`` returns None.
         """
         if self.model_loaded:
             print("Model already loaded.")
             return
         print("Model not loaded yet. Attempting to load...")
         self.model = load_model()
         if self.model is None:
             # Surfaced to the user by set_persona() / chat().
             raise RuntimeError("Failed to load the language model. Cannot proceed.")
         self.model_loaded = True
         print("Model loaded successfully within PersonaChat instance.")

     def set_persona(self, name, context=""):
         """Create a persona: search, enhance the profile, build the prompt.

         This is a generator so the UI can show progress.

         Args:
             name: Character name to simulate.
             context: Optional free-text description of the character.

         Yields:
             4-tuples ``(status, system_prompt, enhanced_profile, history)``.
             ``system_prompt`` and ``enhanced_profile`` are empty strings until
             they have been generated; ``history`` is a list of role/content
             dicts. Errors are reported through ``status`` instead of being
             raised.
         """
         try:
             # Raises RuntimeError when the model cannot be loaded.
             self.load_model_if_needed()

             # Start the new persona from a clean state.
             self.persona_name = name
             self.persona_context = context
             self.messages = []
             self.enhanced_profile = ""
             self.system_prompt = ""

             status = f"Searching for information about {name}..."
             yield status, "", "", [{"role": "system", "content": "Initializing persona creation..."}]

             search_results = search_person(name, context)
             # search_person signals failure with an "Error..." string.
             if isinstance(search_results, str) and search_results.startswith("Error"):
                 error_msg = f"Failed to set persona: {search_results}"
                 yield error_msg, "", "", [{"role": "system", "content": error_msg}]
                 return

             bio_text = extract_text_from_search_results(search_results)
             if bio_text.startswith("Could not extract text"):
                 # Not fatal: continue with whatever limited info exists.
                 yield f"Warning: {bio_text}", "", "", [{"role": "system", "content": bio_text}]

             status = f"Creating enhanced profile for {name}..."
             yield status, "", "", [{"role": "system", "content": status}]
             self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)

             if self.enhanced_profile.startswith("Error enhancing profile"):
                 yield (
                     "Warning: Could not enhance profile. Using basic info.",
                     "",
                     "",
                     [{"role": "system", "content": self.enhanced_profile}],
                 )
                 # Build the system prompt from the raw bio text instead.
                 profile_for_prompt = bio_text
             else:
                 profile_for_prompt = self.enhanced_profile

             status = f"Generating optimal system prompt for {name}..."
             # The profile is shown while the prompt is still being generated.
             yield status, "", self.enhanced_profile, [{"role": "system", "content": status}]

             self.system_prompt = generate_system_prompt_with_llm(
                 self.model, name, profile_for_prompt, context
             )
             # The system prompt is the first entry of the chat history.
             self.messages = [{"role": "system", "content": self.system_prompt}]
             yield (
                 f"Persona set to '{name}'. Ready to chat!",
                 self.system_prompt,
                 self.enhanced_profile,
                 list(self.messages),  # copy: callers must not alias our state
             )
         except RuntimeError as e:
             # Model loading failures from load_model_if_needed().
             error_msg = f"Critical Error: {str(e)}"
             print(error_msg)
             yield error_msg, "", "", [{"role": "system", "content": error_msg}]
         except Exception as e:
             error_msg = f"An unexpected error occurred during persona setup: {str(e)}"
             print(error_msg)
             yield error_msg, "", self.enhanced_profile, [{"role": "system", "content": error_msg}]

     def chat(self, user_message):
         """Send one user message to the persona and return its reply.

         Args:
             user_message: Text typed by the user.

         Returns:
             The assistant's reply, or an explanatory message when no persona
             is set or generation fails. Exceptions are not propagated.
         """
         try:
             # Raises RuntimeError when the model cannot be loaded.
             self.load_model_if_needed()

             if not self.messages:
                 # Normally prevented by the UI; handled defensively.
                 print("Error: Chat called before persona was set.")
                 return "Please set a persona first using the controls above."

             print(f"User message: {user_message}")
             self.messages.append({"role": "user", "content": user_message})
             try:
                 response = generate_response(self.model, self.messages)
             except Exception:
                 # Keep the history well-formed: drop the unanswered user turn.
                 self.messages.pop()
                 raise

             self.messages.append({"role": "assistant", "content": response})
             print(f"Assistant response: {response}")
             return response
         except RuntimeError as e:
             error_msg = f"Critical Error: {str(e)}. Cannot generate response."
             print(error_msg)
             return error_msg
         except Exception as e:
             # Returned to the caller only; not stored in the history.
             error_msg = f"Error generating response: {str(e)}"
             print(error_msg)
             return error_msg
+# --- Gradio Interface ---
 def create_interface():
     """Build the Gradio Blocks UI for creating a persona and chatting with it.

     A single ``PersonaChat`` instance is created and shared by all event
     handlers defined inside this function.

     Returns:
         The configured ``gr.Blocks`` interface (not yet launched).
     """
     # Instantiate the chat handler class ONCE
     persona_chat = PersonaChat()
     # Custom CSS for the layout and chat bubbles
     css = """
     .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
     .main-container { max-width: 1200px; margin: auto; padding: 0; }
     .header { background: linear-gradient(90deg, #2c3e50, #4ca1af); color: white; padding: 20px; border-radius: 10px 10px 0 0; margin-bottom: 20px; text-align: center; }
     .setup-section { background-color: #f9f9f9; border-radius: 10px; padding: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 20px; }
     .chat-section { background-color: white; border-radius: 10px; padding: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); }
     .status-bar { background: #e9ecef; padding: 10px 15px; border-radius: 5px; margin: 15px 0; font-weight: 500; border: 1px solid #ced4da; }
     .chat-container { border: 1px solid #eaeaea; border-radius: 10px; height: 500px !important; overflow-y: auto; background-color: #ffffff; padding: 10px; }
     .message-input { margin-top: 10px; }
     .send-button { background-color: #2c3e50 !important; color: white !important; }
     .persona-button { background-color: #4ca1af !important; color: white !important; }
     .system-prompt-display { background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-top: 15px; border: 1px solid #e0e0e0; font-family: monospace; white-space: pre-wrap; word-wrap: break-word; }
     .footer { text-align: center; margin-top: 20px; font-size: 0.9rem; color: #666; }
     .user-message > .message { background-color: #e1f5fe; border-radius: 15px 15px 0 15px !important; padding: 10px 15px !important; margin: 8px 0 8px auto !important; max-width: 80%; float: right; clear: both; color: #333; }
     .bot-message > .message { background-color: #f1f3f5; border-radius: 15px 15px 15px 0 !important; padding: 10px 15px !important; margin: 8px auto 8px 0 !important; max-width: 80%; float: left; clear: both; color: #333; }
     .message p { margin: 0 !important; padding: 0 !important; } /* Prevent extra margins in chatbot messages */
     .typing-indicator { color: #aaa; font-style: italic; }
     """
     with gr.Blocks(css=css, title="AI Persona Simulator") as interface:
         with gr.Row(elem_classes="main-container"):
             with gr.Column():
                 # Header
                 with gr.Column(elem_classes="header"):
                     gr.Markdown("# AI Persona Simulator")
                     gr.Markdown("Create and interact with AI-driven character simulations")
                 # Setup Section
                 with gr.Column(elem_classes="setup-section"):
                     gr.Markdown("### 1. Create Your Persona")
                     gr.Markdown("Enter a name and any defining context (e.g., age, job, key traits, situation). The AI will search for info and build a profile.")
                     with gr.Row():
                         name_input = gr.Textbox(
                             label="Character Name",
                             placeholder="e.g., Sherlock Holmes, Erenalp, A curious 7th grader",
                             elem_id="name_input"
                         )
                         context_input = gr.Textbox(
                             label="Character Context / Description",
                             placeholder="e.g., Living in 221B Baker Street, London. OR 7th grade, loves math and video games, has a pet cat named Luna. OR A spaceship captain exploring Alpha Centauri.",
                             lines=2,
                             elem_id="context_input"
                         )
                     set_persona_button = gr.Button(
                         "Create Persona & Start Chat",
                         variant="primary",
                         elem_classes="persona-button"
                     )
                     status_output = gr.Textbox(
                         label="Status",
                         value="Enter details above and click 'Create Persona'.",
                         interactive=False,
                         elem_classes="status-bar"
                     )
                     with gr.Accordion("View Generated System Prompt", open=False):
                         system_prompt_display = gr.TextArea(
                             label="System Prompt (Instructions for the AI)",
                             interactive=False,
                             lines=10,
                             elem_classes="system-prompt-display"
                         )
                         enhanced_profile_display = gr.TextArea(
                             label="Enhanced Profile (Generated by AI)",
                             interactive=False,
                             lines=10,
                             elem_classes="system-prompt-display"  # reuses the prompt style
                         )
                 # Chat Section
                 with gr.Column(elem_classes="chat-section"):
                     gr.Markdown("### 2. Chat with Your Character")
                     character_name_display = gr.Markdown(
                         value="*No persona created yet*",
                         elem_id="character-name-display"
                     )
                     chatbot = gr.Chatbot(
                         label="Conversation",
                         height=450,
                         elem_classes="chat-container",
                         # Every handler below exchanges role/content dicts, so the
                         # component must use the "messages" format.
                         type="messages",
                         bubble_full_width=False,  # makes bubbles fit content
                         # NOTE(review): avatar_images is documented as image paths or
                         # URLs; confirm that an emoji string is accepted here.
                         avatar_images=(None, "🤖")
                     )
                     with gr.Row():
                         msg_input = gr.Textbox(
                             label="Your message",
                             placeholder="Type your message here and press Enter...",
                             elem_classes="message-input",
                             scale=4  # make input wider
                         )
                         send_button = gr.Button(
                             "Send",
                             variant="primary",
                             elem_classes="send-button",
                             scale=1
                         )
                 # Footer
                 with gr.Column(elem_classes="footer"):
                     gr.Markdown(f"Powered by {MODEL_ID}")

         # --- Event Handlers ---
         def set_persona_flow(name, context):
             """Stream persona-creation progress to the five output components.

             Yields tuples of (status, system prompt, enhanced profile,
             character heading, chatbot history).
             """
             if not name or not name.strip():
                 yield "Status: Please enter a character name.", "", "", "*No persona created yet*", []
                 return
             initial_history = [{"role": "system", "content": "Initializing..."}]
             # Initial yield to show that work has started
             yield (
                 f"Creating persona for '{name}'...",
                 "System prompt will appear here...",
                 "Enhanced profile will appear here...",
                 f"### Preparing to chat with {name}...",
                 initial_history,
             )
             latest_history = initial_history
             try:
                 # persona_chat.set_persona is itself a generator of progress updates
                 for status, prompt, profile, history_update in persona_chat.set_persona(name, context):
                     # Keep the previous history when the update carries none
                     if isinstance(history_update, list):
                         latest_history = history_update
                     if "Ready to chat" in status:
                         character_display = f"### Chatting with {name}"
                     elif "Error" in status:
                         character_display = f"### Error creating {name}"
                     else:
                         character_display = f"### Preparing chat with {name}..."
                     yield status, prompt, profile, character_display, latest_history
             except Exception as e:
                 # Anything not already handled inside the set_persona generator
                 error_msg = f"Failed to set persona due to an unexpected error: {str(e)}"
                 print(error_msg)
                 yield error_msg, "", "", f"### Error creating {name}", [{"role": "system", "content": error_msg}]

         def send_message_flow(message, history):
             """Stream a chat turn: echo the user message, then the reply.

             Yields (cleared input text, updated chatbot history). Because this
             is a generator, every exit path must *yield* its result; a plain
             ``return value`` would never reach the UI.
             """
             history = list(history or [])
             if not message or not message.strip():
                 # Ignore empty messages: history unchanged, input cleared
                 yield "", history
                 return
             # The persona is ready once its history starts with the system prompt
             if not persona_chat.messages or persona_chat.messages[0]['role'] != 'system':
                 history.append({"role": "user", "content": message})
                 history.append({"role": "assistant", "content": "Error: Please create a valid persona first using the form above."})
                 yield "", history
                 return
             # Show the user message immediately, with a placeholder reply.
             # A string is used because None content may not validate in the
             # messages format.
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "..."})
             yield "", history
             # chat() keeps its own history and handles model loading/generation
             response = persona_chat.chat(message)
             # Swap the placeholder for the actual response
             history[-1]["content"] = response
             yield "", history

         # Connect Gradio components to the handlers above
         set_persona_button.click(
             set_persona_flow,
             inputs=[name_input, context_input],
             outputs=[status_output, system_prompt_display, enhanced_profile_display, character_name_display, chatbot]
         )
         send_button.click(
             send_message_flow,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
         )
         # Allow submitting a message with the Enter key
         msg_input.submit(
             send_message_flow,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
         )
     return interface
+# --- Main Execution ---
 if __name__ == "__main__":
     print("Starting Gradio application...")
     # Required packages:
     # pip install gradio transformers torch duckduckgo_search huggingface_hub accelerate bitsandbytes sentencepiece
     launch_settings = {
         "server_name": "0.0.0.0",  # reachable from other devices on the network
         "server_port": 7860,
         "share": False,  # True would create a temporary public link
         "show_error": True,  # surface errors in the browser
         "debug": True,  # more detailed logs in the terminal
     }
     demo = create_interface()
     # queue() is enabled for better handling of multiple users/requests
     demo.queue().launch(**launch_settings)