# --- Required Installs ---
# Ensure these are in your requirements.txt for Hugging Face Spaces
# gradio
# transformers
# torch
# duckduckgo_search
# huggingface_hub
# accelerate
# bitsandbytes
# sentencepiece
# spaces <--- Provided by the Spaces environment

import gradio as gr
import transformers
import torch
from transformers import pipeline, BitsAndBytesConfig
from duckduckgo_search import DDGS
import re
import time
from huggingface_hub import HfApi
from spaces import GPU # Directly import GPU from spaces - Crucial for HF Spaces

# --- Constants and Configuration ---
MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation

# --- Model Loading ---
@GPU(memory=40) # ****** THIS DECORATOR IS ESSENTIAL FOR SPACES STARTUP ******
def load_model():
    """Load the LLM model optimized for A100 GPU using 4-bit quantization."""
    print(f"Attempting to load model: {MODEL_ID} with 4-bit quantization")
    try:
        # Configure quantization for 4-bit
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",            # NF4 is often recommended
            bnb_4bit_compute_dtype=torch.bfloat16, # Use bfloat16 for compute
            bnb_4bit_use_double_quant=True,       # Use double quantization to save more memory
        )

        # Device map will handle placing layers, relying on accelerate
        # No need to explicitly set max_memory when using device_map="auto" typically
        pipe = pipeline(
            "text-generation",
            model=MODEL_ID,
            # Note: torch_dtype is sometimes ignored when quantization_config is used,
            # but specifying compute_dtype in BitsAndBytesConfig is key.
            # Keep torch_dtype=torch.bfloat16 here for consistency if needed by other parts.
            torch_dtype=torch.bfloat16,
            device_map="auto", # Let accelerate handle layer placement
            model_kwargs={
                "quantization_config": quantization_config,
                "use_cache": True,
                # "trust_remote_code=True" # Add if model requires it (check model card)
            }
        )
        print(f"Model {MODEL_ID} loaded successfully on device: {pipe.device} (using 4-bit quantization)")
        return pipe
    except Exception as e:
        print(f"FATAL Error loading model '{MODEL_ID}' (check memory/config): {e}")
        # Raise the error to ensure it's visible in Spaces logs
        raise e

# --- Web Search ---
# (Keep search_person, create_synthetic_profile, extract_text_from_search_results as before)
def search_person(name, context=""):
    """Search for information about a person using DuckDuckGo."""
    print(f"Searching for: {name} with context: {context}")
    results = []
    search_terms = []

    if context:
        search_terms.append(f"{name} {context}")
        grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
        if grade_match:
            grade = grade_match.group(1)
            search_terms.append(f"{name} student {grade} grade")

    search_terms.append(f"{name}")
    search_terms.append(f"{name} biography")
    search_terms.append(f"{name} interests")
    search_terms.append(f"{name} personality")

    search_terms = list(dict.fromkeys(search_terms))
    print(f"Using search terms: {search_terms}")

    try:
        with DDGS() as ddgs:
            for term in search_terms:
                print(f"Searching DDG for: '{term}'")
                search_results = list(ddgs.text(term, max_results=2))
                results.extend(search_results)
                time.sleep(0.2)
    except Exception as e:
        error_msg = f"Error during DuckDuckGo search: {str(e)}"
        print(error_msg)
        return error_msg

    if not results:
        print(f"No search results found for {name}. Creating synthetic profile.")
        return create_synthetic_profile(name, context)

    print(f"Found {len(results)} potential search results.")
    return results

def create_synthetic_profile(name, context):
    """Create a synthetic profile when search returns no results."""
    profile = {
        "title": f"Synthetic Profile for {name}",
        "href": "",
        "body": f"{name} is a person described with the context: '{context}'. "
    }
    if "grade" in context.lower():
        grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
        if grade_match:
            try:
                grade = int(grade_match.group(1))
                age = 5 + grade
                profile["body"] += f"Based on being in {grade}th grade, {name} is likely around {age} years old. "
                profile["body"] += f"Typical interests for this age might include friends, hobbies, school subjects, and developing independence. "
            except ValueError:
                 profile["body"] += f"The grade mentioned ('{grade_match.group(1)}') could not be parsed to estimate age. "
    profile["body"] += "Since no public information was found, this profile is based solely on the provided context."
    return [profile]

def extract_text_from_search_results(search_results):
    """Extract relevant text from search results."""
    if isinstance(search_results, str):
        return f"Could not extract text due to search error: {search_results}"

    combined_text = ""
    seen_bodies = set()
    count = 0
    max_results_to_process = 5

    for result in search_results:
        if count >= max_results_to_process:
            break
        if isinstance(result, dict) and 'body' in result and result['body']:
            body = result['body'].strip()
            if body not in seen_bodies:
                combined_text += body + "\n\n"
                seen_bodies.add(body)
                count += 1

    if not combined_text:
        return "No relevant text found in search results."

    combined_text = re.sub(r'\s+', ' ', combined_text).strip()
    max_length = 2000
    return combined_text[:max_length] + "..." if len(combined_text) > max_length else combined_text

# --- LLM Generation Functions ---

def parse_llm_output(full_output, input_prompt_list):
    """Attempts to parse only the newly generated text from the LLM output."""
    if isinstance(full_output, list) and len(full_output) > 0:
        if isinstance(full_output[0], dict) and "generated_text" in full_output[0]:
            generated_text = full_output[0]["generated_text"]
        else: return str(full_output)
    elif isinstance(full_output, str): generated_text = full_output
    else: return str(full_output)

    last_input_content = ""
    if isinstance(input_prompt_list, list) and input_prompt_list:
        # Find the last message with 'user' or 'system' role potentially?
        # Let's stick to finding the last message content for simplicity
        last_input_content = input_prompt_list[-1].get("content", "")

    if last_input_content:
        last_occurrence_index = generated_text.rfind(last_input_content)
        if last_occurrence_index != -1:
            potential_response = generated_text[last_occurrence_index + len(last_input_content):].strip()
            if potential_response:
                # Basic cleanup
                potential_response = re.sub(r'^<\/?s?>', '', potential_response).strip()
                potential_response = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', potential_response).strip()
                # Check if the response is just whitespace or seems empty after cleanup
                if potential_response:
                    return potential_response

    # Fallback or if model correctly outputted only the response
    cleaned_text = generated_text
    if isinstance(input_prompt_list, list) and input_prompt_list:
         first_prompt_content = input_prompt_list[0].get("content", "")
         if first_prompt_content and cleaned_text.startswith(first_prompt_content):
              # Be careful not to strip if the response happens to start the same way
              pass # Let's rely more on the end-stripping heuristic above

    # General cleanup
    cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
    cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()

    # If after all this, it's empty, maybe return original generated_text?
    # Or log a warning and return the cleaned version.
    if not cleaned_text and generated_text:
         print("Warning: Parsing resulted in empty string, returning original generation.")
         return generated_text # Return original if cleaning failed

    # If input prompt wasn't found, assume the model outputted only the response (ideal case)
    # or the whole thing (fallback case). The cleaning helps for the latter.
    if last_input_content and last_occurrence_index == -1:
        print("Warning: Could not find last input prompt in LLM output. Returning cleaned full output.")

    return cleaned_text

@GPU(memory=40) # Decorator needed for Spaces resource allocation during calls
def generate_enhanced_persona(model, name, bio_text, context=""):
    """Use the LLM to enhance the persona profile."""
    print(f"Generating enhanced persona for {name}...")
    if model is None: raise ValueError("Model is not loaded.")

    enhancement_prompt = [
        {"role": "system", "content": """You are an expert AI character developer. Your task is to synthesize information into a detailed and coherent character profile. Focus on personality, potential interests, speaking style, and mannerisms based ONLY on the provided text. If the text indicates the character is a child, ensure the profile reflects age-appropriate traits. Output ONLY the enhanced character profile description. Do not include conversational introductions, explanations, apologies for limited info, or markdown formatting like headers (e.g., ### Personality). Start directly with the profile text."""},
        {"role": "user", "content": f"""Synthesize the following information about '{name}' into a character profile. Context: {context} Information Found:\n{bio_text}\n\nCreate the profile based *only* on the text above."""}
    ]

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
            outputs = model(enhancement_prompt, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.9)
        parsed_output = parse_llm_output(outputs, enhancement_prompt)
        print("Enhanced persona generated.")
        return parsed_output if parsed_output else f"Could not generate profile based on:\n{bio_text}"
    except Exception as e:
        error_msg = f"Error generating enhanced persona: {str(e)}"
        print(error_msg)
        return f"Error enhancing profile: {str(e)}\n\nUsing basic info:\n{bio_text}"

@GPU(memory=40) # Decorator needed for Spaces resource allocation during calls
def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
    """Generate an optimized system prompt for the persona."""
    print(f"Generating system prompt for {name}...")
    if model is None: raise ValueError("Model is not loaded.")

    fallback_prompt = f"""You are simulating the character '{name}'. Act and respond according to this profile:\n{enhanced_profile}\nAdditional context for the simulation: {context}\n---\nMaintain this persona consistently. Respond naturally based on the profile. Do not mention that you are an AI or a simulation. If asked about details not in the profile, you can be evasive or state you don't know/remember, consistent with the persona."""

    prompt = [
        {"role": "system", "content": """You are an expert AI prompt engineer specializing in character simulation. Your task is to create a concise and effective system prompt for an LLM that will simulate a character based on a provided profile. The system prompt should instruct the LLM to embody the character, covering: 1. Core personality, attitude, and speaking style (based on the profile). 2. Key interests or knowledge areas (if mentioned in the profile). 3. How to handle questions outside its knowledge (e.g., be evasive, admit ignorance naturally). 4. Explicitly state it should *not* break character or mention being an AI. 5. Incorporate age-appropriateness if the profile suggests a specific age group. Output ONLY the system prompt itself. Do not add any explanation or introductory text."""},
        {"role": "user", "content": f"""Create a system prompt for an AI to simulate the character '{name}'. Context for simulation: {context} Character Profile:\n{enhanced_profile}\n\nGenerate the system prompt based *only* on the profile and context provided."""}
    ]

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
            outputs = model(prompt, max_new_tokens=300, do_sample=True, temperature=0.6)
        parsed_output = parse_llm_output(outputs, prompt)
        print("System prompt generated.")
        return parsed_output if parsed_output else fallback_prompt
    except Exception as e:
        error_msg = f"Error generating system prompt: {str(e)}"
        print(error_msg)
        return fallback_prompt

@GPU(memory=40) # Decorator needed for Spaces resource allocation during calls
def generate_response(model, messages):
    """Generate a response using the LLM."""
    print("Generating response...")
    if model is None: raise ValueError("Model is not loaded.")
    if not messages: return "Error: No message history provided."

    try:
        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
            # Ensure pad_token_id is set correctly if needed, especially for batching or specific models
            outputs = model(
                messages,
                max_new_tokens=512,
                do_sample=True,
                top_p=0.9,
                temperature=0.7,
                use_cache=True,
                # Check if EOS token is needed for this model/pipeline setup
                pad_token_id=model.tokenizer.eos_token_id if model.tokenizer.eos_token_id else None
            )
        parsed_output = parse_llm_output(outputs, messages)
        print("Response generated.")
        return parsed_output if parsed_output else "..."
    except Exception as e:
        error_msg = f"Error during response generation: {str(e)}"
        print(error_msg)
        # Consider if the specific error should be shown to the user
        return f"Sorry, I encountered an error trying to respond."


# --- Persona Chat Class ---
class PersonaChat:
    def __init__(self):
        self.model = None
        self.system_prompt = "You are a helpful assistant."
        self.persona_name = "Assistant"
        self.persona_context = ""
        self.messages = []
        self.enhanced_profile = ""
        self.model_loaded = False

    # No @GPU decorator needed here typically, as it calls functions that ARE decorated
    def load_model_if_needed(self):
        """Loads the model if it hasn't been loaded successfully."""
        if not self.model_loaded or self.model is None: # Check self.model too
            print("Model not loaded or instance lost. Attempting to load...")
            # Call the @GPU decorated load_model function
            self.model = load_model() # This function IS decorated
            if self.model is None:
                # load_model now raises error, but double-check here
                raise RuntimeError("Failed to load the language model. Cannot proceed.")
            else:
                self.model_loaded = True
                print("Model loaded successfully within PersonaChat instance.")
        # else: print("Model already loaded.") # Reduce log noise

    # No @GPU decorator needed here typically
    def set_persona(self, name, context=""):
        """Orchestrates persona creation: search, enhance, generate prompt."""
        # This method calls other functions that have @GPU decorators
        try:
            self.load_model_if_needed() # Ensures model is ready

            self.persona_name = name
            self.persona_context = context
            self.messages = []
            self.enhanced_profile = ""

            status = f"Searching for information about {name}..."
            yield status, "", "", [{"role": "system", "content": "Initializing persona creation..."}] # Added empty profile yield

            search_results = search_person(name, context)
            if isinstance(search_results, str) and search_results.startswith("Error"):
                error_msg = f"Failed to set persona: {search_results}"
                yield error_msg, "", "", [{"role": "system", "content": error_msg}]
                return

            bio_text = extract_text_from_search_results(search_results)
            if bio_text.startswith("Could not extract text"):
                 yield f"Warning: {bio_text}", "", "", [{"role": "system", "content": bio_text}]

            status = f"Creating enhanced profile for {name}..."
            yield status, "", bio_text, [{"role": "system", "content": status}] # Show basic bio while enhancing

            # Call the @GPU decorated function
            self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)
            profile_for_prompt = self.enhanced_profile
            if self.enhanced_profile.startswith("Error enhancing profile"):
                 yield f"Warning: Could not enhance profile. Using basic info.", "", self.enhanced_profile, [{"role": "system", "content": self.enhanced_profile}]
                 profile_for_prompt = bio_text # Fallback

            status = f"Generating optimal system prompt for {name}..."
            # Yield the enhanced profile while generating prompt
            yield status, self.enhanced_profile, self.enhanced_profile, [{"role": "system", "content": status}]

            # Call the @GPU decorated function
            self.system_prompt = generate_system_prompt_with_llm(self.model, name, profile_for_prompt, context)
            self.messages = [{"role": "system", "content": self.system_prompt}]

            yield f"Persona set to '{name}'. Ready to chat!", self.system_prompt, self.enhanced_profile, self.messages

        except RuntimeError as e:
            error_msg = f"Critical Error: {str(e)}"
            print(error_msg)
            yield error_msg, "", "", [{"role": "system", "content": error_msg}]
        except Exception as e:
            error_msg = f"An unexpected error occurred during persona setup: {str(e)}"
            print(error_msg)
            # Attempt to yield current state even on error
            yield error_msg, self.system_prompt, self.enhanced_profile, [{"role": "system", "content": error_msg}]

    # No @GPU decorator needed here typically
    def chat(self, user_message):
        """Processes a user message and returns the AI's response."""
        # This method calls generate_response which has the @GPU decorator
        try:
            self.load_model_if_needed()

            if not self.messages:
                 print("Error: Chat called before persona was set.")
                 return "Please set a persona first using the controls above."

            print(f"User message: {user_message}")
            formatted_message = {"role": "user", "content": user_message}
            # Keep internal history, pass copy to model if needed, but pipeline usually handles state
            self.messages.append(formatted_message)

            # Call the @GPU decorated function
            response = generate_response(self.model, self.messages)

            # Append assistant response IF generation succeeded
            if not response.startswith("Sorry, I encountered an error"):
                 assistant_message = {"role": "assistant", "content": response}
                 self.messages.append(assistant_message)
                 print(f"Assistant response: {response}")
            else:
                 print(f"Assistant error response: {response}")
                 # Do not add the error message itself to the persistent history
                 # Let the UI show the error, but don't make the bot repeat it next turn.

            return response

        except RuntimeError as e:
            error_msg = f"Critical Error: {str(e)}. Cannot generate response."
            print(error_msg)
            return error_msg
        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"
            print(error_msg)
            return f"Sorry, I encountered an error: {str(e)}"


# --- Gradio Interface ---
def create_interface():
    persona_chat = PersonaChat() # Instantiate the handler class

    css = """
    .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
    .main-container { max-width: 1200px; margin: auto; padding: 0; }
    .header { background: linear-gradient(90deg, #2c3e50, #4ca1af); color: white; padding: 20px; border-radius: 10px 10px 0 0; margin-bottom: 20px; text-align: center; }
    .setup-section { background-color: #f9f9f9; border-radius: 10px; padding: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 20px; }
    .chat-section { background-color: white; border-radius: 10px; padding: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); }
    .status-bar { background: #e9ecef; padding: 10px 15px; border-radius: 5px; margin: 15px 0; font-weight: 500; border: 1px solid #ced4da; }
    .chat-container { border: 1px solid #eaeaea; border-radius: 10px; height: 500px !important; overflow-y: auto; background-color: #ffffff; padding: 10px; }
    .message-input { margin-top: 10px; }
    .send-button { background-color: #2c3e50 !important; color: white !important; }
    .persona-button { background-color: #4ca1af !important; color: white !important; }
    .system-prompt-display { background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-top: 15px; border: 1px solid #e0e0e0; font-family: monospace; white-space: pre-wrap; word-wrap: break-word; }
    .footer { text-align: center; margin-top: 20px; font-size: 0.9rem; color: #666; }
    /* Use default chatbot message styling provided by type='messages' */
    .typing-indicator { color: #aaa; font-style: italic; }
    """

    with gr.Blocks(css=css, title="AI Persona Simulator") as interface:
        with gr.Row(elem_classes="main-container"):
            with gr.Column():
                with gr.Column(elem_classes="header"):
                    gr.Markdown("# AI Persona Simulator")
                    gr.Markdown("Create and interact with AI-driven character simulations")

                with gr.Column(elem_classes="setup-section"):
                    gr.Markdown("### 1. Create Your Persona")
                    gr.Markdown("Enter a name and context. The AI will search, build a profile, and prepare for chat.")
                    with gr.Row():
                        name_input = gr.Textbox(label="Character Name", placeholder="e.g., Sherlock Holmes, Erenalp, A curious 7th grader", elem_id="name_input")
                        context_input = gr.Textbox(label="Character Context / Description", placeholder="e.g., Living in 221B Baker Street, London. OR 7th grade, loves math...", lines=2, elem_id="context_input")
                    set_persona_button = gr.Button("Create Persona & Start Chat", variant="primary", elem_classes="persona-button")
                    status_output = gr.Textbox(label="Status", value="Enter details above and click 'Create Persona'.", interactive=False, elem_classes="status-bar")
                    with gr.Accordion("View Generated Details", open=False):
                        enhanced_profile_display = gr.TextArea(label="Enhanced Profile (Generated by AI)", interactive=False, lines=10, elem_classes="system-prompt-display")
                        system_prompt_display = gr.TextArea(label="System Prompt (Instructions for the AI)", interactive=False, lines=10, elem_classes="system-prompt-display")


                with gr.Column(elem_classes="chat-section"):
                    gr.Markdown("### 2. Chat with Your Character")
                    character_name_display = gr.Markdown(value="*No persona created yet*", elem_id="character-name-display")
                    # ***** FIX GRADIO WARNINGS *****
                    chatbot = gr.Chatbot(
                        label="Conversation",
                        height=450,
                        elem_classes="chat-container",
                        # bubble_full_width=False, # Deprecated
                        avatar_images=(None, "🤖"), # User default, Bot emoji
                        type="messages" # ***** USE RECOMMENDED TYPE *****
                    )
                    with gr.Row():
                        msg_input = gr.Textbox(label="Your message", placeholder="Type your message here and press Enter...", elem_classes="message-input", scale=4)
                        send_button = gr.Button("Send", variant="primary", elem_classes="send-button", scale=1)

                with gr.Column(elem_classes="footer"):
                    gr.Markdown(f"Powered by {MODEL_ID}")

        # --- Event Handlers ---
        def set_persona_flow(name, context):
            if not name:
                yield "Status: Please enter a character name.", "", "", "*No persona created yet*", []
                return

            initial_status = f"Creating persona for '{name}'..."
            initial_character_display = f"### Preparing to chat with {name}..."
            initial_prompt = "System prompt will appear here..."
            initial_profile = "Enhanced profile will appear here..."
            # Start with empty history for messages type
            initial_history = []

            yield initial_status, initial_prompt, initial_profile, initial_character_display, initial_history

            final_status, final_prompt, final_profile = "Error", "", ""
            final_history = initial_history
            try:
                # Use the PersonaChat instance's method generator
                # Expected yield order: status, system_prompt, enhanced_profile, messages_list
                for status_update, prompt_update, profile_update, history_update in persona_chat.set_persona(name, context):
                    final_status, final_prompt, final_profile = status_update, prompt_update, profile_update
                    if isinstance(history_update, list): final_history = history_update

                    character_display = f"### Preparing chat with {name}..."
                    if "Ready to chat" in status_update:
                        character_display = f"### Chatting with {name}"
                    elif "Error" in status_update:
                         character_display = f"### Error creating {name}"

                    yield status_update, final_prompt, final_profile, character_display, final_history
                    time.sleep(0.1) # Small delay for UI update visibility

            except Exception as e:
                 error_msg = f"Failed to set persona (interface error): {str(e)}"
                 print(error_msg)
                 # Try to yield error state
                 yield error_msg, final_prompt, final_profile, f"### Error creating {name}", final_history


        def send_message_flow(message, history):
             # Ensure history is a list (for messages type)
            if history is None: history = []
            if not message.strip():
                return "", history

            # Check if persona is ready (looks for system message in internal state)
            if not persona_chat.messages or persona_chat.messages[0]['role'] != 'system':
                history.append({"role": "user", "content": message})
                history.append({"role": "assistant", "content": "Error: Please create a valid persona first."})
                return "", history

            # Append user message to UI history
            history.append({"role": "user", "content": message})
            # Append placeholder for bot response (typing indicator)
            history.append({"role": "assistant", "content": None}) # Use None for typing indicator with type='messages'

            yield "", history # Update UI to show user msg + typing

            # Call chat method (uses internal state, returns string response)
            response_text = persona_chat.chat(message)

            # Update the placeholder in UI history with the actual response
            history[-1]["content"] = response_text

            yield "", history # Update UI with final response


        set_persona_button.click(
            set_persona_flow,
            inputs=[name_input, context_input],
            outputs=[status_output, system_prompt_display, enhanced_profile_display, character_name_display, chatbot]
        )
        send_button.click(
            send_message_flow,
            inputs=[msg_input, chatbot],
            outputs=[msg_input, chatbot]
        )
        msg_input.submit(
            send_message_flow,
            inputs=[msg_input, chatbot],
            outputs=[msg_input, chatbot]
        )

    return interface

# --- Main Execution ---
if __name__ == "__main__":
    print("Starting Gradio application for Hugging Face Spaces...")
    demo = create_interface()
    demo.queue().launch( # queue() is recommended for Spaces
        server_name="0.0.0.0",
        server_port=7860,
        # share=False is default and usually needed for Spaces deployment structure
        show_error=True, # Good for debugging in Spaces logs
        debug=True # More verbose logging
    )