ErenalpCet committed
Commit bdf9a64 · verified · 1 Parent(s): a84a428

Update app.py

Files changed (1): app.py (+536 -360)
app.py CHANGED
@@ -1,183 +1,341 @@
 import gradio as gr
 import transformers
 import torch
-from transformers import pipeline
 from duckduckgo_search import DDGS
 import re
 import time
 from huggingface_hub import HfApi
-from spaces import GPU
 
 # --- Constants and Configuration ---
 MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
 MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation
 
-@GPU(memory=40)
 def load_model():
     """Load the LLM model optimized for A100 GPU."""
     try:
         pipe = pipeline(
             "text-generation",
             model=MODEL_ID,
             torch_dtype=torch.bfloat16,
             device_map="auto",
             model_kwargs={
-                "load_in_8bit": True,
                 "use_cache": True,
-                "max_memory": {0: MAX_GPU_MEMORY}
             }
         )
         return pipe
     except Exception as e:
-        print(f"Error loading model: {e}")
-        return None
 
 def search_person(name, context=""):
     """Search for information about a person using DuckDuckGo."""
     results = []
     search_terms = []
-
-    if "grade" in context.lower():
-        grade_match = re.search(r'(\d+)(st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
             grade = grade_match.group(1)
             search_terms.append(f"{name} student {grade} grade")
-
-    search_terms.extend([
-        f"{name} {context}" if context else name,
-        f"{name} interests",
-        f"{name} personality"
-    ])
-
     try:
         with DDGS() as ddgs:
             for term in search_terms:
-                search_results = list(ddgs.text(term, max_results=3))
                 results.extend(search_results)
     except Exception as e:
-        return f"Error during search: {str(e)}"
-
-    if not results and context:
         return create_synthetic_profile(name, context)
-
     return results
 
 def create_synthetic_profile(name, context):
     """Create a synthetic profile when search returns no results."""
     profile = {
-        "body": f"{name} is a person described as: {context}."
     }
-
     if "grade" in context.lower():
-        grade_match = re.search(r'(\d+)(st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
-            grade = grade_match.group(1)
-            age = 5 + int(grade)
-            profile["body"] += f" {name} is approximately {age} years old and in {grade}th grade."
-            profile["body"] += f" Like most {grade}th graders, {name} is likely interested in friends, learning new things, and developing their own identity."
-
     return [profile]
 
 def extract_text_from_search_results(search_results):
     """Extract relevant text from search results."""
     combined_text = ""
     for result in search_results:
-        if isinstance(result, dict) and 'body' in result:
-            combined_text += result['body'] + "\n\n"
-
-    combined_text = re.sub(r'\s+', ' ', combined_text)
-    return combined_text
 
-@GPU(memory=40)
 def generate_enhanced_persona(model, name, bio_text, context=""):
-    """Use the LLM to enhance the persona profile with GPU optimization."""
     enhancement_prompt = [
-        {"role": "system", "content": """You are an expert AI character developer.
-Your task is to create a detailed character profile based on limited information.
-Output ONLY the enhanced profile with no additional explanations or formatting."""},
-        {"role": "user", "content": f"""Here's some information I found about {name}:
 
 {bio_text}
 
-Additional context: {context}
-
-Based on this information, create a detailed, rich character profile for {name}.
-Include personality traits, speaking style, interests, and mannerisms.
-If this is a child, include age-appropriate details."""}
     ]
-
     try:
-        with torch.cuda.amp.autocast():
-            outputs = model(enhancement_prompt, max_new_tokens=1024, do_sample=True)
-
-        if isinstance(outputs, list) and len(outputs) > 0:
-            if isinstance(outputs[0], dict) and "generated_text" in outputs[0]:
-                return outputs[0]["generated_text"]
-        return bio_text
     except Exception as e:
-        print(f"Error generating enhanced persona: {str(e)}")
-        return bio_text
 
-@GPU(memory=40)
 def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
     """Generate an optimized system prompt for the persona."""
     prompt = [
-        {"role": "system", "content": """You are an expert AI prompt engineer.
-Your task is to create an optimal system prompt for character simulation."""},
-        {"role": "user", "content": f"""Create a system prompt for {name} based on:
 
-Profile: {enhanced_profile}
-Context: {context}
 
-The prompt should define the character's:
-1. Personality and speaking style
-2. Knowledge boundaries
-3. Typical responses
-4. Age-appropriate behavior if applicable"""}
     ]
-
     try:
-        with torch.cuda.amp.autocast():
-            outputs = model(prompt, max_new_tokens=1024, do_sample=True)
-
-        if isinstance(outputs, list) and len(outputs) > 0:
-            if isinstance(outputs[0], dict) and "generated_text" in outputs[0]:
-                return outputs[0]["generated_text"]
-
-        return f"""You are {name}. Use this information to respond:
-{enhanced_profile}
-{context}
-Always stay in character and respond naturally."""
-
     except Exception as e:
-        print(f"Error generating system prompt: {str(e)}")
-        return f"""You are {name}. Use this information to respond:
-{enhanced_profile}
-{context}
-Always stay in character and respond naturally."""
 
-@GPU(memory=40)
 def generate_response(model, messages):
-    """Generate a response using the LLM with GPU optimization."""
     try:
-        with torch.cuda.amp.autocast():
             outputs = model(
                 messages,
-                max_new_tokens=512,
                 do_sample=True,
                 top_p=0.9,
                 temperature=0.7,
-                use_cache=True
             )
-
-        if isinstance(outputs, list) and len(outputs) > 0:
-            if isinstance(outputs[0], dict) and "generated_text" in outputs[0]:
-                return outputs[0]["generated_text"]
-        return "I couldn't generate a proper response. Please try again."
     except Exception as e:
-        print(f"Error generating response: {str(e)}")
-        return f"Error: {str(e)}"
 
 class PersonaChat:
     def __init__(self):
         self.model = None
@@ -186,344 +344,362 @@ class PersonaChat:
         self.persona_context = ""
         self.messages = []
         self.enhanced_profile = ""
-
-    @GPU(memory=40)
     def load_model_if_needed(self):
-        if self.model is None:
             self.model = load_model()
-
     def set_persona(self, name, context=""):
-        self.load_model_if_needed()
-        self.persona_name = name
-        self.persona_context = context
-
-        status = f"Searching for information about {name}..."
-        yield status, "", [{"role": "system", "content": "Starting persona creation..."}]
-
-        search_results = search_person(name, context)
-        if isinstance(search_results, str) and search_results.startswith("Error"):
-            yield f"Error: {search_results}", "", [{"role": "system", "content": f"Error: {search_results}"}]
-            return
-
-        bio_text = extract_text_from_search_results(search_results)
-
-        status = f"Creating enhanced profile for {name}..."
-        yield status, "", [{"role": "system", "content": status}]
-
-        self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)
-
-        status = f"Generating optimal system prompt for {name}..."
-        yield status, "", [{"role": "system", "content": status}]
-
-        self.system_prompt = generate_system_prompt_with_llm(self.model, name, self.enhanced_profile, context)
-        self.messages = [{"role": "system", "content": self.system_prompt}]
-
-        yield f"Persona set to {name}. Ready to chat!", self.system_prompt, self.messages
-
     def chat(self, user_message):
-        """Process a chat message and return the response."""
-        self.load_model_if_needed()
-
         try:
-            formatted_message = {"role": "user", "content": user_message} if isinstance(user_message, str) else user_message
             self.messages.append(formatted_message)
-
             response = generate_response(self.model, self.messages)
-
             assistant_message = {"role": "assistant", "content": response}
             self.messages.append(assistant_message)
-
             return response
-
         except Exception as e:
             error_msg = f"Error generating response: {str(e)}"
             print(error_msg)
             return error_msg
 
 def create_interface():
     persona_chat = PersonaChat()
-
-    # Custom CSS for better UI
     css = """
-    .gradio-container {
-        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-    }
-
-    .main-container {
-        max-width: 1200px;
-        margin: auto;
-        padding: 0;
-    }
-
-    .header {
-        background: linear-gradient(90deg, #2c3e50, #4ca1af);
-        color: white;
-        padding: 20px;
-        border-radius: 10px 10px 0 0;
-        margin-bottom: 20px;
-        text-align: center;
-    }
-
-    .setup-section {
-        background-color: #f9f9f9;
-        border-radius: 10px;
-        padding: 20px;
-        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-        margin-bottom: 20px;
-    }
-
-    .chat-section {
-        background-color: white;
-        border-radius: 10px;
-        padding: 20px;
-        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-    }
-
-    .status-bar {
-        background: #f0f0f0;
-        padding: 10px 15px;
-        border-radius: 5px;
-        margin: 15px 0;
-        font-weight: 500;
-    }
-
-    .chat-container {
-        border: 1px solid #eaeaea;
-        border-radius: 10px;
-        height: 500px !important;
-        overflow-y: auto;
-        background-color: #f9f9f9;
-    }
-
-    .message-input {
-        margin-top: 10px;
-    }
-
-    .send-button {
-        background-color: #2c3e50 !important;
-    }
-
-    .persona-button {
-        background-color: #4ca1af !important;
-    }
-
-    .system-prompt {
-        background-color: #f5f5f5;
-        border-radius: 8px;
-        padding: 10px;
-        margin-top: 15px;
-        border: 1px solid #e0e0e0;
-    }
-
-    .footer {
-        text-align: center;
-        margin-top: 20px;
-        font-size: 0.9rem;
-        color: #666;
-    }
-
-    /* Avatar styling */
-    .user-message {
-        background-color: #e1f5fe;
-        border-radius: 15px 15px 0 15px;
-        padding: 10px 15px;
-        margin: 8px 0;
-        max-width: 80%;
-        float: right;
-        clear: both;
-    }
-
-    .bot-message {
-        background-color: #f0f0f0;
-        border-radius: 15px 15px 15px 0;
-        padding: 10px 15px;
-        margin: 8px 0;
-        max-width: 80%;
-        float: left;
-        clear: both;
-    }
-
-    /* Loading animation */
-    @keyframes pulse {
-        0% { opacity: 0.6; }
-        50% { opacity: 1; }
-        100% { opacity: 0.6; }
-    }
-
-    .loading {
-        animation: pulse 1.5s infinite;
-        padding: 10px;
-        background-color: #eee;
-        border-radius: 5px;
-        display: inline-block;
-    }
     """
-
     with gr.Blocks(css=css, title="AI Persona Simulator") as interface:
         with gr.Row(elem_classes="main-container"):
             with gr.Column():
                 # Header
                 with gr.Column(elem_classes="header"):
                     gr.Markdown("# AI Persona Simulator")
-                    gr.Markdown("Create lifelike character simulations with advanced AI")
-
                 # Setup Section
                 with gr.Column(elem_classes="setup-section"):
-                    gr.Markdown("### Create Your Persona")
-                    gr.Markdown("Enter details about the character you want to simulate")
-
                     with gr.Row():
                         name_input = gr.Textbox(
                             label="Character Name",
-                            placeholder="e.g. Erenalp",
-                            elem_classes="input-field"
                         )
-
-                    with gr.Row():
                         context_input = gr.Textbox(
-                            label="Character Context",
-                            placeholder="e.g. in 7th grade, loves math and video games, has a pet cat",
                             lines=2,
-                            elem_classes="input-field"
-                        )
-
-                    with gr.Row():
-                        set_persona_button = gr.Button(
-                            "Create Persona",
-                            variant="primary",
-                            elem_classes="persona-button"
                         )
-
                     status_output = gr.Textbox(
-                        label="Status",
                         interactive=False,
                         elem_classes="status-bar"
                     )
-
-                    with gr.Accordion("Character System Prompt", open=False, elem_classes="system-prompt-section"):
                         system_prompt_display = gr.TextArea(
-                            label="",
                             interactive=False,
                             lines=10,
-                            elem_classes="system-prompt"
                         )
-
                 # Chat Section
                 with gr.Column(elem_classes="chat-section"):
-                    gr.Markdown("### Chat with Your Character")
-
-                    # Display character name dynamically
                     character_name_display = gr.Markdown(
-                        elem_id="character-name",
-                        value="Start by creating a persona above"
                     )
-
                     chatbot = gr.Chatbot(
-                        label="",
                         height=450,
                         elem_classes="chat-container",
-                        avatar_images=("👤", "🤖"),
-                        type="messages"
                     )
-
                     with gr.Row():
                         msg_input = gr.Textbox(
                             label="Your message",
-                            placeholder="Type your message here...",
-                            elem_classes="message-input"
                         )
                         send_button = gr.Button(
-                            "Send",
                             variant="primary",
-                            elem_classes="send-button"
                         )
-
                 # Footer
                 with gr.Column(elem_classes="footer"):
-                    gr.Markdown("Powered by Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct")
-
-        # Functions
-        def update_character_name(name):
-            if name:
-                return f"### Chatting with {name}"
-            return "### Start by creating a persona above"
-
-        def set_persona_generator(name, context):
-            initial_status = f"Creating persona for {name}..."
-            initial_character_display = f"### Creating persona for {name}..."
-            initial_prompt = ""
-            initial_history = [{"role": "system", "content": "Initializing..."}]
-
-            # Initial yield
-            yield initial_status, initial_prompt, initial_history, initial_character_display
-
-            # Process persona creation
-            for status, prompt, history in persona_chat.set_persona(name, context):
-                character_display = f"### Creating persona for {name}..."
-                if "Ready to chat" in status:
-                    character_display = f"### Chatting with {name}"
-                yield status, prompt, history, character_display
-
-        def send_message(message, history):
-            if not message.strip():
-                return "", history
-
-            if not persona_chat.messages:
-                new_history = list(history) if history else []
-                new_history.append({"role": "user", "content": message})
-                new_history.append({"role": "assistant", "content": "Please create a persona first using the form above."})
-                return "", new_history
-
            try:
-                # Show typing indicator
-                new_history = list(history) if history else []
-                new_history.append({"role": "user", "content": message})
-                new_history.append({"role": "assistant", "content": "..."})
-                yield "", new_history
-
-                # Generate actual response
-                response = persona_chat.chat(message)
-                new_history[-1]["content"] = response
-                yield "", new_history
-
            except Exception as e:
-                print(f"Error in send_message: {str(e)}")
-                new_history[-1]["content"] = "Sorry, there was an error processing your message."
-                yield "", new_history
-
-        # Event handlers
         set_persona_button.click(
-            set_persona_generator,
             inputs=[name_input, context_input],
-            outputs=[status_output, system_prompt_display, chatbot, character_name_display]
         )
-
-        name_input.change(
-            update_character_name,
-            inputs=[name_input],
-            outputs=[character_name_display]
-        )
-
         send_button.click(
-            send_message,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
         )
-
         msg_input.submit(
-            send_message,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
        )
-
    return interface
 
 if __name__ == "__main__":
     demo = create_interface()
-    demo.queue(max_size=5).launch(
-        server_name="0.0.0.0",
         server_port=7860,
-        share=False,
-        show_error=True,
-        debug=True
     )
 
+# --- Required Installs ---
+# pip install gradio transformers torch duckduckgo_search huggingface_hub accelerate bitsandbytes sentencepiece
+
 import gradio as gr
 import transformers
 import torch
+from transformers import pipeline, BitsAndBytesConfig  # Added BitsAndBytesConfig
 from duckduckgo_search import DDGS
 import re
 import time
 from huggingface_hub import HfApi
+# from spaces import GPU  # Specific to the Hugging Face Spaces deployment environment
 
 # --- Constants and Configuration ---
 MODEL_ID = "nvidia/Llama-3.1-Nemotron-8B-UltraLong-4M-Instruct"
 MAX_GPU_MEMORY = "40GiB"  # A100 memory allocation
 
+# --- GPU Decorator (placeholder if not using the HF Spaces GPU class) ---
+# If not running on Hugging Face Spaces with their @GPU decorator, remove or
+# comment out the @GPU decorators below; resource allocation may need to be
+# handled differently in other environments.
+try:
+    from spaces import GPU
+except ImportError:
+    print("Warning: 'spaces.GPU' not found. Assuming standard environment.")
+    # Define a dummy decorator if 'spaces' is not available
+    def GPU(memory=None):
+        def decorator(func):
+            return func
+        return decorator
+
+# --- Model Loading ---
+# @GPU(memory=40)  # Decorator is specific to HF Spaces
 def load_model():
     """Load the LLM model optimized for A100 GPU."""
+    print(f"Attempting to load model: {MODEL_ID}")
     try:
+        # Configure 8-bit quantization
+        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+
         pipe = pipeline(
             "text-generation",
             model=MODEL_ID,
             torch_dtype=torch.bfloat16,
             device_map="auto",
             model_kwargs={
+                # Use quantization_config instead of the deprecated load_in_8bit kwarg
+                "quantization_config": quantization_config,
                 "use_cache": True,
+                # max_memory is usually handled implicitly by device_map="auto" with
+                # accelerate, but setting it explicitly can be safer. Adjust if needed.
+                # "max_memory": {0: MAX_GPU_MEMORY}
             }
         )
+        print(f"Model {MODEL_ID} loaded successfully.")
         return pipe
     except Exception as e:
+        print(f"Error loading model '{MODEL_ID}': {e}")
+        # Optionally re-raise to halt execution if model loading is critical
+        # raise e
+        return None  # Return None to indicate failure
 
+# --- Web Search ---
 def search_person(name, context=""):
     """Search for information about a person using DuckDuckGo."""
+    print(f"Searching for: {name} with context: {context}")
     results = []
     search_terms = []
+
+    # Prioritize a context-specific search
+    if context:
+        search_terms.append(f"{name} {context}")
+        # Add a grade-specific search if applicable
+        grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
             grade = grade_match.group(1)
             search_terms.append(f"{name} student {grade} grade")
+
+    # Add general search terms
+    search_terms.append(f"{name}")  # Just the name
+    search_terms.append(f"{name} biography")
+    search_terms.append(f"{name} interests")
+    search_terms.append(f"{name} personality")
+
+    # Remove duplicates while preserving order
+    search_terms = list(dict.fromkeys(search_terms))
+    print(f"Using search terms: {search_terms}")
+
     try:
         with DDGS() as ddgs:
             for term in search_terms:
+                print(f"Searching DDG for: '{term}'")
+                # Fetch fewer results per term to keep the context concise
+                search_results = list(ddgs.text(term, max_results=2))
                 results.extend(search_results)
+                time.sleep(0.2)  # Small delay between searches
     except Exception as e:
+        error_msg = f"Error during DuckDuckGo search: {str(e)}"
+        print(error_msg)
+        return error_msg  # Return the error message string
+
+    if not results:
+        print(f"No search results found for {name}. Creating synthetic profile.")
         return create_synthetic_profile(name, context)
+
+    print(f"Found {len(results)} potential search results.")
     return results
 
 def create_synthetic_profile(name, context):
     """Create a synthetic profile when search returns no results."""
     profile = {
+        "title": f"Synthetic Profile for {name}",
+        "href": "",
+        "body": f"{name} is a person described with the context: '{context}'. "
     }
+
+    # Try to infer age from grade
     if "grade" in context.lower():
+        grade_match = re.search(r'(\d+)(?:st|nd|rd|th)?\s+grade', context.lower())
         if grade_match:
+            try:
+                grade = int(grade_match.group(1))
+                age = 5 + grade  # Approximate age based on the US school system
+                profile["body"] += f"Based on being in {grade}th grade, {name} is likely around {age} years old. "
+                profile["body"] += "Typical interests for this age might include friends, hobbies, school subjects, and developing independence. "
+            except ValueError:
+                profile["body"] += f"The grade mentioned ('{grade_match.group(1)}') could not be parsed to estimate age. "
+
+    profile["body"] += "Since no public information was found, this profile is based solely on the provided context."
+    # Return a list containing the dictionary, matching search_person's format
     return [profile]
 
 def extract_text_from_search_results(search_results):
     """Extract relevant text from search results."""
+    if isinstance(search_results, str):  # search_person returned an error string
+        return f"Could not extract text due to search error: {search_results}"
+
     combined_text = ""
+    seen_bodies = set()
+    count = 0
+    max_results_to_process = 5  # Limit the number of results processed
+
     for result in search_results:
+        if count >= max_results_to_process:
+            break
+        if isinstance(result, dict) and 'body' in result and result['body']:
+            body = result['body'].strip()
+            # Avoid adding duplicate snippets
+            if body not in seen_bodies:
+                combined_text += body + "\n\n"
+                seen_bodies.add(body)
+                count += 1
+
+    if not combined_text:
+        return "No relevant text found in search results."
+
+    # Basic cleaning
+    combined_text = re.sub(r'\s+', ' ', combined_text).strip()
+    # Limit length to avoid an excessive prompt size
+    max_length = 2000  # Characters
+    return combined_text[:max_length] + "..." if len(combined_text) > max_length else combined_text
 
+# --- LLM Generation Functions ---
+
+def parse_llm_output(full_output, input_prompt_list):
+    """
+    Attempt to parse only the newly generated text from the LLM output,
+    since the output may contain the input prompt messages.
+    """
+    # If the output is a list of dicts (as expected from the pipeline), get the text
+    if isinstance(full_output, list) and len(full_output) > 0:
+        if isinstance(full_output[0], dict) and "generated_text" in full_output[0]:
+            generated_text = full_output[0]["generated_text"]
+        else:
+            return str(full_output)  # Unexpected format, return raw output
+    elif isinstance(full_output, str):
+        generated_text = full_output  # Already a string
+    else:
+        return str(full_output)  # Unexpected format
+
+    # Heuristic: find the last message's content from the input prompt.
+    # The actual formatting depends on the tokenizer's chat template;
+    # this is a simplified approach.
+    last_input_content = ""
+    if isinstance(input_prompt_list, list) and input_prompt_list:
+        last_input_content = input_prompt_list[-1].get("content", "")
+
+    # Try to find the last input message content in the generated text.
+    # If found, take the text after it. This may fail if the formatting differs.
+    if last_input_content:
+        last_occurrence_index = generated_text.rfind(last_input_content)
+        if last_occurrence_index != -1:
+            potential_response = generated_text[last_occurrence_index + len(last_input_content):].strip()
+            # Further heuristics could be added (e.g., look for assistant role markers)
+            if potential_response:  # Check that we got something after the input
+                # Simple cleanup for role markers the model may prepend
+                potential_response = re.sub(r'^<\/?s?>', '', potential_response).strip()
+                potential_response = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', potential_response).strip()
+                return potential_response
+
+    # Fallback: if parsing fails, return the whole generated text.
+    # (If the prompt asked for ONLY the output, the model may have behaved correctly.)
+    # Clean up boilerplate that models often prepend.
+    cleaned_text = generated_text
+    if isinstance(input_prompt_list, list) and input_prompt_list:
+        # Remove initial prompt remnants if possible (very basic)
+        first_prompt_content = input_prompt_list[0].get("content", "")
+        if first_prompt_content and cleaned_text.startswith(first_prompt_content):
+            cleaned_text = cleaned_text[len(first_prompt_content):].strip()
+
+    # Simple cleanup for common markers
+    cleaned_text = re.sub(r'^<\/?s?>', '', cleaned_text).strip()
+    cleaned_text = re.sub(r'^(assistant|ASSISTANT|System|SYSTEM)[:\s]*', '', cleaned_text).strip()
+
+    print("Warning: Could not reliably parse LLM response. Returning cleaned full output.")
+    return cleaned_text  # The potentially full (but cleaned) text as a fallback
 
+# @GPU(memory=40)  # Decorator is specific to HF Spaces
 def generate_enhanced_persona(model, name, bio_text, context=""):
+    """Use the LLM to enhance the persona profile."""
+    print(f"Generating enhanced persona for {name}...")
+    if model is None:
+        raise ValueError("Model is not loaded.")
+
     enhancement_prompt = [
+        {"role": "system", "content": """You are an expert AI character developer. Your task is to synthesize information into a detailed and coherent character profile. Focus on personality, potential interests, speaking style, and mannerisms based ONLY on the provided text. If the text indicates the character is a child, ensure the profile reflects age-appropriate traits.
+
+Output ONLY the enhanced character profile description. Do not include conversational introductions, explanations, apologies for limited info, or markdown formatting like headers (e.g., ### Personality). Start directly with the profile text."""},
+        {"role": "user", "content": f"""Synthesize the following information about '{name}' into a character profile.
+Context: {context}
+Information Found:
 {bio_text}
 
+Create the profile based *only* on the text above."""}
     ]
+
     try:
+        # Use torch.amp.autocast instead of the deprecated torch.cuda.amp.autocast
+        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
+            outputs = model(enhancement_prompt, max_new_tokens=512, do_sample=True, temperature=0.7, top_p=0.9)
+
+        # Parse the output
+        parsed_output = parse_llm_output(outputs, enhancement_prompt)
+        print("Enhanced persona generated.")
+        # Return the parsed text, or fall back to the original bio if parsing fails badly
+        return parsed_output if parsed_output else bio_text
+
     except Exception as e:
+        error_msg = f"Error generating enhanced persona: {str(e)}"
+        print(error_msg)
+        # Fall back to the original bio text in case of error
+        return f"Error enhancing profile: {str(e)}\n\nUsing basic info:\n{bio_text}"
 
+# @GPU(memory=40)  # Decorator is specific to HF Spaces
 def generate_system_prompt_with_llm(model, name, enhanced_profile, context=""):
     """Generate an optimized system prompt for the persona."""
+    print(f"Generating system prompt for {name}...")
+    if model is None:
+        raise ValueError("Model is not loaded.")
+
+    fallback_prompt = f"""You are simulating the character '{name}'. Act and respond according to this profile:
+{enhanced_profile}
+Additional context for the simulation: {context}
+---
+Maintain this persona consistently. Respond naturally based on the profile. Do not mention that you are an AI or a simulation. If asked about details not in the profile, you can be evasive or state you don't know/remember, consistent with the persona."""
+
     prompt = [
+        {"role": "system", "content": """You are an expert AI prompt engineer specializing in character simulation. Your task is to create a concise and effective system prompt for an LLM that will simulate a character based on a provided profile.
+
+The system prompt should instruct the LLM to embody the character, covering:
+1. Core personality, attitude, and speaking style (based on the profile).
+2. Key interests or knowledge areas (if mentioned in the profile).
+3. How to handle questions outside its knowledge (e.g., be evasive, admit ignorance naturally).
+4. Explicitly state it should *not* break character or mention being an AI.
+5. Incorporate age-appropriateness if the profile suggests a specific age group.
+
+Output ONLY the system prompt itself. Do not add any explanation or introductory text."""},
+        {"role": "user", "content": f"""Create a system prompt for an AI to simulate the character '{name}'.
+Context for simulation: {context}
+Character Profile:
+{enhanced_profile}
+
+Generate the system prompt based *only* on the profile and context provided."""}
     ]
+
     try:
+        # Use torch.amp.autocast instead of the deprecated torch.cuda.amp.autocast
+        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
+            outputs = model(prompt, max_new_tokens=300, do_sample=True, temperature=0.6)  # Shorter, less creative output
+
+        # Parse the output
+        parsed_output = parse_llm_output(outputs, prompt)
+        print("System prompt generated.")
+        # Return the parsed output or the fallback
+        return parsed_output if parsed_output else fallback_prompt
+
     except Exception as e:
+        error_msg = f"Error generating system prompt: {str(e)}"
+        print(error_msg)
+        # Fall back to a basic system prompt in case of error
+        return fallback_prompt
 
+# @GPU(memory=40)  # Decorator is specific to HF Spaces
 def generate_response(model, messages):
+    """Generate a response using the LLM."""
+    print("Generating response...")
+    if model is None:
+        raise ValueError("Model is not loaded.")
+    if not messages:
+        return "Error: No message history provided."
+
     try:
+        # Use torch.amp.autocast instead of the deprecated torch.cuda.amp.autocast
+        with torch.amp.autocast('cuda', dtype=torch.bfloat16):
             outputs = model(
                 messages,
+                max_new_tokens=512,  # Reasonable length for chat
                 do_sample=True,
                 top_p=0.9,
                 temperature=0.7,
+                use_cache=True,
+                pad_token_id=model.tokenizer.eos_token_id  # Important for stopping generation
             )
+
+        # Parse the output, expecting only the assistant's new reply
+        parsed_output = parse_llm_output(outputs, messages)
+        print("Response generated.")
+        return parsed_output if parsed_output else "..."  # Return an ellipsis if parsing failed
+
     except Exception as e:
+        error_msg = f"Error during response generation: {str(e)}"
+        print(error_msg)
+        return f"Sorry, I encountered an error: {str(e)}"
 
+# --- Persona Chat Class ---
 class PersonaChat:
     def __init__(self):
         self.model = None
 
         self.persona_context = ""
         self.messages = []
         self.enhanced_profile = ""
+        self.model_loaded = False  # Flag to track loading status
+
+    # @GPU(memory=40)  # Apply here if needed by Gradio for resource allocation on method calls
     def load_model_if_needed(self):
+        """Load the model if it hasn't been loaded successfully yet."""
+        if not self.model_loaded:
+            print("Model not loaded yet. Attempting to load...")
+            # Use the global load_model function
             self.model = load_model()
+            if self.model is None:
+                # Raise an error if loading failed; calling methods catch this
+                raise RuntimeError("Failed to load the language model. Cannot proceed.")
+            else:
+                self.model_loaded = True
+                print("Model loaded successfully within PersonaChat instance.")
+        else:
+            print("Model already loaded.")
+
+    # This method orchestrates the steps that need the GPU, so decorating it may be relevant for Gradio/Spaces
+    # @GPU(memory=40)
     def set_persona(self, name, context=""):
+        """Orchestrate persona creation: search, enhance, generate prompt.
+
+        Yields (status, system_prompt, enhanced_profile, history) tuples so the
+        UI handler can unpack a fixed shape on every update.
+        """
+        try:
+            # Ensure the model is loaded before proceeding
+            self.load_model_if_needed()  # Raises RuntimeError if it fails
+
+            self.persona_name = name
+            self.persona_context = context
+            self.messages = []  # Reset message history for the new persona
+            self.enhanced_profile = ""  # Reset profile
+
+            status = f"Searching for information about {name}..."
+            yield status, "", "", [{"role": "system", "content": "Initializing persona creation..."}]
+
+            search_results = search_person(name, context)
+
+            # Check whether search returned an error string
+            if isinstance(search_results, str) and search_results.startswith("Error"):
+                error_msg = f"Failed to set persona: {search_results}"
+                yield error_msg, "", "", [{"role": "system", "content": error_msg}]
+                return  # Stop processing if search failed
+
+            bio_text = extract_text_from_search_results(search_results)
+            if bio_text.startswith("Could not extract text"):
+                yield f"Warning: {bio_text}", "", "", [{"role": "system", "content": bio_text}]
+                # Continue with potentially limited info
+
+            status = f"Creating enhanced profile for {name}..."
+            yield status, "", "", [{"role": "system", "content": status}]
+
+            # Pass the loaded model to the generation function
+            self.enhanced_profile = generate_enhanced_persona(self.model, name, bio_text, context)
+            # Check whether enhancement failed
+            if self.enhanced_profile.startswith("Error enhancing profile"):
+                yield "Warning: Could not enhance profile. Using basic info.", "", self.enhanced_profile, [{"role": "system", "content": self.enhanced_profile}]
+                # Use the basic bio text for the system prompt instead
+                profile_for_prompt = bio_text
+            else:
+                profile_for_prompt = self.enhanced_profile
+
+            status = f"Generating optimal system prompt for {name}..."
+            yield status, "", self.enhanced_profile, [{"role": "system", "content": status}]  # Show the profile while generating the prompt
+
+            # Pass the loaded model
+            self.system_prompt = generate_system_prompt_with_llm(self.model, name, profile_for_prompt, context)
+
+            # Set the initial system message for the chat history
+            self.messages = [{"role": "system", "content": self.system_prompt}]
+
+            yield f"Persona set to '{name}'. Ready to chat!", self.system_prompt, self.enhanced_profile, self.messages
+
+        except RuntimeError as e:
+            # Catch model loading errors from load_model_if_needed
+            error_msg = f"Critical Error: {str(e)}"
+            print(error_msg)
+            yield error_msg, "", "", [{"role": "system", "content": error_msg}]
+        except Exception as e:
+            # Catch other unexpected errors during persona setting
+            error_msg = f"An unexpected error occurred during persona setup: {str(e)}"
+            print(error_msg)
+            yield error_msg, "", self.enhanced_profile, [{"role": "system", "content": error_msg}]
+
+    # This method uses the GPU for inference, so decorating it may be relevant
+    # @GPU(memory=40)
     def chat(self, user_message):
+        """Process a user message and return the AI's response."""
         try:
+            # Ensure the model is loaded
+            self.load_model_if_needed()  # Raises RuntimeError if the model failed to load initially
+
+            if not self.messages:
+                # This case should be prevented by the UI logic,
+                # but handle it defensively.
+                print("Error: Chat called before persona was set.")
+                return "Please set a persona first using the controls above."
+
+            print(f"User message: {user_message}")
+            # Append the user message (ensure the correct format)
+            formatted_message = {"role": "user", "content": user_message}
             self.messages.append(formatted_message)
+
+            # Generate a response using the loaded model
             response = generate_response(self.model, self.messages)
+
+            # Append the assistant response
             assistant_message = {"role": "assistant", "content": response}
             self.messages.append(assistant_message)
+
+            print(f"Assistant response: {response}")
             return response
+
+        except RuntimeError as e:
+            # Catch model loading errors
+            error_msg = f"Critical Error: {str(e)}. Cannot generate response."
+            print(error_msg)
+            return error_msg
         except Exception as e:
+            # Catch errors during generation itself
             error_msg = f"Error generating response: {str(e)}"
             print(error_msg)
+            # Return the error string directly rather than adding it to self.messages
             return error_msg
 
+# --- Gradio Interface ---
 def create_interface():
+    # Instantiate the chat handler class ONCE
     persona_chat = PersonaChat()
+
+    # Custom CSS (minor adjustments possible)
     css = """
+    .gradio-container { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; }
+    .main-container { max-width: 1200px; margin: auto; padding: 0; }
+    .header { background: linear-gradient(90deg, #2c3e50, #4ca1af); color: white; padding: 20px; border-radius: 10px 10px 0 0; margin-bottom: 20px; text-align: center; }
+    .setup-section { background-color: #f9f9f9; border-radius: 10px; padding: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); margin-bottom: 20px; }
+    .chat-section { background-color: white; border-radius: 10px; padding: 20px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); }
+    .status-bar { background: #e9ecef; padding: 10px 15px; border-radius: 5px; margin: 15px 0; font-weight: 500; border: 1px solid #ced4da; }
+    .chat-container { border: 1px solid #eaeaea; border-radius: 10px; height: 500px !important; overflow-y: auto; background-color: #ffffff; padding: 10px; }
+    .message-input { margin-top: 10px; }
+    .send-button { background-color: #2c3e50 !important; color: white !important; }
+    .persona-button { background-color: #4ca1af !important; color: white !important; }
+    .system-prompt-display { background-color: #f5f5f5; border-radius: 8px; padding: 15px; margin-top: 15px; border: 1px solid #e0e0e0; font-family: monospace; white-space: pre-wrap; word-wrap: break-word; }
+    .footer { text-align: center; margin-top: 20px; font-size: 0.9rem; color: #666; }
+    .user-message > .message { background-color: #e1f5fe; border-radius: 15px 15px 0 15px !important; padding: 10px 15px !important; margin: 8px 0 8px auto !important; max-width: 80%; float: right; clear: both; color: #333; }
+    .bot-message > .message { background-color: #f1f3f5; border-radius: 15px 15px 15px 0 !important; padding: 10px 15px !important; margin: 8px auto 8px 0 !important; max-width: 80%; float: left; clear: both; color: #333; }
+    .message p { margin: 0 !important; padding: 0 !important; } /* Prevent extra margins in chatbot messages */
+    .typing-indicator { color: #aaa; font-style: italic; }
     """
+
     with gr.Blocks(css=css, title="AI Persona Simulator") as interface:
         with gr.Row(elem_classes="main-container"):
             with gr.Column():
                 # Header
                 with gr.Column(elem_classes="header"):
                     gr.Markdown("# AI Persona Simulator")
+                    gr.Markdown("Create and interact with AI-driven character simulations")
+
                 # Setup Section
                 with gr.Column(elem_classes="setup-section"):
+                    gr.Markdown("### 1. Create Your Persona")
+                    gr.Markdown("Enter a name and any defining context (e.g., age, job, key traits, situation). The AI will search for info and build a profile.")
+
                     with gr.Row():
                         name_input = gr.Textbox(
                             label="Character Name",
+                            placeholder="e.g., Sherlock Holmes, Erenalp, A curious 7th grader",
+                            elem_id="name_input"
                         )
                         context_input = gr.Textbox(
+                            label="Character Context / Description",
+                            placeholder="e.g., Living in 221B Baker Street, London. OR 7th grade, loves math and video games, has a pet cat named Luna. OR A spaceship captain exploring Alpha Centauri.",
                             lines=2,
+                            elem_id="context_input"
                         )
+
+                    set_persona_button = gr.Button(
+                        "Create Persona & Start Chat",
+                        variant="primary",
+                        elem_classes="persona-button"
+                    )
+
                     status_output = gr.Textbox(
+                        label="Status",
+                        value="Enter details above and click 'Create Persona'.",
                         interactive=False,
                         elem_classes="status-bar"
                     )
+
+                    with gr.Accordion("View Generated System Prompt", open=False):
                         system_prompt_display = gr.TextArea(
+                            label="System Prompt (Instructions for the AI)",
                             interactive=False,
                             lines=10,
+                            elem_classes="system-prompt-display"  # Use the dedicated class
                        )
+                        enhanced_profile_display = gr.TextArea(
+                            label="Enhanced Profile (Generated by AI)",
+                            interactive=False,
+                            lines=10,
+                            elem_classes="system-prompt-display"  # Reuse the style or create a new one
+                        )
+
                 # Chat Section
                 with gr.Column(elem_classes="chat-section"):
+                    gr.Markdown("### 2. Chat with Your Character")
+
                     character_name_display = gr.Markdown(
+                        value="*No persona created yet*",
+                        elem_id="character-name-display"
                     )
+
                     chatbot = gr.Chatbot(
+                        label="Conversation",
                         height=450,
                         elem_classes="chat-container",
+                        type="messages",  # History entries are {"role": ..., "content": ...} dicts
+                        bubble_full_width=False,  # Make bubbles fit their content
+                        avatar_images=(None, "🤖")  # Default user avatar; the bot uses an emoji
                     )
+
                     with gr.Row():
                         msg_input = gr.Textbox(
                             label="Your message",
+                            placeholder="Type your message here and press Enter...",
+                            elem_classes="message-input",
+                            scale=4  # Make the input wider
                         )
                         send_button = gr.Button(
+                            "Send",
                             variant="primary",
+                            elem_classes="send-button",
+                            scale=1
                        )
+
                # Footer
                with gr.Column(elem_classes="footer"):
+                    gr.Markdown(f"Powered by {MODEL_ID}")
+
+        # --- Event Handlers ---
+
+        # Generator function for smoother UI updates during persona creation
+        def set_persona_flow(name, context):
+            if not name:
+                yield "Status: Please enter a character name.", "", "", "*No persona created yet*", []
+                return
+
+            initial_status = f"Creating persona for '{name}'..."
+            initial_character_display = f"### Preparing to chat with {name}..."
+            initial_prompt = "System prompt will appear here..."
+            initial_profile = "Enhanced profile will appear here..."
+            initial_history = [{"role": "system", "content": "Initializing..."}]  # Start with a system message
+
+            # Initial yield to show that activity is starting
+            yield initial_status, initial_prompt, initial_profile, initial_character_display, initial_history
+
+            # Use the PersonaChat instance's method, which is itself a generator
+            final_status, final_prompt, final_profile = "Error", "", ""
+            final_history = initial_history
            try:
+                # Iterate through the status updates yielded by set_persona
+                for status, prompt, profile, history_update in persona_chat.set_persona(name, context):
+                    final_status, final_prompt, final_profile = status, prompt, profile
+                    # Use the latest history from the generator if available
+                    if isinstance(history_update, list):
+                        final_history = history_update
+
+                    # Determine the character display based on the status
+                    character_display = f"### Preparing chat with {name}..."
+                    if "Ready to chat" in status:
+                        character_display = f"### Chatting with {name}"
+                    elif "Error" in status:
+                        character_display = f"### Error creating {name}"
+
+                    yield status, final_prompt, final_profile, character_display, final_history
+                    # A small delay can help the UI repaint between yields if needed
+                    # time.sleep(0.1)
+
            except Exception as e:
+                # Catch any error not handled within the set_persona generator
+                error_msg = f"Failed to set persona due to an unexpected error: {str(e)}"
+                print(error_msg)
+                yield error_msg, "", "", f"### Error creating {name}", [{"role": "system", "content": error_msg}]
+
+        # Handle sending messages; also a generator so the UI can show a typing indicator
+        def send_message_flow(message, history):
+            if not message.strip():
+                # Ignore empty messages (note: a bare `return value` inside a
+                # generator emits nothing, so yield before returning)
+                yield "", history
+                return
+
+            # Check that a persona is ready (system prompt present in the history)
+            if not persona_chat.messages or persona_chat.messages[0]['role'] != 'system':
+                # Persona not set or history is corrupted
+                history.append({"role": "user", "content": message})
+                history.append({"role": "assistant", "content": "Error: Please create a valid persona first using the form above."})
+                yield "", history  # Clear the input box, show the error
+                return
+
+            # Append the user message to the chatbot UI immediately
+            history.append({"role": "user", "content": message})
+            # Add a typing indicator for the bot
+            history.append({"role": "assistant", "content": None})  # None renders as a pending message
+
+            # Show the user message and typing indicator
+            yield "", history
+
+            # Call the chat method (which handles model loading and generation)
+            response = persona_chat.chat(message)  # Uses the internal self.messages
+
+            # Replace the typing indicator with the actual response
+            history[-1]["content"] = response
+
+            # Final history with the bot's response; the input box stays cleared
+            yield "", history
+
+        # Connect Gradio components to functions
+
+        # Use the generator for persona setting
         set_persona_button.click(
+            set_persona_flow,
             inputs=[name_input, context_input],
+            outputs=[status_output, system_prompt_display, enhanced_profile_display, character_name_display, chatbot]
         )
+
+        # Use the generator for sending messages
         send_button.click(
+            send_message_flow,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
         )
+
+        # Allow submitting the message with the Enter key
         msg_input.submit(
+            send_message_flow,
             inputs=[msg_input, chatbot],
             outputs=[msg_input, chatbot]
        )
+
    return interface
 
+# --- Main Execution ---
 if __name__ == "__main__":
+    print("Starting Gradio application...")
+    # Ensure the necessary packages are installed:
+    # pip install gradio transformers torch duckduckgo_search huggingface_hub accelerate bitsandbytes sentencepiece
     demo = create_interface()
+    demo.queue().launch(  # Queue requests for better handling of multiple users
+        server_name="0.0.0.0",  # Allows access from other devices on the network
         server_port=7860,
+        share=False,  # Set to True to create a temporary public link (requires internet)
+        show_error=True,  # Show errors in the browser console
+        debug=True  # More detailed logs in the terminal
     )
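
A note on the central loading change: newer transformers releases deprecate passing `load_in_8bit` straight through `model_kwargs`, which is why this commit wraps it in a `BitsAndBytesConfig`. A minimal standalone sketch of the same pattern, assuming a CUDA GPU with `accelerate` and `bitsandbytes` installed, and using a small stand-in model ("facebook/opt-125m") rather than the Space's 8B checkpoint:

import torch
from transformers import pipeline, BitsAndBytesConfig

# 8-bit weight quantization, configured the same way load_model() does it
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

pipe = pipeline(
    "text-generation",
    model="facebook/opt-125m",  # stand-in model for illustration only
    torch_dtype=torch.bfloat16,
    device_map="auto",
    model_kwargs={"quantization_config": quantization_config},
)
print(pipe("Hello, world", max_new_tokens=10)[0]["generated_text"])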
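The new `parse_llm_output` helper rests on one heuristic: find the last occurrence of the final input message inside the returned text, keep only what follows, and strip any leading role marker. A self-contained illustration of that idea (a simplified standalone helper written for this note, not the function in app.py):

import re

def extract_after_last_input(generated_text, last_input_content):
    """Keep only the text the model produced after the echoed input."""
    idx = generated_text.rfind(last_input_content)
    if idx == -1:
        return generated_text.strip()  # Prompt not echoed; treat everything as new
    response = generated_text[idx + len(last_input_content):].strip()
    # Drop a leading role marker such as "assistant:" if the model emitted one
    return re.sub(r'^(assistant|ASSISTANT)[:\s]*', '', response).strip()

full = "Describe 'Ada' in one line.assistant: Ada is a curious engineer."
print(extract_after_last_input(full, "Describe 'Ada' in one line."))
# -> Ada is a curious engineer.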
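Both new event handlers are generator functions: every `yield` pushes a fresh set of output values to the browser, which is how the status bar and the typing indicator update mid-run. A minimal sketch of that pattern, independent of this app (the function and component names here are illustrative):

import time
import gradio as gr

def long_task(name):
    # Each yield repaints the Textbox output immediately
    yield f"Searching for {name}..."
    time.sleep(1)  # stand-in for the search step
    yield f"Building profile for {name}..."
    time.sleep(1)  # stand-in for the LLM calls
    yield f"Persona '{name}' is ready to chat!"

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    status = gr.Textbox(label="Status", interactive=False)
    gr.Button("Create").click(long_task, inputs=name, outputs=status)

demo.launch()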
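Finally, because the handlers build chat history as role/content dicts, the Chatbot has to run in Gradio's "messages" mode (restored on the component above, as in the pre-commit version). A tiny sketch of that format, assuming a Gradio release that supports `type="messages"`:

import gradio as gr

def reply(message, history):
    # In messages mode, history is a list of {"role": ..., "content": ...} dicts
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": f"You said: {message}"})
    return "", history

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    box = gr.Textbox(label="Your message")
    box.submit(reply, inputs=[box, chat], outputs=[box, chat])

demo.launch()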