Lesterchia174 committed on
Commit 6877857 · verified · 1 Parent(s): 7167bb5

Upload 2 files

Files changed (2)
  1. app.py +244 -0
  2. requirements.txt +18 -0
app.py ADDED
@@ -0,0 +1,244 @@
+ # -*- coding: utf-8 -*-
+ """App
+
+ Automatically generated by Colab.
+
+ Original file is located at
+     https://colab.research.google.com/drive/1TdjbTSA8V5GUProQ3Bd-uYmTLXSInoWf
+ """
+
+ import subprocess
+
+ def install_espeak():
+     try:
+         subprocess.run(["apt-get", "install", "espeak", "-y"], check=True)  # no 'sudo': it is unavailable in this environment
+         print("eSpeak installed successfully!")
+     except subprocess.CalledProcessError as e:
+         print(f"Error occurred while installing eSpeak: {e}")
+
+ # Call the function to install eSpeak
+ install_espeak()
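+ # Note: 'apt-get install' generally requires root privileges and network
+ # access; where the process is unprivileged, the call above fails and the
+ # handler only logs the error.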
+
+ import gradio as gr
+ import numpy as np
+ from transformers import pipeline
+ import os
+ import groq
+ from langchain_groq import ChatGroq
+ from langchain.schema import HumanMessage
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.docstore.document import Document
+ import chardet
+ import fitz  # PyMuPDF for PDFs
+ import docx  # python-docx for Word files
+ import gtts  # Google Text-to-Speech library
+ from pptx import Presentation  # python-pptx for PowerPoint files
+ import re
+
+ # Initialize Whisper model for speech-to-text
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
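+ # Note: whisper-base.en is an English-only checkpoint; its weights are
+ # downloaded from the Hugging Face Hub the first time the pipeline is built.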
+
+ # Set API key (ensure it is stored securely in an environment variable)
+ groq.api_key = os.getenv("GROQ_API_KEY")  # hard-coded fallback key removed: never commit secrets
+
+ # Initialize Chat Model
+ chat_model = ChatGroq(model_name="DeepSeek-R1-Distill-Llama-70b", api_key=groq.api_key)  # alternative: llama-3.3-70b-versatile
+
+ # Initialize Embeddings
+ embedding_model = HuggingFaceEmbeddings()
+
+ # Initialize ChromaDB
+ vectorstore = Chroma(embedding_function=embedding_model)
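+ # Note: constructed without a persist_directory, so this Chroma collection is
+ # ephemeral (in memory) and is lost when the process restarts.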
+
+ # Prompt for quiz generation with added remark
+ quiz_prompt = """
+ You are an AI assistant specialized in education and assessment creation. Given an uploaded document or text, generate a quiz with a mix of multiple-choice questions (MCQs) and fill-in-the-blank questions. The quiz should be directly based on the key concepts, facts, and details from the provided material.
+ Remove all unnecessary formatting generated by the LLM, including <think> tags, asterisks, markdown formatting, and any bold or italic text, as well as **, ###, ##, and # tags.
+
+ For each question:
+
+ - Provide 4 answer choices (for MCQs), with only one correct answer.
+ - Ensure fill-in-the-blank questions focus on key terms, phrases, or concepts from the document.
+ - Include an answer key for all questions.
+ - Ensure questions vary in difficulty and encourage comprehension rather than memorization.
+ - Additionally, implement an instant feedback mechanism:
+   - When a user selects an answer, indicate whether it is correct or incorrect.
+   - If incorrect, provide a brief explanation from the document to guide learning.
+   - Ensure responses are concise and educational to enhance understanding.
+
+ Output Example:
+ 1. Fill in the blank: The LLM Agent framework has a central decision-making unit called the _______________________.
+
+ Answer: Agent Core
+
+ Feedback: The Agent Core is the central component of the LLM Agent framework, responsible for managing goals, tool instructions, planning modules, memory integration, and agent persona.
+
+ 2. What is the main limitation of LLM-based applications?
+ a) Limited token capacity
+ b) Lack of domain expertise
+ c) Prone to hallucination
+ d) All of the above
+
+ Answer: d) All of the above
+
+ Feedback: LLM-based applications have several limitations, including limited token capacity, lack of domain expertise, and being prone to hallucination, among others.
+ """
+
+ # Function to clean the AI response by removing unwanted formatting
+ def clean_response(response):
+     """Removes <think> tags, asterisks, and markdown formatting."""
+     cleaned_text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)  # remove <think> blocks
+     cleaned_text = re.sub(r"(\*\*|\*)", "", cleaned_text)  # remove **bold** and *italics* markers
+     cleaned_text = re.sub(r"^#+\s*", "", cleaned_text, flags=re.MULTILINE)  # remove #, ##, ### heading markers
+     return cleaned_text.strip()
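+ # Illustrative example (not part of the original file):
+ # clean_response("<think>plan</think>## Quiz\n1. Q") -> "Quiz\n1. Q"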
+
+ # Function to generate a quiz based on document content
+ def generate_quiz(content):
+     prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
+     response = chat_model([HumanMessage(content=prompt)])
+
+     # Apply text cleaning before returning the response
+     cleaned_response = clean_response(response.content)
+
+     return cleaned_response
+
+ # Function to handle chatbot interactions
+ def chat_with_groq(user_input):
+     try:
+         response = chat_model([HumanMessage(content=user_input)])
+         cleaned_response_text = clean_response(response.content)  # clean the response
+         audio_file = speech_playback(cleaned_response_text)  # synthesize speech for the response
+         return cleaned_response_text, audio_file  # return both the text and the audio file path
+     except Exception as e:
+         return f"Error: {str(e)}", None
+
+ # Function to play the response as speech using gTTS
+ def speech_playback(text):
+     tts = gtts.gTTS(text, lang='en')
+     audio_file = "output_audio.mp3"
+     tts.save(audio_file)
+     return audio_file  # return the path to the audio file
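+ # Note: gTTS synthesizes speech via Google's online TTS service, so outbound
+ # network access is required; the fixed filename means each call overwrites
+ # the previous audio, so concurrent users would clobber one another.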
+
+ # Function to detect file encoding safely
+ def detect_encoding(file_path):
+     try:
+         with open(file_path, "rb") as f:
+             raw_data = f.read(4096)  # read the first 4 KB for detection
+         detected = chardet.detect(raw_data)
+         encoding = detected["encoding"]
+         return encoding if encoding else "utf-8"  # default to UTF-8 if detection fails
+     except Exception:
+         return "utf-8"
+
+ # Function to extract text from PDF files
+ def extract_text_from_pdf(pdf_path):
+     try:
+         doc = fitz.open(pdf_path)
+         text = "\n".join([page.get_text("text") for page in doc])
+         return text if text.strip() else "No extractable text found."
+     except Exception as e:
+         return f"Error extracting text from PDF: {str(e)}"
+
+ # Function to extract text from Word files (.docx)
+ def extract_text_from_docx(docx_path):
+     try:
+         doc = docx.Document(docx_path)
+         text = "\n".join([para.text for para in doc.paragraphs])
+         return text if text.strip() else "No extractable text found."
+     except Exception as e:
+         return f"Error extracting text from Word document: {str(e)}"
+
+ # Function to extract text from PowerPoint files (.pptx)
+ def extract_text_from_pptx(pptx_path):
+     try:
+         presentation = Presentation(pptx_path)
+         text = ""
+         for slide in presentation.slides:
+             for shape in slide.shapes:
+                 if hasattr(shape, "text"):
+                     text += shape.text + "\n"
+         return text if text.strip() else "No extractable text found."
+     except Exception as e:
+         return f"Error extracting text from PowerPoint: {str(e)}"
+
+ # Function to process uploaded documents safely
+ def process_document(file):
+     try:
+         file_extension = os.path.splitext(file.name)[-1].lower()
+
+         if file_extension in [".png", ".jpg", ".jpeg"]:
+             return "Error: Images cannot be processed for text extraction."
+
+         # Extract text based on file type
+         if file_extension == ".pdf":
+             content = extract_text_from_pdf(file.name)
+         elif file_extension == ".docx":
+             content = extract_text_from_docx(file.name)
+         elif file_extension == ".pptx":
+             content = extract_text_from_pptx(file.name)
+         else:
+             encoding = detect_encoding(file.name)
+             with open(file.name, "r", encoding=encoding, errors="replace") as f:
+                 content = f.read()
+
+         # Split the text into chunks and index them
+         text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+         documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
+         vectorstore.add_documents(documents)
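+         # Note: the chunks are indexed here, but nothing in this app queries
+         # the vector store yet.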
+
+         # Generate a quiz based on the document content
+         quiz = generate_quiz(content)
+
+         return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
+
+     except Exception as e:
+         return f"Error processing document: {str(e)}"
+
+ # Function to handle speech-to-text conversion
+ def transcribe_audio(audio):
+     sr, y = audio
+
+     # Convert to mono if stereo
+     if y.ndim > 1:
+         y = y.mean(axis=1)
+
+     # Normalize to float32 in [-1, 1], guarding against silent (all-zero) input
+     y = y.astype(np.float32)
+     peak = np.max(np.abs(y))
+     if peak > 0:
+         y /= peak
+
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.HTML("<h2 style='text-align: center;'>AI Tutor</h2>")
+     gr.HTML("""
+     <div style="text-align: center; margin-bottom: 20px;">
+         <img src="https://img.freepik.com/premium-photo/little-girl-is-seen-sitting-front-laptop-computer-engaged-with-nearby-robot-robot-assistant-helping-child-with-homework-ai-generated_585735-12266.jpg" style="max-width: 60%; height: auto; border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.2);" />
+     </div>
+     """)
+
+     with gr.Row():
+         with gr.Column():
+             audio_input = gr.Audio(type="numpy", label="Record Audio")
+             transcription_output = gr.Textbox(label="Transcription")
+             user_input = gr.Textbox(label="Ask a question")
+             chat_output = gr.Textbox(label="Response")
+             audio_output = gr.Audio(label="Audio Playback")  # audio output for the spoken response
+             submit_btn = gr.Button("Ask")
+         with gr.Column():
+             file_upload = gr.File(label="Upload a document")
+             process_status = gr.Textbox(label="Processing Status", interactive=False)
+             process_btn = gr.Button("Process Document")
+
+     # Wire events: transcribe recordings, mirror the transcription into the
+     # question box, answer questions, and process uploaded documents
+     audio_input.change(fn=transcribe_audio, inputs=audio_input, outputs=transcription_output)
+     transcription_output.change(fn=lambda x: x, inputs=transcription_output, outputs=user_input)
+     submit_btn.click(chat_with_groq, inputs=user_input, outputs=[chat_output, audio_output])
+     process_btn.click(process_document, inputs=file_upload, outputs=process_status)
+
+ # Launch the Gradio app
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ gradio
+ groq
+ gtts
+ langchain
+ langchain-core
+ langchain-community
+ langchain-text-splitters
+ langgraph
+ chromadb
+ langsmith
+ llama-cpp-python
+ langchain_huggingface
+ pymupdf
+ sentence_transformers
+ langchain-groq
+ langchain-docling
+ langchain-chroma
+ pyttsx3
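+ # Assumption: the following packages are imported directly by app.py but were
+ # missing from this list; added so the direct imports resolve.
+ transformers
+ chardet
+ python-docx
+ python-pptx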