Lesterchia174 committed
Commit 9906ef6 · verified · 1 Parent(s): 4dfc590

Update app.py

Files changed (1):
  1. app.py +249 -34
app.py CHANGED
@@ -7,40 +7,255 @@ Original file is located at
     https://colab.research.google.com/drive/1GzjDFYPEtsFsBFnhi3x3B0vWyCE-Dtpb
 """
 
-from ultralytics import YOLO
-from PIL import Image
 import gradio as gr
-from huggingface_hub import snapshot_download
 import os
 
-model_path = "/Users/markk/Downloads/best_int8_openvino_model"
-
-def load_model(repo_id):
-    download_dir = snapshot_download(repo_id)
-    print(download_dir)
-    path = os.path.join(download_dir, "best_int8_openvino_model")
-    print(path)
-    detection_model = YOLO(path, task='detect')
-    return detection_model
-
-
-def predict(pilimg):
-    source = pilimg
-    result = detection_model.predict(source, conf=0.5, iou=0.6)
-    img_bgr = result[0].plot()
-    out_pilimg = Image.fromarray(img_bgr[..., ::-1])  # RGB-order PIL image
-
-    return out_pilimg
-
-REPO_ID = "Lesterchia174/Monkey_Durian"
-detection_model = load_model(REPO_ID)
-
-video_url = "https://github.com/lesterchia1/Monkey_Durian/blob/main/Monkey_Durian.mp4"
-
-gr.Interface(
-    fn=predict,
-    inputs=gr.Image(type="pil"),
-    outputs=gr.Image(type="pil"),
-    title="Monkey Durian Detector",
-    description=f"[Click here to watch the video]({video_url})"
-).launch(share=True)
+import numpy as np
+from transformers import pipeline
+import time
+import groq
+import uuid  # For generating unique filenames
+
+# Updated imports to address LangChain deprecation warnings:
+from langchain_groq import ChatGroq
+from langchain.schema import HumanMessage
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Chroma
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.docstore.document import Document
+
+# Importing chardet (make sure to add chardet to your requirements.txt)
+import chardet
+
+import fitz  # PyMuPDF for PDFs
+import docx  # python-docx for Word files
+import gtts  # Google Text-to-Speech library
+from pptx import Presentation  # python-pptx for PowerPoint files
+import re
+
+# Initialize Whisper model for speech-to-text
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+
+# Set API key (ensure it's stored securely in an environment variable)
+groq.api_key = os.getenv("GROQ_API_KEY")  # hardcoded fallback key redacted; set GROQ_API_KEY instead
+
+# Initialize chat model
+chat_model = ChatGroq(model_name="deepseek-r1-distill-qwen-32b", api_key=groq.api_key)
+
+# Initialize embeddings and ChromaDB vector store
+embedding_model = HuggingFaceEmbeddings()
+vectorstore = Chroma(embedding_function=embedding_model)
+
+# Short-term memory for the LLM
+chat_memory = []
+
+# Prompt for quiz generation with added remark
+quiz_prompt = """
+You are an AI assistant specialized in education and assessment creation. Given an uploaded document or text, generate a quiz with a mix of multiple-choice questions (MCQs) and fill-in-the-blank questions. The quiz should be directly based on the key concepts, facts, and details from the provided material.
+Remove all unnecessary formatting generated by the LLM, including <think> tags, asterisks, markdown formatting, and any bold or italic text, as well as **, ###, ##, and # tags.
+Please generate 20 Questions.
+
+For each question:
+- Provide 4 answer choices (for MCQs), with only one correct answer.
+- Ensure fill-in-the-blank questions focus on key terms, phrases, or concepts from the document.
+- Include an answer key for all questions.
+- Ensure questions vary in difficulty and encourage comprehension rather than memorization.
+- Additionally, implement an instant feedback mechanism:
+  - When a user selects an answer, indicate whether it is correct or incorrect.
+  - If incorrect, provide a brief explanation from the document to guide learning.
+  - Ensure responses are concise and educational to enhance understanding.
+
+Output Example:
+1. Fill in the blank: The LLM Agent framework has a central decision-making unit called the _______________________.
+
+Answer: Agent Core
+
+Feedback: The Agent Core is the central component of the LLM Agent framework, responsible for managing goals, tool instructions, planning modules, memory integration, and agent persona.
+
+2. What is the main limitation of LLM-based applications?
+   a) Limited token capacity
+   b) Lack of domain expertise
+   c) Prone to hallucination
+   d) All of the above
+
+Answer: d) All of the above
+
+Feedback: LLM-based applications have several limitations, including limited token capacity, lack of domain expertise, and being prone to hallucination, among others.
+
+"""
+
+# Function to clean the AI response by removing unwanted formatting
+def clean_response(response):
+    """Removes <think> tags, asterisks, and markdown formatting."""
+    cleaned_text = re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL)
+    cleaned_text = re.sub(r"(\*\*|\*|\[|\]|\\n)", "", cleaned_text)
+    cleaned_text = re.sub(r"^##+\s*", "", cleaned_text, flags=re.MULTILINE)
+    cleaned_text = re.sub(r"\\", "", cleaned_text)
+    cleaned_text = re.sub(r"---", "", cleaned_text)
+    return cleaned_text.strip()
+
+# Function to generate a quiz based on content
+def generate_quiz(content):
+    prompt = f"{quiz_prompt}\n\nDocument content:\n{content}"
+    response = chat_model([HumanMessage(content=prompt)])
+    cleaned_response = clean_response(response.content)
+    return cleaned_response
+
+# Function to retrieve relevant documents from the vectorstore based on a user query
+def retrieve_documents(query):
+    results = vectorstore.similarity_search(query, k=3)
+    return [doc.page_content for doc in results]
+
+# Function to check content in the vector store
+def check_vectorstore():
+    # Check the content of the vectorstore by retrieving some documents
+    results = vectorstore.similarity_search("test", k=3)
+    return [doc.page_content for doc in results]
+
+# RAG function: retrieve context and generate a response based on context and query
+def rag_query_handler(user_input):
+    try:
+        # Retrieve relevant documents for additional context (RAG - retrieval-augmented generation)
+        relevant_docs = retrieve_documents(user_input)
+        context = "\n".join(relevant_docs) if relevant_docs else "No relevant documents found."
+
+        # Combine the context with the user input and conversation history for the final prompt
+        system_prompt = "You are a helpful AI assistant. Answer questions accurately and concisely."
+        conversation_history = "\n".join(chat_memory[-10:])  # Keep the last 10 messages (five exchanges)
+        prompt = f"{system_prompt}\n\nConversation History:\n{conversation_history}\n\nUser Input: {user_input}\n\nContext:\n{context}"
+
+        # Call the chat model for RAG generation (retrieve + generate)
+        response = chat_model([HumanMessage(content=prompt)])
+
+        # Clean the response to remove any unwanted formatting
+        cleaned_response_text = clean_response(response.content)
+
+        # Append to conversation history for future queries
+        chat_memory.append(f"User: {user_input}")
+        chat_memory.append(f"AI: {cleaned_response_text}")
+
+        # Convert the response to speech
+        audio_file = speech_playback(cleaned_response_text)
+
+        # Return both the chat response and the audio file path, as a tuple
+        return [(user_input, cleaned_response_text)], audio_file
+    except Exception as e:
+        return [("Error", str(e))], None
+
+# Function to play the response as speech using gTTS
+def speech_playback(text):
+    try:
+        # Generate a unique filename for each audio file
+        unique_id = str(uuid.uuid4())
+        audio_file = f"output_audio_{unique_id}.mp3"
+
+        # Convert text to speech (note: lang='zh-CN' requests Mandarin synthesis;
+        # use lang='en' if the responses are English)
+        tts = gtts.gTTS(text, lang='zh-CN')
+        tts.save(audio_file)
+
+        # Return the path to the audio file
+        return audio_file
+    except Exception as e:
+        print(f"Error in speech_playback: {e}")
+        return None
+
+# Function to detect encoding safely
+def detect_encoding(file_path):
+    try:
+        with open(file_path, "rb") as f:
+            raw_data = f.read(4096)
+        detected = chardet.detect(raw_data)
+        encoding = detected["encoding"]
+        return encoding if encoding else "utf-8"
+    except Exception:
+        return "utf-8"
+
+# Function to extract text from PDF
+def extract_text_from_pdf(pdf_path):
+    try:
+        doc = fitz.open(pdf_path)
+        text = "\n".join([page.get_text("text") for page in doc])
+        return text if text.strip() else "No extractable text found."
+    except Exception as e:
+        return f"Error extracting text from PDF: {str(e)}"
+
+# Function to extract text from Word files (.docx)
+def extract_text_from_docx(docx_path):
+    try:
+        doc = docx.Document(docx_path)
+        text = "\n".join([para.text for para in doc.paragraphs])
+        return text if text.strip() else "No extractable text found."
+    except Exception as e:
+        return f"Error extracting text from Word document: {str(e)}"
+
+# Function to extract text from PowerPoint files (.pptx)
+def extract_text_from_pptx(pptx_path):
+    try:
+        presentation = Presentation(pptx_path)
+        text = ""
+        for slide in presentation.slides:
+            for shape in slide.shapes:
+                if hasattr(shape, "text"):
+                    text += shape.text + "\n"
+        return text if text.strip() else "No extractable text found."
+    except Exception as e:
+        return f"Error extracting text from PowerPoint: {str(e)}"
+
+# Function to process documents safely
+def process_document(file):
+    try:
+        file_extension = os.path.splitext(file.name)[-1].lower()
+        if file_extension in [".png", ".jpg", ".jpeg"]:
+            return "Error: Images cannot be processed for text extraction."
+        if file_extension == ".pdf":
+            content = extract_text_from_pdf(file.name)
+        elif file_extension == ".docx":
+            content = extract_text_from_docx(file.name)
+        elif file_extension == ".pptx":
+            content = extract_text_from_pptx(file.name)
+        else:
+            encoding = detect_encoding(file.name)
+            with open(file.name, "r", encoding=encoding, errors="replace") as f:
+                content = f.read()
+
+        # Split content into chunks for vector store indexing
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+        documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
+
+        # Add documents to the vectorstore
+        vectorstore.add_documents(documents)
+
+        # Check the content in the vectorstore
+        vectorstore_content = check_vectorstore()
+
+        # Generate a quiz based on the document content
+        quiz = generate_quiz(content)
+
+        return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}\n\nVectorstore Content:\n{vectorstore_content}"
+    except Exception as e:
+        return f"Error processing document: {str(e)}"
+
+# Create the Gradio interface for uploading files and interacting with the model
+def chatbot_interface():
+    with gr.Blocks() as demo:
+        with gr.Tab("Upload Document"):
+            with gr.Column():
+                file_input = gr.File(label="Upload Document")
+                submit_button = gr.Button("Submit")
+                result_output = gr.Textbox(label="Processed Output", interactive=False)
+                audio_output = gr.Audio(label="Generated Speech")
+
+        with gr.Tab("Chat with AI"):
+            with gr.Column():
+                user_input = gr.Textbox(label="Ask a Question")
+                chat_button = gr.Button("Ask")
+                chat_output = gr.Textbox(label="Chat Response", interactive=False)
+                # This second audio_output rebinds the name and shadows the one in the
+                # Upload tab; the click handlers below use this component.
+                audio_output = gr.Audio(label="Generated Speech")
+
+        submit_button.click(process_document, inputs=file_input, outputs=result_output)
+        chat_button.click(rag_query_handler, inputs=user_input, outputs=[chat_output, audio_output])
+
+    demo.launch()
+
+# Run the chatbot interface
+chatbot_interface()
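
Reviewer note: the new imports imply a dependency set roughly like the sketch below. This is an inferred requirements.txt, not a file in this commit; package names follow from the import statements (fitz is provided by PyMuPDF, docx by python-docx), and torch and sentence-transformers are assumptions since the Whisper pipeline and HuggingFaceEmbeddings typically need them. Versions are left unpinned.

    gradio
    numpy
    transformers
    torch
    groq
    langchain
    langchain-groq
    langchain-community
    chromadb
    sentence-transformers
    chardet
    PyMuPDF
    python-docx
    python-pptx
    gTTS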
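Two changed lines deserve a quick local check. The hardcoded Groq key fallback is redacted above, so GROQ_API_KEY must be set in the environment (e.g. as a Space secret) before launch. And speech_playback passes lang='zh-CN' to gTTS, which requests Mandarin synthesis even though the quiz and chat responses are English. Below is a minimal standalone sketch of that text-to-speech step; speak and OUTPUT_LANG are illustrative names, not part of the app.

    # Sketch of the gTTS step from speech_playback, factored out so the language
    # choice can be tested on its own. OUTPUT_LANG is a hypothetical knob:
    # 'zh-CN' mirrors the commit; 'en' matches the app's English responses.
    import uuid

    import gtts

    OUTPUT_LANG = "en"

    def speak(text: str) -> str:
        audio_file = f"output_audio_{uuid.uuid4()}.mp3"  # unique filename, as in the app
        gtts.gTTS(text, lang=OUTPUT_LANG).save(audio_file)  # network call to Google TTS
        return audio_file

    print(speak("The Agent Core is the central component of the LLM Agent framework."))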