UKURIKIYEYEZU committed
Commit cff93e7 · verified · 1 Parent(s): 16ebf5d

Update app.py

Files changed (1)
  1. app.py +246 -48
app.py CHANGED
@@ -1,64 +1,262 @@
-import gradio as gr
-from huggingface_hub import InferenceClient

 """
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]

-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})

-    messages.append({"role": "user", "content": message})

-    response = ""

-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content

-        response += token
-        yield response


 """
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )

-
 if __name__ == "__main__":
-    demo.launch()
 
+import os
+from google.colab import userdata
+
+groq_api_key = userdata.get('groq2')
+from langchain_groq import ChatGroq
+llm = ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)
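+# NOTE: `userdata.get('groq2')` reads a Colab secret, so this line only works
+# inside Google Colab. On a Hugging Face Space the key would instead come from
+# an environment secret, e.g. `os.environ.get("GROQ_API_KEY")` (name assumed).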
+
+from langchain.prompts import ChatPromptTemplate, PromptTemplate
+from langchain.output_parsers import ResponseSchema, StructuredOutputParser
+
+import PyPDF2
+
+# Initialize required components
+TEMPLATE = """
+You are a helpful agent. Your task is to generate a meaningful question and an answer using the following provided "{context}"
+
+You MUST obey the following criteria:
+- No preamble.
+- Restrict the question to the context information provided, and provide the answer with its details in summary.
+- Do NOT create a question that cannot be answered from the context.
+- Phrase the question so that it does NOT refer to specific context.
+- For instance, do NOT use phrases like 'given the provided context' or 'in this work' in the question, or 'according to the text' in the answer, because if the question is asked elsewhere it would not come with this specific context. Replace such phrases with specific details.
+- Do NOT repeat the provided context.
+- Only generate a question and an answer, without any lead-in sentence such as "Here is the generated question and answer:".
+- Follow the recommended JSON format below.
+- Ensure that the output is a valid JSON object.
+{format_instructions}
 """
+
+prompt = ChatPromptTemplate.from_template(template=TEMPLATE)
+response_schemas = [
+    ResponseSchema(name="Question", description="The generated question from the provided context"),
+    ResponseSchema(name="Answer", description="The corresponding answer from the provided context"),
+]
+output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
+format_instructions = output_parser.get_format_instructions(only_json=True)
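+# The parser round-trip (sketch, based on the schemas above): `format_instructions`
+# asks the model for a JSON object shaped like {"Question": "...", "Answer": "..."},
+# and `output_parser.parse()` turns that JSON string back into a Python dict.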
 
+# Folder containing PDF files
+folder_path = "/content/drive/MyDrive/Chatbot"
+
+# List to store questions and answers as tuples
+data = []
+
+# Function to extract text from a PDF
+def extract_text_from_pdf(pdf_path):
+    with open(pdf_path, "rb") as file:
+        reader = PyPDF2.PdfReader(file)
+        text = ""
+        for page in reader.pages:
+            text += page.extract_text() or ""  # extract_text() can return None
+        return text
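+# Caveat: PyPDF2 only extracts embedded text; scanned, image-only PDFs come
+# back empty and would need OCR, which this script does not attempt.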
 
+# Process each PDF in the folder
+for filename in os.listdir(folder_path):
+    if filename.endswith(".pdf"):
+        pdf_path = os.path.join(folder_path, filename)
+        try:
+            # Extract text from the PDF
+            context = extract_text_from_pdf(pdf_path)
+
+            # Split context into manageable chunks (optional)
+            chunks = [context[i:i+200] for i in range(0, len(context), 200)]
+
+            for chunk in chunks:
+                # Format the messages
+                messages = prompt.format_messages(context=chunk, format_instructions=format_instructions)
+
+                # Invoke the LLM
+                response = llm.invoke(messages)
+
+                # Parse the response
+                output_dict = output_parser.parse(response.content)
+
+                # Extract question and answer
+                question = output_dict["Question"]
+                answer = output_dict["Answer"]
+
+                # Append question and answer as a tuple to the list
+                data.append((question, answer))
+
+        except Exception as e:
+            print(f"Error processing file {filename}: {e}")
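+# Each 200-character chunk yields one (question, answer) tuple in `data`; such
+# small chunks may split sentences mid-way, so a larger window could give
+# better-formed pairs.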
+
+# Function to chunk text into pieces of max_length
+def chunk_text(text, max_length=500):
+    return [text[i:i + max_length] for i in range(0, len(text), max_length)]
+
+# Specify the path to the PDF file
+pdf_path = "/content/drive/MyDrive/LAW Nº 59 ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES.pdf"
+# List to hold context data
+context_data = []
+
+try:
+    # Extract text from the PDF
+    pdf_text = extract_text_from_pdf(pdf_path)
+
+    if pdf_text:
+        # Create chunks of 500 characters
+        chunks = chunk_text(pdf_text, max_length=500)
+
+        # Add each chunk to context_data as a plain string
+        for chunk in chunks:
+            context_data.append(chunk)
+
+        # Print the context_data list
+        for entry in context_data:
+            print(entry)
+            print("-" * 40)  # Separator for readability
+    else:
+        print("No text found in the PDF.")
+except Exception as e:
+    print(f"Error reading the PDF: {e}")
+
+context_data.extend(data)
+
+processed_texts = []
+
+for element in context_data:
+    if isinstance(element, tuple):
+        # (question, answer) pairs generated earlier
+        question, answer = element
+        processed_texts.append(f"Question: {question} Answer: {answer}")
+    elif isinstance(element, str):
+        # raw 500-character chunks from the PDF
+        processed_texts.append(element)
+    else:
+        processed_texts.append(str(element))
+
+## Embedding model!
+from langchain_huggingface import HuggingFaceEmbeddings
+embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
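+# The first call downloads mixedbread-ai/mxbai-embed-large-v1 from the Hugging
+# Face Hub; embeddings are then computed locally via sentence-transformers.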
+
+from google.colab import drive
+drive.mount('/content/drive')
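+# NOTE: the PDF paths above already point into /content/drive, so in practice
+# Drive must be mounted before those reads; this mount is redundant at this
+# point in the script and belongs near the top.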
+
+# create vector store!
+from langchain_chroma import Chroma
+
+vectorstore = Chroma(
+    collection_name="laws_dataset",  # Changed the name to be compliant
+    embedding_function=embed_model,
+    persist_directory="./",
+)
+
+vectorstore.get().keys()
+
+# add data to vector store
+vectorstore.add_texts(processed_texts)
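+# `add_texts` embeds every string in `processed_texts` with `embed_model` and
+# writes the vectors into the Chroma collection persisted under ./, so later
+# similarity search compares query embeddings against these entries.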
+
+from langchain_core.prompts import PromptTemplate
+
+template = ("""You are a legal expert specializing in providing precise and reliable legal assistance.
+Use the provided legal context to answer the question with clear and accurate legal advice.
+If the context is irrelevant or insufficient, say so concisely without elaboration.
+Do not discuss or analyze the context unless absolutely necessary for clarity.
+Ensure your response is professional, detailed yet summarized, and rooted in legal reasoning.
+
+Legal Context: {context}
+
+Question: {question}
+
+Legal Advice:""")
+
+rag_prompt = PromptTemplate.from_template(template)
+
+retriever = vectorstore.as_retriever()
+
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+
+rag_chain = (
+    {"context": retriever, "question": RunnablePassthrough()}
+    | rag_prompt
+    | llm
+    | StrOutputParser()
+)
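+# How the chain flows: the dict step runs the retriever on the incoming question
+# to fill {context}, while RunnablePassthrough() forwards the question itself;
+# StrOutputParser() unwraps the model message into plain text, which is also
+# what lets rag_chain.stream() yield incremental text chunks below.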
+
+import gradio as gr
+
+def rag_memory_stream(message, history):
+    partial_text = ""
+    for new_text in rag_chain.stream(message):  # stream tokens as they arrive
+        partial_text += new_text
+        yield partial_text
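+# NOTE: `history` is accepted but never used, so despite its name this function
+# is stateless; each question is answered independently of earlier turns.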
+
+# Example questions shown in the UI
+examples = [
+    ["What is the main purpose of Law Nº 59/2018 of 22/8/2018?"]
+]
+
+description = (
+    "This Legal AI Assistant specializes in LAW Nº 59/2018 OF 22/8/2018 "
+    "ON THE CRIME OF GENOCIDE IDEOLOGY AND RELATED CRIMES."
+)
+
+title = "⚖️ Chat with me and learn Laws! ⚖️"
+
+# Custom CSS for styling the interface
+custom_css = """
+body {
+    background-color: black;
+    color: white;
+    font-family: "Times New Roman", serif;
+}
+.gradio-container {
+    font-family: "Times New Roman", serif;
+    color: white;
+}
+.gr-chatbot {
+    background-color: #222;  /* Dark background for chatbot */
+    border: 1px solid #555;
+    border-radius: 10px;
+    padding: 10px;
+    margin-bottom: 20px;
+}
+.gr-textbox {
+    background-color: #333;  /* Slightly lighter than background */
+    color: white;
+    border: 1px solid #555;
+    border-radius: 5px;
+}
+.gr-button {
+    background-color: #007bff;  /* Blue button */
+    color: white;
+    border: none;
+    border-radius: 5px;
+    font-size: 16px;
+    padding: 10px 20px;
+    cursor: pointer;
+}
+.gr-button:hover {
+    background-color: #0056b3;  /* Darker blue on hover */
+}
 """
+
+# Create the Chat Interface
 demo = gr.ChatInterface(
+    fn=rag_memory_stream,
+    type="messages",
+    title=title,
+    description=description,
+    fill_height=True,
+    examples=examples,
+    theme="soft",
+    css=custom_css,  # Apply the custom CSS
 )
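+# `type="messages"` uses the openai-style chat message format and requires a
+# recent Gradio release; older versions only support the default tuple format.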

 if __name__ == "__main__":
+    demo.launch(share=True, inbrowser=True, height=800, width="100%")