Spaces:
Sleeping
Sleeping
initial commit
Browse files- .gitattributes +1 -0
- LICENSE +21 -0
- chainlit.md +4 -0
- data/geo-10-3-notes.pdf +3 -0
- data/geo-10/geo-10-1.pdf +3 -0
- data/geo-10/geo-chapter-list.json +64 -0
- data/jess1ps_merged.pdf +3 -0
- index.faiss +0 -0
- index.pkl +3 -0
- ingest.py +30 -0
- learnai.py +148 -0
- learnai_chainlit.py +25 -0
- llm_loader.py +104 -0
- model.py +140 -0
- refbooks-vectorstore/geo-10-1/index.faiss +0 -0
- refbooks-vectorstore/geo-10-1/index.pkl +3 -0
- requirements.txt +13 -0
- testing.ipynb +68 -0
- vectorstore/db_faiss/geo-10-3/index.faiss +0 -0
- vectorstore/db_faiss/geo-10-3/index.pkl +3 -0
- vectorstore/db_faiss/geo-10-whole_book/index.faiss +0 -0
- vectorstore/db_faiss/geo-10-whole_book/index.pkl +3 -0
- vectorstore/db_faiss/index.faiss +0 -0
- vectorstore/db_faiss/index.pkl +3 -0
- word_docs/Geography-10/geo-10-1.docx +0 -0
- word_docs/testing.ipynb +68 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2023 AI Anytime
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
chainlit.md
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Welcome to Geo Bot! 🚀🤖
|
2 |
+
|
3 |
+
Hi there, 👋 We're excited to have you on board. This is a powerful bot designed to help you ask queries related to your data/knowledge.
|
4 |
+
|
data/geo-10-3-notes.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e627281b6bad3fc951001e4740c976e5916255758c87d6d2eea0f5f5c7adf29
|
3 |
+
size 220267
|
data/geo-10/geo-10-1.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f869020fed2bd143a8044316fd9071c9b194714ca3b19da19b7f80a577f77c3
|
3 |
+
size 228700
|
data/geo-10/geo-chapter-list.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"chapter_number": 1,
|
4 |
+
"chapter_name": "Resources and Development",
|
5 |
+
"subtopics": [
|
6 |
+
"Definition of Resource",
|
7 |
+
"Transformation Process in the Environment",
|
8 |
+
"Classification of Resources",
|
9 |
+
"Development of Resources",
|
10 |
+
"Problems Due to Indiscriminate Resource Use",
|
11 |
+
"Importance of Equitable Resource Distribution",
|
12 |
+
"Resource Planning",
|
13 |
+
"Sustainable Development",
|
14 |
+
"Rio de Janeiro Earth Summit, 1992",
|
15 |
+
"Agenda 21",
|
16 |
+
"Resource Planning in India",
|
17 |
+
"Factors in Resource Planning",
|
18 |
+
"Role of Technology and Institutions in Resource Development",
|
19 |
+
"Conservation of Resources",
|
20 |
+
"Historical Perspectives on Resource Conservation",
|
21 |
+
"Land Resources Importance",
|
22 |
+
"Land Utilization Types",
|
23 |
+
"Land Use Pattern in India",
|
24 |
+
"Forest Area Concerns",
|
25 |
+
"Land Degradation and Conservation Measures",
|
26 |
+
"Soil as a Resource and Its Formation",
|
27 |
+
"Classification of Soils",
|
28 |
+
"Alluvial Soils",
|
29 |
+
"Black Soil",
|
30 |
+
"Red and Yellow Soils",
|
31 |
+
"Laterite Soil",
|
32 |
+
"Arid Soils",
|
33 |
+
"Forest Soils",
|
34 |
+
"Soil Erosion and Conservation Methods"
|
35 |
+
]
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"chapter_number": 2,
|
39 |
+
"chapter_name": "In-Depth Analysis",
|
40 |
+
"subtopics": [
|
41 |
+
"Detailed Aspect 1",
|
42 |
+
"Detailed Aspect 2",
|
43 |
+
"Case Studies on the Topic"
|
44 |
+
]
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"chapter_number": 3,
|
48 |
+
"chapter_name": "Advanced Applications",
|
49 |
+
"subtopics": [
|
50 |
+
"Advanced Concept 1",
|
51 |
+
"Advanced Concept 2",
|
52 |
+
"Future Trends in the Field"
|
53 |
+
]
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"chapter_number": 4,
|
57 |
+
"chapter_name": "Conclusion and Summary",
|
58 |
+
"subtopics": [
|
59 |
+
"Recap of Key Points",
|
60 |
+
"Final Thoughts",
|
61 |
+
"Further Resources"
|
62 |
+
]
|
63 |
+
}
|
64 |
+
]
|
data/jess1ps_merged.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:057d8de0e37b5acc1975629794a147b54d42851f43d5e4cb3c9dec68c6428513
|
3 |
+
size 16202519
|
index.faiss
ADDED
Binary file (702 kB). View file
|
|
index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdce5bc50acb24703cf60f20d7b36783c00758713025be3bf803cc7430127d61
|
3 |
+
size 252285
|
ingest.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
+
from langchain.vectorstores import FAISS
|
3 |
+
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
|
4 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter, SpacyTextSplitter
|
5 |
+
|
6 |
+
# Default location of the source PDFs and of the FAISS index built from them.
DATA_PATH = 'data/geo-10'
DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'


def create_vector_db(data_path=DATA_PATH, glob='geo-10-1.pdf', db_path=DB_FAISS_PATH):
    """Build a FAISS index from the PDFs under ``data_path`` and save it to ``db_path``.

    Called with no arguments this ingests ``geo-10-1.pdf`` from ``DATA_PATH``
    and writes the index to ``DB_FAISS_PATH``.

    Args:
        data_path: Directory handed to ``DirectoryLoader``.
        glob: File pattern selecting which PDFs inside ``data_path`` to load.
        db_path: Directory the index is written to via ``save_local``.

    Raises:
        ValueError: If nothing was loaded, or splitting produced no chunks.
    """
    documents = DirectoryLoader(data_path,
                                glob=glob,
                                loader_cls=PyPDFLoader).load()
    if not documents:
        # Fail early with a clear message instead of handing FAISS an empty list.
        raise ValueError(f"No documents matched {glob!r} under {data_path!r}")

    # NOTE(review): SpacyTextSplitter presumably needs the spacy package and an
    # English pipeline installed; spacy is not listed in requirements.txt — confirm.
    splitter = SpacyTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)
    if not chunks:
        raise ValueError(f"No text chunks could be extracted from {data_path!r}")

    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )

    FAISS.from_documents(chunks, embeddings).save_local(db_path)


if __name__ == "__main__":
    create_vector_db()
|
learnai.py
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import random
|
3 |
+
import time
|
4 |
+
import json
|
5 |
+
|
6 |
+
from llm_loader import get_llm_response
|
7 |
+
|
8 |
+
from langchain.prompts import PromptTemplate
|
9 |
+
|
10 |
+
# Learner profile shown in the UI.
age = 15
name = 'Fidva'
grade = 10
# Literal placeholder strings (kept for compatibility with the other modules).
context = '{context}'
question = '{question}'

DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'
JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'

# JSON is UTF-8 by specification; do not rely on the platform's default encoding.
with open(JSON_FILE_PATH, 'r', encoding='utf-8') as file:
    book_data = json.load(file)

# Only the first chapter in the list is taught.
chapter_data = book_data[0]
subtopics = chapter_data['subtopics']
|
25 |
+
|
26 |
+
######## Streamlit App ########
|
27 |
+
|
28 |
+
st.title("Learn.AI")

# Custom theme injected as a raw <style> block. The tag is kept flush-left so
# the markdown renderer does not treat it as an indented code block.
# Fixes relative to the earlier stylesheet:
#   * "!important" now sits before the semicolon (after it, the token is
#     invalid CSS and is discarded).
#   * ".st-emotion-cache-1avcm0n ezrtsby2" selected a descendant <ezrtsby2>
#     element, which never exists; both names are classes on one element.
_CUSTOM_CSS = """
<style>
    @import url('https://fonts.googleapis.com/css2?family=Sometype+Mono:wght@400;500;600;700&display=swap');

    :root {
        --color-bg: #03045E;
        --color-footer: #0077B6;
        --color-component: #00B4D8;
        --font-family: 'Sometype Mono', sans-serif;
        --font-color: #FAFAFA;
    }
    body, .stTextArea textarea {
        font-family: var(--font-family) !important;
        background-color: var(--color-bg);
        color: var(--font-color) !important;
    }
    h1, p, li {
        font-family: var(--font-family);
        color: var(--font-color);
    }
    textarea {
        font-family: var(--font-family);
        color: var(--font-color);
        background-color: #FAFAFA;
    }
    .stApp {
        background-color: var(--color-bg);
    }
    .stChatMessage:nth-child(even) .stMarkdown {
        background-color: var(--color-bg);
    }
    .stChatFloatingInputContainer {
        background-color: var(--color-bg);
        color: var(--font-color);
    }
    .st-emotion-cache-1avcm0n.ezrtsby2 {
        background-color: #050647;
    }
    .st-emotion-cache-10trblm {
        --font-family: 'Sometype Mono', sans-serif;
    }
    .st-emotion-cache-nahz7x {
        --font-family: 'Sometype Mono', sans-serif;
    }
</style>
"""

st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
80 |
+
|
81 |
+
|
82 |
+
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Index into `subtopics` of the next lesson to teach.
if "lesson_count" not in st.session_state:
    st.session_state.lesson_count = 0
    # GreetingMessage = f'Hi there, {name}! Let\'s start the lesson! Type \'start\' when you\'re ready to begin!'
    # st.session_state.messages.append({"role": "assistant", "content": GreetingMessage})

# Inputs that ask for the next lesson rather than posing a question.
_LESSON_COMMANDS = ('start', 'continue', 'next')
_LESSON_SPINNERS = ['Writing notes...', 'Revising topic...', 'Clearing blackboard...',
                    'Formulating Lesson Plan...', 'Getting ready for doubts...']
_QUESTION_SPINNERS = ['Thinking...', 'Reading Textbook...', 'Clearing blackboard...',
                      'Revising Topics...', 'Refilling pen...']


def _ask_llm(spinner_labels, query, template_type):
    """Query the LLM while showing a randomly chosen spinner label; return its text."""
    with st.spinner(random.choice(spinner_labels)):
        return get_llm_response(query, template_type=template_type)


def _stream_reply(reply):
    """Type `reply` out word by word in the current chat bubble, then show it verbatim."""
    bubble = st.empty()
    typed = ""
    for word in reply.split():
        typed += word + " "
        time.sleep(0.05)
        # Add a blinking cursor to simulate typing
        bubble.markdown(typed + "▌")
    # The final render uses the original text so its own whitespace is kept.
    bubble.markdown(reply)


# Accept user input
if prompt := st.chat_input(f"Hi there, {name}! Let's start the lesson! Type 'start' when you're ready to begin!"):
    lesson_index = st.session_state.lesson_count
    # Once every subtopic has been taught the index equals len(subtopics);
    # indexing the list unconditionally would raise IndexError on any input.
    has_next_lesson = lesson_index < len(subtopics)
    if has_next_lesson:
        print("TOPIC NAME:", subtopics[lesson_index])
    print("USER:", prompt)

    # strip() so that "start " (trailing space) still counts as a command.
    if prompt.strip().lower() in _LESSON_COMMANDS:
        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            if has_next_lesson:
                print("Topic Name:", subtopics[lesson_index])
                assistant_response = _ask_llm(_LESSON_SPINNERS, subtopics[lesson_index], 'lesson')
                # Advance only after the lesson was generated successfully.
                st.session_state.lesson_count += 1
                print(assistant_response)
                _stream_reply(assistant_response)
            else:
                assistant_response = (
                    f"That was the last topic of this chapter, {name}! "
                    "Feel free to ask me anything about what we covered."
                )
                st.markdown(assistant_response)

        st.session_state.messages.append({"role": "assistant", "content": assistant_response})

    else:
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(prompt)

        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            assistant_response = _ask_llm(_QUESTION_SPINNERS, prompt, 'user')
            print(assistant_response)
            _stream_reply(assistant_response)
        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": assistant_response})
|
learnai_chainlit.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import chainlit as cl
|
3 |
+
|
4 |
+
from llm_loader import get_llm_response
|
5 |
+
|
6 |
+
from langchain.prompts import PromptTemplate
|
7 |
+
|
8 |
+
|
9 |
+
# Learner profile.
age = 15
name = "Fidva"
grade = 10
# Literal placeholder strings (kept for compatibility with the other modules).
context = '{context}'
question = '{question}'

DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'
JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'

# Load book data
# JSON is UTF-8 by specification; do not rely on the platform's default encoding.
with open(JSON_FILE_PATH, 'r', encoding='utf-8') as file:
    book_data = json.load(file)

# Only the first chapter in the list is used.
chapter_data = book_data[0]
subtopics = chapter_data['subtopics']
|
24 |
+
|
25 |
+
# Chainlit App
|
llm_loader.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
+
from langchain.vectorstores import FAISS
|
3 |
+
from langchain.llms import CTransformers
|
4 |
+
from langchain.chains import RetrievalQA
|
5 |
+
from langchain.prompts import PromptTemplate
|
6 |
+
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
7 |
+
|
8 |
+
from functools import lru_cache

# Learner profile baked into every prompt.
name = 'Fidva'
age = 15
grade = 10
# Formatting a template with these values writes the literal "{context}" and
# "{question}" back into the text, leaving them for the QA chain to fill.
context = '{context}'
question = '{question}'

DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'

# Prompt used when the learner asks a free-form question.
_USER_TEMPLATE = """Use the following pieces of information to answer the user's question in a friendly way.
If you don't know the answer, just say that you don't know, don't try to make up an answer. Also refer to the user by their name, and keep in mind their age while answering the question.

Name of user: {name}
Age of user: {age}
Grade of user: {grade}
Context: {context}
Question: {question}

Return the Helpful Answer, and then also give the user a Knowledge Check Question related to what he just asked.
Returning the helpful answer is a must and takes higher priority.

Helpful answer:
"""

# Prompt used when the app teaches the next subtopic.
_LESSON_TEMPLATE = """Hello {name}! Let's dive into the topic of {question} together.

As a {grade}th grader at {age} years old, it's great to explore this subject!

Let's start by understanding the context:

{context}

Now, to grasp this topic better, here are some key points to consider:

- Explain the fundamental concept or idea related to {question}.

- Provide examples or illustrations to make it easier to comprehend.

- Share any real-life applications or relevance of this topic.

Feel free to ask if you have any questions along the way. Let's learn together!
"""

_TEMPLATES = {'user': _USER_TEMPLATE, 'lesson': _LESSON_TEMPLATE}


@lru_cache(maxsize=1)
def _load_vector_store():
    """Load the FAISS index from DB_FAISS_PATH once and reuse it for later calls."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': 'cpu'})
    # NOTE(review): the store ships an index.pkl, so loading presumably unpickles
    # it — only point DB_FAISS_PATH at indexes you built yourself.
    return FAISS.load_local(DB_FAISS_PATH, embeddings)


@lru_cache(maxsize=1)
def _load_llm():
    """Create the CTransformers LLM once and reuse it for later calls."""
    return CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        callbacks=[StreamingStdOutCallbackHandler()],
        config={
            'context_length': 4096,
            'temperature': 0.1,
            'max_new_tokens': 512,
        },
    )


def get_llm_response(query, template_type):
    """Answer ``query`` with a retrieval-augmented LLM call.

    Args:
        query: The learner's question ('user') or the subtopic to teach ('lesson').
        template_type: Which prompt to use, 'user' or 'lesson'.

    Returns:
        The chain's ``'result'`` text.

    Raises:
        ValueError: If ``template_type`` is not 'user' or 'lesson'.
    """
    # Validate first: an unknown type used to surface as an unbound-variable
    # error further down, after nothing useful had happened.
    try:
        raw_template = _TEMPLATES[template_type]
    except KeyError:
        raise ValueError(
            f"Unknown template_type {template_type!r}; expected one of {sorted(_TEMPLATES)}"
        ) from None

    # Fill in the learner profile now; {context} and {question} survive as
    # placeholders because the module-level values are those literal strings.
    prompt_template = PromptTemplate.from_template(raw_template).format(
        name=name,
        age=age,
        grade=grade,
        context=context,
        question=question
    )

    qa_prompt = PromptTemplate(template=prompt_template,
                               input_variables=['context', 'question'])

    print(qa_prompt)
    # The index and the model are cached, so repeated calls no longer reload them.
    qa_chain = RetrievalQA.from_chain_type(llm=_load_llm(),
                                           chain_type='stuff',
                                           retriever=_load_vector_store().as_retriever(
                                               search_kwargs={'k': 1}),
                                           return_source_documents=True,
                                           chain_type_kwargs={
                                               'prompt': qa_prompt},
                                           )

    response = qa_chain({'query': query})
    return response['result']
|
model.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
+
from langchain.vectorstores import FAISS
|
3 |
+
from langchain.llms import CTransformers
|
4 |
+
from langchain.chains import RetrievalQA
|
5 |
+
from langchain.prompts import PromptTemplate
|
6 |
+
|
7 |
+
import chainlit as cl
|
8 |
+
|
9 |
+
# Learner profile baked into the prompt.
age = 15
name = 'Fidva'
grade = 10
# Formatting the template with these values writes the literal "{context}" and
# "{question}" back into the text, leaving them for the QA chain to fill.
context = '{context}'
question = '{question}'

DB_FAISS_PATH = 'vectorstore/db_faiss/geo-10-whole_book'
# SECURITY: a hard-coded OpenAI API key used to sit here in a comment. It has
# been removed and must be treated as leaked (revoke it). Never commit
# credentials; read them from the environment or a secrets manager instead.

_RAW_PROMPT_TEXT = """Use the following pieces of information to answer the user's question in a friendly way.
If you don't know the answer, just say that you don't know, don't try to make up an answer. Also refer to the user by their name, and keep in mind their age while answering the question.

Name of user: {name}
Age of user: {age}
Grade of user: {grade}
Context: {context}
Question: {question}

Return the Helpful Answer, and then also give the user a Knowledge Check Question related to what he just asked.
Returning the helpful answer is a must and takes higher priority.

Helpful answer:
"""

# Template object over the raw text, then the text with the profile filled in.
custom_prompt_template = PromptTemplate.from_template(_RAW_PROMPT_TEXT)
formatted_prompt = custom_prompt_template.format(
    name=name, age=age, grade=grade, context=context, question=question)
|
36 |
+
|
37 |
+
|
38 |
+
def set_custom_prompt():
    """Return the QA prompt: ``formatted_prompt`` with ``context`` and ``question`` as inputs."""
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=formatted_prompt,
    )
|
45 |
+
|
46 |
+
# Retrieval QA Chain
|
47 |
+
|
48 |
+
|
49 |
+
def retrieval_qa_chain(llm, prompt, db):
    """Assemble a 'stuff' RetrievalQA chain over ``db``.

    The retriever is configured with ``k=1`` and the chain is asked to
    return its source documents alongside the result.
    """
    top_hit_retriever = db.as_retriever(search_kwargs={'k': 1})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=top_hit_retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )
|
58 |
+
|
59 |
+
# Loading the model
|
60 |
+
|
61 |
+
|
62 |
+
def load_llm():
    """Create the CTransformers LLM for "TheBloke/Llama-2-7B-Chat-GGML"."""
    generation_config = {
        'context_length': 4096,
        'temperature': 0.3,
        'max_new_tokens': 512,
    }
    return CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        # model = "zephyr-7b-beta.Q5_K_S.gguf",
        model_type="llama",
        config=generation_config,
    )
|
88 |
+
|
89 |
+
# QA Model Function
|
90 |
+
|
91 |
+
|
92 |
+
def qa_bot():
    """Load the FAISS store at ``DB_FAISS_PATH`` and wire it, the LLM and the prompt into a QA chain."""
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    vector_store = FAISS.load_local(DB_FAISS_PATH, embedder)
    # Arguments are evaluated left to right: LLM first, then the prompt.
    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_store)
|
101 |
+
|
102 |
+
# output function
|
103 |
+
|
104 |
+
|
105 |
+
def final_result(query):
    """Build a QA chain with ``qa_bot`` and return its full response for ``query``."""
    chain = qa_bot()
    return chain({'query': query})
|
109 |
+
|
110 |
+
# chainlit code
|
111 |
+
|
112 |
+
|
113 |
+
@cl.on_chat_start
async def start():
    """Build the QA chain, greet the user, and store the chain in the user session."""
    qa_chain = qa_bot()

    # Send a status message, then rewrite it into the greeting.
    greeting = cl.Message(content="Starting the bot...")
    await greeting.send()
    greeting.content = "Hi, Welcome to Geo Bot. What is your query?"
    await greeting.update()

    cl.user_session.set("chain", qa_chain)
|
122 |
+
|
123 |
+
|
124 |
+
@cl.on_message
async def main(message: cl.Message):
    """Run the session's QA chain on the incoming message and reply with the answer plus its sources."""
    qa_chain = cl.user_session.get("chain")
    handler = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    handler.answer_reached = True

    result = await qa_chain.acall(message.content, callbacks=[handler])
    reply = result["result"]
    source_docs = result["source_documents"]

    # Append either the retrieved documents or a note that there were none.
    reply += "\nSources:" + str(source_docs) if source_docs else "\nNo sources found"

    await cl.Message(content=reply).send()
|
refbooks-vectorstore/geo-10-1/index.faiss
ADDED
Binary file (84.5 kB). View file
|
|
refbooks-vectorstore/geo-10-1/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:139f9355d0fdc3c31b2f4f35fe7de6bf92f01af1349dfefb381571b4aee6a836
|
3 |
+
size 29329
|
requirements.txt
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accelerate==0.25.0
|
2 |
+
bitsandbytes==0.41.2.post2
|
3 |
+
chainlit==0.7.700
|
4 |
+
ctransformers==0.2.27
|
5 |
+
exceptiongroup==1.2.0
|
6 |
+
faiss-cpu==1.7.4
|
7 |
+
langchain==0.0.345
|
8 |
+
openai==1.3.7
|
9 |
+
pickleshare==0.7.5
|
10 |
+
pip-chill==1.0.3
|
11 |
+
pypdf==3.17.1
|
12 |
+
sentence-transformers==2.2.2
|
13 |
+
streamlit==1.29.0
|
testing.ipynb
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 3,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import json"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 15,
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [
|
17 |
+
{
|
18 |
+
"name": "stdout",
|
19 |
+
"output_type": "stream",
|
20 |
+
"text": [
|
21 |
+
"<class 'list'>\n",
|
22 |
+
"<class 'dict'>\n",
|
23 |
+
"{'chapter_number': 1, 'chapter_name': 'Resources and Development', 'subtopics': ['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']}\n",
|
24 |
+
"['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']\n"
|
25 |
+
]
|
26 |
+
}
|
27 |
+
],
|
28 |
+
"source": [
|
29 |
+
"DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'\n",
|
30 |
+
"JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'\n",
|
31 |
+
"\n",
|
32 |
+
"with open(JSON_FILE_PATH, 'r') as file:\n",
|
33 |
+
" book_data = json.load(file)\n",
|
34 |
+
" print(type(book_data))\n",
|
35 |
+
" \n",
|
36 |
+
"chapter_data = book_data[0]\n",
|
37 |
+
"print(type(chapter_data))\n",
|
38 |
+
"print(chapter_data)\n",
|
39 |
+
"\n",
|
40 |
+
"chapter_name = chapter_data['chapter_name']\n",
|
41 |
+
"subtopics = chapter_data['subtopics']\n",
|
42 |
+
"\n",
|
43 |
+
"print(subtopics)"
|
44 |
+
]
|
45 |
+
}
|
46 |
+
],
|
47 |
+
"metadata": {
|
48 |
+
"kernelspec": {
|
49 |
+
"display_name": "Python 3",
|
50 |
+
"language": "python",
|
51 |
+
"name": "python3"
|
52 |
+
},
|
53 |
+
"language_info": {
|
54 |
+
"codemirror_mode": {
|
55 |
+
"name": "ipython",
|
56 |
+
"version": 3
|
57 |
+
},
|
58 |
+
"file_extension": ".py",
|
59 |
+
"mimetype": "text/x-python",
|
60 |
+
"name": "python",
|
61 |
+
"nbconvert_exporter": "python",
|
62 |
+
"pygments_lexer": "ipython3",
|
63 |
+
"version": "3.11.5"
|
64 |
+
}
|
65 |
+
},
|
66 |
+
"nbformat": 4,
|
67 |
+
"nbformat_minor": 2
|
68 |
+
}
|
vectorstore/db_faiss/geo-10-3/index.faiss
ADDED
Binary file (21.5 kB). View file
|
|
vectorstore/db_faiss/geo-10-3/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c6a4d112990968a29d9e16897bd5b5a166acec3a894df2daf10419a310c6d72
|
3 |
+
size 7312
|
vectorstore/db_faiss/geo-10-whole_book/index.faiss
ADDED
Binary file (702 kB). View file
|
|
vectorstore/db_faiss/geo-10-whole_book/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b03d45e671070a77668a6d3431509d26e5d792549418c2070a6b4b491e3fe597
|
3 |
+
size 252285
|
vectorstore/db_faiss/index.faiss
ADDED
Binary file (702 kB). View file
|
|
vectorstore/db_faiss/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fc76b583cb25e343ab9bc3292f3096035eba770f775d73246902c1151d9a54b
|
3 |
+
size 252285
|
word_docs/Geography-10/geo-10-1.docx
ADDED
Binary file (26.8 kB). View file
|
|
word_docs/testing.ipynb
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 3,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import json"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 15,
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [
|
17 |
+
{
|
18 |
+
"name": "stdout",
|
19 |
+
"output_type": "stream",
|
20 |
+
"text": [
|
21 |
+
"<class 'list'>\n",
|
22 |
+
"<class 'dict'>\n",
|
23 |
+
"{'chapter_number': 1, 'chapter_name': 'Resources and Development', 'subtopics': ['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']}\n",
|
24 |
+
"['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']\n"
|
25 |
+
]
|
26 |
+
}
|
27 |
+
],
|
28 |
+
"source": [
|
29 |
+
"DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'\n",
|
30 |
+
"JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'\n",
|
31 |
+
"\n",
|
32 |
+
"with open(JSON_FILE_PATH, 'r') as file:\n",
|
33 |
+
" book_data = json.load(file)\n",
|
34 |
+
" print(type(book_data))\n",
|
35 |
+
" \n",
|
36 |
+
"chapter_data = book_data[0]\n",
|
37 |
+
"print(type(chapter_data))\n",
|
38 |
+
"print(chapter_data)\n",
|
39 |
+
"\n",
|
40 |
+
"chapter_name = chapter_data['chapter_name']\n",
|
41 |
+
"subtopics = chapter_data['subtopics']\n",
|
42 |
+
"\n",
|
43 |
+
"print(subtopics)"
|
44 |
+
]
|
45 |
+
}
|
46 |
+
],
|
47 |
+
"metadata": {
|
48 |
+
"kernelspec": {
|
49 |
+
"display_name": "Python 3",
|
50 |
+
"language": "python",
|
51 |
+
"name": "python3"
|
52 |
+
},
|
53 |
+
"language_info": {
|
54 |
+
"codemirror_mode": {
|
55 |
+
"name": "ipython",
|
56 |
+
"version": 3
|
57 |
+
},
|
58 |
+
"file_extension": ".py",
|
59 |
+
"mimetype": "text/x-python",
|
60 |
+
"name": "python",
|
61 |
+
"nbconvert_exporter": "python",
|
62 |
+
"pygments_lexer": "ipython3",
|
63 |
+
"version": "3.11.5"
|
64 |
+
}
|
65 |
+
},
|
66 |
+
"nbformat": 4,
|
67 |
+
"nbformat_minor": 2
|
68 |
+
}
|