V15h committed
Commit 888d109 · 1 Parent(s): f30d170

initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.pdf filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2023 AI Anytime
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
chainlit.md ADDED
@@ -0,0 +1,4 @@
+ # Welcome to Llama2 Med-Bot! 🚀🤖
+
+ Hi there, 👋 We're excited to have you on board. This is a powerful bot designed to help you ask questions about your data and knowledge base.
+
data/geo-10-3-notes.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e627281b6bad3fc951001e4740c976e5916255758c87d6d2eea0f5f5c7adf29
+ size 220267
data/geo-10/geo-10-1.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3f869020fed2bd143a8044316fd9071c9b194714ca3b19da19b7f80a577f77c3
+ size 228700
data/geo-10/geo-chapter-list.json ADDED
@@ -0,0 +1,64 @@
+ [
+     {
+         "chapter_number": 1,
+         "chapter_name": "Resources and Development",
+         "subtopics": [
+             "Definition of Resource",
+             "Transformation Process in the Environment",
+             "Classification of Resources",
+             "Development of Resources",
+             "Problems Due to Indiscriminate Resource Use",
+             "Importance of Equitable Resource Distribution",
+             "Resource Planning",
+             "Sustainable Development",
+             "Rio de Janeiro Earth Summit, 1992",
+             "Agenda 21",
+             "Resource Planning in India",
+             "Factors in Resource Planning",
+             "Role of Technology and Institutions in Resource Development",
+             "Conservation of Resources",
+             "Historical Perspectives on Resource Conservation",
+             "Land Resources Importance",
+             "Land Utilization Types",
+             "Land Use Pattern in India",
+             "Forest Area Concerns",
+             "Land Degradation and Conservation Measures",
+             "Soil as a Resource and Its Formation",
+             "Classification of Soils",
+             "Alluvial Soils",
+             "Black Soil",
+             "Red and Yellow Soils",
+             "Laterite Soil",
+             "Arid Soils",
+             "Forest Soils",
+             "Soil Erosion and Conservation Methods"
+         ]
+     },
+     {
+         "chapter_number": 2,
+         "chapter_name": "In-Depth Analysis",
+         "subtopics": [
+             "Detailed Aspect 1",
+             "Detailed Aspect 2",
+             "Case Studies on the Topic"
+         ]
+     },
+     {
+         "chapter_number": 3,
+         "chapter_name": "Advanced Applications",
+         "subtopics": [
+             "Advanced Concept 1",
+             "Advanced Concept 2",
+             "Future Trends in the Field"
+         ]
+     },
+     {
+         "chapter_number": 4,
+         "chapter_name": "Conclusion and Summary",
+         "subtopics": [
+             "Recap of Key Points",
+             "Final Thoughts",
+             "Further Resources"
+         ]
+     }
+ ]
data/jess1ps_merged.pdf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:057d8de0e37b5acc1975629794a147b54d42851f43d5e4cb3c9dec68c6428513
+ size 16202519
index.faiss ADDED
Binary file (702 kB)
 
index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdce5bc50acb24703cf60f20d7b36783c00758713025be3bf803cc7430127d61
+ size 252285
ingest.py ADDED
@@ -0,0 +1,30 @@
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter, SpacyTextSplitter
+
+ DATA_PATH = 'data/geo-10'
+ DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'
+
+ # Create vector database
+
+
+ def create_vector_db():
+     loader = DirectoryLoader(DATA_PATH,
+                              glob='geo-10-1.pdf',
+                              loader_cls=PyPDFLoader)
+
+     documents = loader.load()
+     text_splitter = SpacyTextSplitter(chunk_size=500,
+                                       chunk_overlap=50)
+     texts = text_splitter.split_documents(documents)
+
+     embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
+                                        model_kwargs={'device': 'cpu'})
+
+     db = FAISS.from_documents(texts, embeddings)
+     db.save_local(DB_FAISS_PATH)
+
+
+ if __name__ == "__main__":
+     create_vector_db()
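ingest.py only builds and saves the index. A minimal sketch for sanity-checking the store it writes, assuming the paths above; the query string and `k` value are illustrative, not part of the commit:

```python
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Reload the index written by create_vector_db() and run an ad-hoc search.
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                   model_kwargs={'device': 'cpu'})
db = FAISS.load_local('refbooks-vectorstore/geo-10-1', embeddings)

# Illustrative query; any subtopic from geo-chapter-list.json would do.
for doc in db.similarity_search('Classification of Resources', k=2):
    print(doc.page_content[:200])
```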
learnai.py ADDED
@@ -0,0 +1,148 @@
+ import streamlit as st
+ import random
+ import time
+ import json
+
+ from llm_loader import get_llm_response
+
+ from langchain.prompts import PromptTemplate
+
+ age = 15
+ name = 'Fidva'
+ grade = 10
+ context = '{context}'
+ question = '{question}'
+
+ DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'
+ JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'
+
+
+ with open(JSON_FILE_PATH, 'r') as file:
+     book_data = json.load(file)
+
+ chapter_data = book_data[0]
+ subtopics = chapter_data['subtopics']
+
+ ######## Streamlit App ########
+
+ st.title("Learn.AI")
+
+ # Add custom CSS properties
+ st.markdown(
+     """
+     <style>
+     @import url('https://fonts.googleapis.com/css2?family=Sometype+Mono:wght@400;500;600;700&display=swap');
+
+     :root {
+         --color-bg: #03045E;
+         --color-footer: #0077B6;
+         --color-component: #00B4D8;
+         --font-family: 'Sometype Mono', sans-serif;
+         --font-color: #FAFAFA;
+     }
+     body, .stTextArea textarea {
+         font-family: var(--font-family) !important;
+         background-color: var(--color-bg);
+         color: var(--font-color) !important;
+     }
+     h1, p, li {
+         font-family: var(--font-family);
+         color: var(--font-color);
+     }
+     textarea {
+         font-family: var(--font-family);
+         color: var(--font-color);
+         background-color: #FAFAFA;
+     }
+     .stApp {
+         background-color: var(--color-bg);
+     }
+     .stChatMessage:nth-child(even) .stMarkdown {
+         background-color: var(--color-bg);
+     }
+     .stChatFloatingInputContainer {
+         background-color: var(--color-bg);
+         color: var(--font-color);
+     }
+     .st-emotion-cache-1avcm0n ezrtsby2 {
+         background-color: #050647;
+     }
+     .st-emotion-cache-10trblm {
+         --font-family: 'Sometype Mono', sans-serif;
+     }
+     .st-emotion-cache-nahz7x {
+         --font-family: 'Sometype Mono', sans-serif;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True
+ )
+
+
+ # Initialize chat history
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ # Display chat messages from history on app rerun
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+ if "lesson_count" not in st.session_state:
+     st.session_state.lesson_count = 0
+     # GreetingMessage = f'Hi there, {name}! Let\'s start the lesson! Type \'start\' when you\'re ready to begin!'
+     # st.session_state.messages.append({"role": "assistant", "content": GreetingMessage})
+
+ # Accept user input
+ if prompt := st.chat_input(f'Hi there, {name}! Let\'s start the lesson! Type \'start\' when you\'re ready to begin!'):
+     print("TOPIC NAME:", subtopics[st.session_state.lesson_count])
+     print("USER:", prompt)
+     if prompt.lower() == 'start' or prompt.lower() == 'continue' or prompt.lower() == 'next':
+         print("Topic Name:", subtopics[st.session_state.lesson_count])
+         # Display assistant response in chat message container
+         with st.chat_message("assistant"):
+             placeholder_list = ['Writing notes...', 'Revising topic...', 'Clearing blackboard...', 'Formulating Lesson Plan...', 'Getting ready for doubts...']
+             placeholder_text = random.choice(placeholder_list)
+             with st.spinner(placeholder_text):
+                 assistant_response = get_llm_response(subtopics[st.session_state.lesson_count], template_type='lesson')
+                 st.session_state.lesson_count += 1
+             message_placeholder = st.empty()
+             full_response = ""
+
+             # Simulate stream of response with milliseconds delay
+             print(assistant_response)
+             for chunk in assistant_response.split():
+                 full_response += chunk + " "
+                 time.sleep(0.05)
+                 # Add a blinking cursor to simulate typing
+                 message_placeholder.markdown(full_response + "▌")
+             message_placeholder.markdown(assistant_response)
+
+         st.session_state.messages.append({"role": "assistant", "content": assistant_response})
+
+     else:
+         # Add user message to chat history
+         st.session_state.messages.append({"role": "user", "content": prompt})
+         # Display user message in chat message container
+         with st.chat_message("user"):
+             st.markdown(prompt)
+
+         # Display assistant response in chat message container
+         with st.chat_message("assistant"):
+             placeholder_list = ['Thinking...', 'Reading Textbook...', 'Clearing blackboard...', 'Revising Topics...', 'Refilling pen...']
+             placeholder_text = random.choice(placeholder_list)
+             with st.spinner(placeholder_text):
+                 assistant_response = get_llm_response(prompt, template_type='user')
+             message_placeholder = st.empty()
+             full_response = ""
+
+             # Simulate stream of response with milliseconds delay
+             print(assistant_response)
+             for chunk in assistant_response.split():
+                 full_response += chunk + " "
+                 time.sleep(0.05)
+                 # Add a blinking cursor to simulate typing
+                 message_placeholder.markdown(full_response + "▌")
+             message_placeholder.markdown(assistant_response)
+         # Add assistant response to chat history
+         st.session_state.messages.append({"role": "assistant", "content": assistant_response})
learnai_chainlit.py ADDED
@@ -0,0 +1,25 @@
+ import json
+ import chainlit as cl
+
+ from llm_loader import get_llm_response
+
+ from langchain.prompts import PromptTemplate
+
+
+ age = 15
+ name = "Fidva"
+ grade = 10
+ context = '{context}'
+ question = '{question}'
+
+ DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'
+ JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'
+
+ # Load book data
+ with open(JSON_FILE_PATH, 'r') as file:
+     book_data = json.load(file)
+
+ chapter_data = book_data[0]
+ subtopics = chapter_data['subtopics']
+
+ # Chainlit App
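The file stops at the `# Chainlit App` marker without defining any handlers yet. A minimal sketch of how they might look, reusing `get_llm_response` and the `@cl.on_chat_start` / `@cl.on_message` pattern from model.py; the greeting text and the `lesson_count` session key are assumptions, not part of this commit:

```python
@cl.on_chat_start
async def start():
    # Assumed greeting, mirroring the Streamlit prompt in learnai.py.
    cl.user_session.set("lesson_count", 0)
    await cl.Message(content=f"Hi there, {name}! Type 'start' when you're ready to begin!").send()


@cl.on_message
async def main(message: cl.Message):
    count = cl.user_session.get("lesson_count")
    if message.content.lower() in ('start', 'continue', 'next'):
        # Teach the next subtopic from the chapter list (blocking call, kept simple for the sketch).
        response = get_llm_response(subtopics[count], template_type='lesson')
        cl.user_session.set("lesson_count", count + 1)
    else:
        # Treat anything else as a question about the material.
        response = get_llm_response(message.content, template_type='user')
    await cl.Message(content=response).send()
```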
llm_loader.py ADDED
@@ -0,0 +1,104 @@
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.llms import CTransformers
+ from langchain.chains import RetrievalQA
+ from langchain.prompts import PromptTemplate
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
+ name = 'Fidva'
+ age = 15
+ grade = 10
+ context = '{context}'
+ question = '{question}'
+
+ DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'
+
+
+ def get_llm_response(query, template_type):
+
+     if template_type == 'user':
+         user_template = """Use the following pieces of information to answer the user's question in a friendly way.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer. Also refer to the user by their name, and keep in mind their age while answering the question.
+
+ Name of user: {name}
+ Age of user: {age}
+ Grade of user: {grade}
+ Context: {context}
+ Question: {question}
+
+ Return the Helpful Answer, and then also give the user a Knowledge Check Question related to what he just asked.
+ Returning the helpful answer is a must and takes higher priority.
+
+ Helpful answer:
+ """
+         unformatted_prompt_template = PromptTemplate.from_template(
+             user_template)
+
+     elif template_type == 'lesson':
+         # lesson_template = """Teach the given topic in accordance with the content below to the user in a friendly way, while keeping in mind the user's age and his grade.
+         # Name of user: {name}
+         # Age of user: {age}
+         # Grade of user: {grade}
+         # Content: {context}
+         # Topic: {question}
+         # """
+         lesson_template = """Hello {name}! Let's dive into the topic of {question} together.
+
+ As a {grade}th grader at {age} years old, it's great to explore this subject!
+
+ Let's start by understanding the context:
+
+ {context}
+
+ Now, to grasp this topic better, here are some key points to consider:
+
+ - Explain the fundamental concept or idea related to {question}.
+
+ - Provide examples or illustrations to make it easier to comprehend.
+
+ - Share any real-life applications or relevance of this topic.
+
+ Feel free to ask if you have any questions along the way. Let's learn together!
+ """
+         unformatted_prompt_template = PromptTemplate.from_template(
+             lesson_template)
+
+     prompt_template = unformatted_prompt_template.format(
+         name=name,
+         age=age,
+         grade=grade,
+         context=context,
+         question=question
+     )
+
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
+                                        model_kwargs={'device': 'cpu'})
+     db = FAISS.load_local(DB_FAISS_PATH, embeddings)
+
+     # Load the locally downloaded model here
+     llm = CTransformers(
+         model="TheBloke/Llama-2-7B-Chat-GGML",
+         model_type="llama",
+         callbacks=[StreamingStdOutCallbackHandler()],
+         config={
+             'context_length': 4096,
+             'temperature': 0.1,
+             'max_new_tokens': 512,
+         },
+     )
+
+     qa_prompt = PromptTemplate(template=prompt_template,
+                                input_variables=['context', 'question'])
+
+     print(qa_prompt)
+     qa_result = RetrievalQA.from_chain_type(llm=llm,
+                                             chain_type='stuff',
+                                             retriever=db.as_retriever(
+                                                 search_kwargs={'k': 1}),
+                                             return_source_documents=True,
+                                             chain_type_kwargs={
+                                                 'prompt': qa_prompt},
+                                             )
+
+     response = qa_result({'query': query})
+     return response['result']
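A quick way to exercise this module on its own, assuming the vector store under refbooks-vectorstore/geo-10-1 has already been built by ingest.py; the topic string is illustrative:

```python
if __name__ == "__main__":
    # template_type='lesson' teaches a subtopic; 'user' answers a free-form question.
    print(get_llm_response("Classification of Resources", template_type="lesson"))
```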
model.py ADDED
@@ -0,0 +1,140 @@
+ from langchain.embeddings import HuggingFaceEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.llms import CTransformers
+ from langchain.chains import RetrievalQA
+ from langchain.prompts import PromptTemplate
+
+ import chainlit as cl
+
+ age = 15
+ name = 'Fidva'
+ grade = 10
+ context = '{context}'
+ question = '{question}'
+
+ DB_FAISS_PATH = 'vectorstore/db_faiss/geo-10-whole_book'
+ # OPENAI_API_KEY = "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"
+
+ custom_prompt_template = """Use the following pieces of information to answer the user's question in a friendly way.
+ If you don't know the answer, just say that you don't know, don't try to make up an answer. Also refer to the user by their name, and keep in mind their age while answering the question.
+
+ Name of user: {name}
+ Age of user: {age}
+ Grade of user: {grade}
+ Context: {context}
+ Question: {question}
+
+ Return the Helpful Answer, and then also give the user a Knowledge Check Question related to what he just asked.
+ Returning the helpful answer is a must and takes higher priority.
+
+ Helpful answer:
+ """
+
+ custom_prompt_template = PromptTemplate.from_template(custom_prompt_template)
+ formatted_prompt = custom_prompt_template.format(
+     name=name, age=age, grade=grade, context=context, question=question)
+
+
+ def set_custom_prompt():
+     """
+     Prompt template for QA retrieval for each vectorstore
+     """
+     prompt = PromptTemplate(template=formatted_prompt,
+                             input_variables=['context', 'question'])
+     return prompt
+
+ # Retrieval QA Chain
+
+
+ def retrieval_qa_chain(llm, prompt, db):
+     qa_chain = RetrievalQA.from_chain_type(llm=llm,
+                                            chain_type='stuff',
+                                            retriever=db.as_retriever(
+                                                search_kwargs={'k': 1}),
+                                            return_source_documents=True,
+                                            chain_type_kwargs={'prompt': prompt}
+                                            )
+     return qa_chain
+
+ # Loading the model
+
+
+ def load_llm():
+
+     # config = AutoConfig.from_pretrained("TheBloke/Llama-2-7B-Chat-GGML")
+     # config.max_seq_len = 4096
+     # config.max_answer_len = 1024
+
+     # Load the locally downloaded model here
+     llm = CTransformers(
+         model="TheBloke/Llama-2-7B-Chat-GGML",
+         # model = "zephyr-7b-beta.Q5_K_S.gguf",
+         model_type="llama",
+         config={
+             'context_length': 4096,
+             'temperature': 0.3,
+             'max_new_tokens': 512,
+         },
+     )
+
+     # llm = AutoModelForCausalLM.from_pretrained(
+     #     "TheBloke/Llama-2-7B-Chat-GGML",
+     #     model_type="llama",
+     #     config=config,
+     #     temperature=0.5
+     # )
+
+     return llm
+
+ # QA Model Function
+
+
+ def qa_bot():
+     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
+                                        model_kwargs={'device': 'cpu'})
+     db = FAISS.load_local(DB_FAISS_PATH, embeddings)
+     llm = load_llm()
+     qa_prompt = set_custom_prompt()
+     qa = retrieval_qa_chain(llm, qa_prompt, db)
+
+     return qa
+
+ # output function
+
+
+ def final_result(query):
+     qa_result = qa_bot()
+     response = qa_result({'query': query})
+     return response
+
+ # chainlit code
+
+
+ @cl.on_chat_start
+ async def start():
+     chain = qa_bot()
+     msg = cl.Message(content="Starting the bot...")
+     await msg.send()
+     msg.content = "Hi, Welcome to Geo Bot. What is your query?"
+     await msg.update()
+
+     cl.user_session.set("chain", chain)
+
+
+ @cl.on_message
+ async def main(message: cl.Message):
+     chain = cl.user_session.get("chain")
+     cb = cl.AsyncLangchainCallbackHandler(
+         stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
+     )
+     cb.answer_reached = True
+     res = await chain.acall(message.content, callbacks=[cb])
+     answer = res["result"]
+     sources = res["source_documents"]
+
+     if sources:
+         answer += f"\nSources:" + str(sources)
+     else:
+         answer += "\nNo sources found"
+
+     await cl.Message(content=answer).send()
refbooks-vectorstore/geo-10-1/index.faiss ADDED
Binary file (84.5 kB)
 
refbooks-vectorstore/geo-10-1/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:139f9355d0fdc3c31b2f4f35fe7de6bf92f01af1349dfefb381571b4aee6a836
+ size 29329
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ accelerate==0.25.0
+ bitsandbytes==0.41.2.post2
+ chainlit==0.7.700
+ ctransformers==0.2.27
+ exceptiongroup==1.2.0
+ faiss-cpu==1.7.4
+ langchain==0.0.345
+ openai==1.3.7
+ pickleshare==0.7.5
+ pip-chill==1.0.3
+ pypdf==3.17.1
+ sentence-transformers==2.2.2
+ streamlit==1.29.0
testing.ipynb ADDED
@@ -0,0 +1,68 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": 3,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import json"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 15,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "<class 'list'>\n",
+       "<class 'dict'>\n",
+       "{'chapter_number': 1, 'chapter_name': 'Resources and Development', 'subtopics': ['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']}\n",
+       "['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']\n"
+      ]
+     }
+    ],
+    "source": [
+     "DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'\n",
+     "JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'\n",
+     "\n",
+     "with open(JSON_FILE_PATH, 'r') as file:\n",
+     "    book_data = json.load(file)\n",
+     "    print(type(book_data))\n",
+     "    \n",
+     "chapter_data = book_data[0]\n",
+     "print(type(chapter_data))\n",
+     "print(chapter_data)\n",
+     "\n",
+     "chapter_name = chapter_data['chapter_name']\n",
+     "subtopics = chapter_data['subtopics']\n",
+     "\n",
+     "print(subtopics)"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "Python 3",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.5"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
vectorstore/db_faiss/geo-10-3/index.faiss ADDED
Binary file (21.5 kB)
 
vectorstore/db_faiss/geo-10-3/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c6a4d112990968a29d9e16897bd5b5a166acec3a894df2daf10419a310c6d72
+ size 7312
vectorstore/db_faiss/geo-10-whole_book/index.faiss ADDED
Binary file (702 kB)
 
vectorstore/db_faiss/geo-10-whole_book/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b03d45e671070a77668a6d3431509d26e5d792549418c2070a6b4b491e3fe597
+ size 252285
vectorstore/db_faiss/index.faiss ADDED
Binary file (702 kB)
 
vectorstore/db_faiss/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9fc76b583cb25e343ab9bc3292f3096035eba770f775d73246902c1151d9a54b
+ size 252285
word_docs/Geography-10/geo-10-1.docx ADDED
Binary file (26.8 kB)
 
word_docs/testing.ipynb ADDED
@@ -0,0 +1,68 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": 3,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "import json"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 15,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "<class 'list'>\n",
+       "<class 'dict'>\n",
+       "{'chapter_number': 1, 'chapter_name': 'Resources and Development', 'subtopics': ['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']}\n",
+       "['Definition of Resource', 'Transformation Process in the Environment', 'Classification of Resources', 'Development of Resources', 'Problems Due to Indiscriminate Resource Use', 'Importance of Equitable Resource Distribution', 'Resource Planning', 'Sustainable Development', 'Rio de Janeiro Earth Summit, 1992', 'Agenda 21', 'Resource Planning in India', 'Factors in Resource Planning', 'Role of Technology and Institutions in Resource Development', 'Conservation of Resources', 'Historical Perspectives on Resource Conservation', 'Land Resources Importance', 'Land Utilization Types', 'Land Use Pattern in India', 'Forest Area Concerns', 'Land Degradation and Conservation Measures', 'Soil as a Resource and Its Formation', 'Classification of Soils', 'Alluvial Soils', 'Black Soil', 'Red and Yellow Soils', 'Laterite Soil', 'Arid Soils', 'Forest Soils', 'Soil Erosion and Conservation Methods']\n"
+      ]
+     }
+    ],
+    "source": [
+     "DB_FAISS_PATH = 'refbooks-vectorstore/geo-10-1'\n",
+     "JSON_FILE_PATH = 'data/geo-10/geo-chapter-list.json'\n",
+     "\n",
+     "with open(JSON_FILE_PATH, 'r') as file:\n",
+     "    book_data = json.load(file)\n",
+     "    print(type(book_data))\n",
+     "    \n",
+     "chapter_data = book_data[0]\n",
+     "print(type(chapter_data))\n",
+     "print(chapter_data)\n",
+     "\n",
+     "chapter_name = chapter_data['chapter_name']\n",
+     "subtopics = chapter_data['subtopics']\n",
+     "\n",
+     "print(subtopics)"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "Python 3",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.5"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }