Ishaan Shah committed on
Commit 997488c · 1 Parent(s): e66dfb9
.gitignore ADDED
@@ -0,0 +1,15 @@
+ .venv/
+ env/
+
+ *.pyc
+ __pycache__/
+
+ instance/
+
+ .pytest_cache/
+ .coverage
+ htmlcov/
+
+ dist/
+ build/
+ *.egg-info/
db/chroma-collections.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:251bf652217b82b5fce7f8ba2e164e14f09f2815cb4e2b4efd5e563acdc2604b
+ size 557
db/chroma-embeddings.parquet ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:217821140da7c533c36c3c26d622ca8ebf2a0579b591dfaddb97a8bf8431fe40
+ size 34357398
db/index/id_to_uuid_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3985f37cc18b9645a1af4abcd213620848270eecd2d8300abad4f64eab2f60a9
+ size 217486
db/index/index_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:efd288bf7df73564fc95263fafeabb808ed1397c1d1e1ad1deb94fc8ef3e7c2d
+ size 21565780
db/index/index_metadata_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a33d7aeb30c846c66a0655be595b5311114241ef9980a28be04f3ba9ea86d6d
+ size 74
db/index/uuid_to_id_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:acee5d7d12cbece9cd42ef84eaa814ca0d088870b7dbce330733a7338e2b3c1f
+ size 254248
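Each db/ file above is a Git LFS pointer rather than the data itself: the repository stores only an oid (the SHA-256 of the real content) and its size in bytes, and git lfs pull downloads the actual blobs. As a minimal sketch (not part of this commit; both paths below are hypothetical), a fetched blob can be checked against its pointer like so:

import hashlib

def verify_lfs_pointer(pointer_path, blob_path):
    # Parse the three "key value" lines of a Git LFS pointer file.
    fields = dict(line.split(" ", 1) for line in open(pointer_path) if " " in line)
    expected_oid = fields["oid"].strip().split(":", 1)[1]  # hex after "sha256:"
    expected_size = int(fields["size"])
    # Hash the downloaded blob in chunks and compare digest and size.
    h, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# e.g. verify_lfs_pointer("db/chroma-embeddings.parquet",    # pointer text
#                         "/tmp/chroma-embeddings.parquet")  # downloaded blob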
main.py ADDED
@@ -0,0 +1,105 @@
+ from flask import Flask, request
+ import os
+ import json
+ import socket
+ import textwrap
+
+ import torch
+ from flask_cors import CORS
+ from langchain.vectorstores import Chroma
+ from langchain.chains import RetrievalQA
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
+ from langchain.chat_models import ChatOpenAI
+
+ app = Flask(__name__)
+ cors = CORS(app)
+
+
+ def get_local_ip():
+     # Connect a UDP socket toward a public address to discover which local
+     # interface the OS would route through; UDP connect() sends no packets.
+     with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
+         s.connect(("8.8.8.8", 80))
+         return s.getsockname()[0]
+
+
+ def wrap_text_preserve_newlines(text, width=110):
+     # Split the input text into lines based on newline characters
+     lines = text.split('\n')
+     # Wrap each line individually
+     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+     # Join the wrapped lines back together using newline characters
+     return '\n'.join(wrapped_lines)
+
+
+ def process_llm_response(llm_response):
+     # Package the chain's answer plus its source documents as JSON.
+     response_data = {
+         'result': wrap_text_preserve_newlines(llm_response['result']),
+         'sources': []
+     }
+     print(wrap_text_preserve_newlines(llm_response['result']))
+     print('\n\nSources:')
+     for source in llm_response["source_documents"]:
+         print(source.metadata['source'] + " Page Number: " + str(source.metadata['page']))
+         response_data['sources'].append({"book": source.metadata['source'],
+                                          "page": source.metadata['page']})
+     return json.dumps(response_data)
+
+
+ def get_answer(question):
+     llm_response = qa_chain(question)
+     return process_llm_response(llm_response)
+
+
+ @app.route('/question', methods=['POST'])
+ def answer():
+     content_type = request.headers.get('Content-Type')
+     if content_type == 'application/json':
+         question = request.json['question']
+         return get_answer(question)
+     else:
+         return 'Content-Type not supported!', 415
+
+
+ @app.route('/', methods=['GET'])
+ def default():
+     return "Hello World!"
+
+
+ if __name__ == '__main__':
+     ip = get_local_ip()
+     # The original commit hard-coded an OpenAI API key here; secrets must
+     # come from the environment, never from source control.
+     if "OPENAI_API_KEY" not in os.environ:
+         raise RuntimeError("Set the OPENAI_API_KEY environment variable")
+
+     # if torch.cuda.is_available() == False:
+     #     print("No GPU available")
+     #     exit(1)
+
+     # The original assigned torch.max_split_size_mb, which is a no-op; the
+     # CUDA allocator is configured via this environment variable instead.
+     os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:100")
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+
+     instructor_embeddings = HuggingFaceInstructEmbeddings(
+         model_name="hkunlp/instructor-xl",
+         model_kwargs={"device": "cpu"})
+
+     # Supplying a persist_directory loads the embeddings stored on disk
+     persist_directory = 'db'
+     vectordb2 = Chroma(persist_directory=persist_directory,
+                        embedding_function=instructor_embeddings)
+     retriever = vectordb2.as_retriever(search_kwargs={"k": 3})
+     vectordb2.persist()
+
+     # Set up the turbo LLM
+     turbo_llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')
+     qa_chain = RetrievalQA.from_chain_type(llm=turbo_llm,
+                                            chain_type="stuff",
+                                            retriever=retriever,
+                                            return_source_documents=True)
+     # Override the system prompt of the "stuff" chain so the model answers
+     # strictly from the retrieved context.
+     qa_chain.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template = """
+ Use only the following pieces of context and think step by step to answer. Answer the user's question only if it is related to the given context.
+ If you don't know the answer, just say that you don't know; don't try to make up an answer. Make your answer very detailed and long.
+ Use bullet points to explain when required.
+ Use only text found in the context as your knowledge source for the answer.
+ ----------------
+ {context}"""
+     app.run(host=ip, port=5000)
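For reference, the /question endpoint above can be exercised with a short client script. This is a usage sketch, not part of the commit; the host and the question text are placeholders, and it assumes the server is reachable on port 5000 at the local IP main.py binds to.

import requests

# Hypothetical client for main.py's /question endpoint.
resp = requests.post(
    "http://127.0.0.1:5000/question",  # replace with the server's local IP
    json={"question": "What does the context say about photosynthesis?"},
)
resp.raise_for_status()
payload = resp.json()  # {'result': ..., 'sources': [{'book': ..., 'page': ...}]}
print(payload["result"])
for src in payload["sources"]:
    print(src["book"], "page", src["page"])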
requirements.txt ADDED
Binary file (3.12 kB)
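The commit ships the prebuilt Chroma index under db/ but not the script that produced it. Below is a minimal, hypothetical ingestion sketch: the books/ folder and the chunking parameters are assumptions, while the instructor-xl embedding model and the 'db' persist directory come from main.py. PyPDFLoader is a plausible source because it attaches exactly the source/page metadata that main.py reads back from retrieved documents.

import os
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma

docs = []
for name in os.listdir("books"):  # assumed input folder of PDFs
    if name.endswith(".pdf"):
        # PyPDFLoader yields one Document per page with
        # {'source': path, 'page': n} metadata.
        docs.extend(PyPDFLoader(os.path.join("books", name)).load())

# Chunk sizes are assumptions, not taken from the commit.
chunks = RecursiveCharacterTextSplitter(chunk_size=1000,
                                        chunk_overlap=200).split_documents(docs)

embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl",
                                           model_kwargs={"device": "cpu"})
vectordb = Chroma.from_documents(chunks, embeddings, persist_directory="db")
vectordb.persist()  # writes the parquet/index files tracked above via LFS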