Spaces:

ishaan812
/

mediHelp

Sleeping

App Files Files Community

Ishaan Shah committed on May 20, 2023

Commit

997488c

1 Parent(s): e66dfb9

init

Browse files

Files changed (9) hide show

.gitignore +15 -0
db/chroma-collections.parquet +3 -0
db/chroma-embeddings.parquet +3 -0
db/index/id_to_uuid_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl +3 -0
db/index/index_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.bin +3 -0
db/index/index_metadata_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl +3 -0
db/index/uuid_to_id_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl +3 -0
main.py +105 -0
requirements.txt +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,15 @@

+.venv/
+env/
+*.pyc
+__pycache__/
+instance/
+.pytest_cache/
+.coverage
+htmlcov/
+dist/
+build/
+*.egg-info/

db/chroma-collections.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:251bf652217b82b5fce7f8ba2e164e14f09f2815cb4e2b4efd5e563acdc2604b
+size 557

db/chroma-embeddings.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:217821140da7c533c36c3c26d622ca8ebf2a0579b591dfaddb97a8bf8431fe40
+size 34357398

db/index/id_to_uuid_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3985f37cc18b9645a1af4abcd213620848270eecd2d8300abad4f64eab2f60a9
+size 217486

db/index/index_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efd288bf7df73564fc95263fafeabb808ed1397c1d1e1ad1deb94fc8ef3e7c2d
+size 21565780

db/index/index_metadata_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a33d7aeb30c846c66a0655be595b5311114241ef9980a28be04f3ba9ea86d6d
+size 74

db/index/uuid_to_id_1dc4c700-b712-4062-ba0c-4aa6bd0d7fc8.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:acee5d7d12cbece9cd42ef84eaa814ca0d088870b7dbce330733a7338e2b3c1f
+size 254248

main.py ADDED Viewed

	@@ -0,0 +1,105 @@

+from flask import Flask, request
+import os
+import requests
+from langchain.vectorstores import Chroma
+from langchain.llms import OpenAI
+from langchain.chains import RetrievalQA
+from InstructorEmbedding import INSTRUCTOR
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.chat_models import ChatOpenAI
+import numpy
+import torch
+import json
+import textwrap
+from flask_cors import CORS
+import socket;
+# Flask application object; the route handlers below are registered on it.
+app = Flask(__name__)
+# Wrap the app with flask_cors; no options are passed, so library defaults apply.
+cors = CORS(app)
+def get_local_ip():
+    """Return the local IP address the OS selects for reaching 8.8.8.8.
+
+    A UDP socket is connected to ``8.8.8.8:80`` and the address the socket
+    was bound to is read back with ``getsockname()``.
+
+    Returns:
+        str: the local address component of the socket name.
+
+    Raises:
+        OSError: if the socket cannot be created or connected.
+    """
+    # The context manager guarantees the socket is closed, including when
+    # connect() raises; previously the descriptor was never released.
+    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
+        s.connect(("8.8.8.8", 80))
+        return s.getsockname()[0]
+def wrap_text_preserve_newlines(text, width=110):
+    """Wrap ``text`` to ``width`` columns while keeping its existing newlines.
+
+    The input is split on ``'\n'``, each piece is wrapped independently with
+    ``textwrap.fill``, and the pieces are joined back with ``'\n'``.
+
+    Args:
+        text: the string to wrap.
+        width: maximum line width passed to ``textwrap.fill``.
+
+    Returns:
+        str: the re-wrapped text.
+    """
+    return '\n'.join(
+        textwrap.fill(segment, width=width) for segment in text.split('\n')
+    )
+def process_llm_response(llm_response):
+    """Print a QA response with its sources and return it as a JSON string.
+
+    Args:
+        llm_response: mapping with a ``'result'`` string and a
+            ``'source_documents'`` iterable; each document exposes a
+            ``metadata`` mapping with ``'source'`` and ``'page'`` keys.
+
+    Returns:
+        str: JSON object with ``result`` (the wrapped answer text) and
+        ``sources`` (a list of ``{"book", "page"}`` entries).
+    """
+    answer_text = wrap_text_preserve_newlines(llm_response['result'])
+    print(answer_text)
+    print('\n\nSources:')
+    citations = []
+    for doc in llm_response["source_documents"]:
+        meta = doc.metadata
+        print(meta['source'] + "Page Number: " + str(meta['page']))
+        citations.append({"book": meta['source'], "page": meta['page']})
+    return json.dumps({'result': answer_text, 'sources': citations})
+def get_answer(question):
+    """Run ``question`` through the module-level ``qa_chain`` and serialize it.
+
+    Args:
+        question: the question passed to ``qa_chain``.
+
+    Returns:
+        The JSON string produced by ``process_llm_response``.
+    """
+    return process_llm_response(qa_chain(question))
+@app.route('/question', methods=['POST'])
+def answer():
+    """Handle ``POST /question`` by answering the ``question`` in the JSON body.
+
+    Returns:
+        The JSON string from ``get_answer`` on success; a 415 response when
+        the request is not JSON; a 400 response when the body is not a JSON
+        object containing a ``question`` field.
+    """
+    # is_json inspects the parsed mimetype, so headers carrying parameters
+    # such as "application/json; charset=utf-8" are accepted too (an exact
+    # string comparison rejected them).
+    if not request.is_json:
+        return 'Content-Type not supported!', 415
+    # silent=True yields None on a malformed body so it can be reported as a
+    # client error below.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'question' not in data:
+        return 'Request body must be a JSON object with a "question" field.', 400
+    return get_answer(data['question'])
+@app.route('/', methods=['GET'])
+def default():
+    """Handle ``GET /`` with a fixed greeting."""
+    greeting = "Hello World!"
+    return greeting
+if __name__ == '__main__':
+    # Script entry point: build the retrieval QA chain, then serve the Flask
+    # app on this machine's local IP address, port 5000.
+    ip = get_local_ip()
+    # The OpenAI key must come from the environment; secrets do not belong in
+    # source control. Fail fast with a clear message when it is missing.
+    if not os.environ.get("OPENAI_API_KEY"):
+        raise SystemExit("OPENAI_API_KEY environment variable is not set.")
+    torch.cuda.empty_cache()
+    # NOTE(review): this only sets an attribute on the torch module and does
+    # not appear to configure the CUDA allocator (that is normally done via
+    # the PYTORCH_CUDA_ALLOC_CONF environment variable) -- confirm and remove.
+    torch.max_split_size_mb = 100
+    # Embedding model is loaded on the CPU.
+    instructor_embeddings = HuggingFaceInstructEmbeddings(
+        model_name="hkunlp/instructor-xl",
+        model_kwargs={"device": "cpu"},
+    )
+    # Supplying a persist_directory points Chroma at the embeddings on disk
+    persist_directory = 'db'
+    vectordb2 = Chroma(
+        persist_directory=persist_directory,
+        embedding_function=instructor_embeddings,
+    )
+    # Retrieve the 3 best-matching documents for each question.
+    retriever = vectordb2.as_retriever(search_kwargs={"k": 3})
+    vectordb2.persist()
+    # Set up the turbo LLM
+    turbo_llm = ChatOpenAI(
+        temperature=0,
+        model_name='gpt-3.5-turbo'
+    )
+    # qa_chain is a module-level name; get_answer() reads it at request time.
+    qa_chain = RetrievalQA.from_chain_type(
+        llm=turbo_llm,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=True,
+    )
+    # Replace the template of the first prompt message so answers are
+    # restricted to the retrieved context.
+    qa_chain.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template = """
+    Use only the following pieces of context and think step by step to answer. Answer the users question only if they are related to the context given.
+    If you don't know the answer, just say that you don't know, don't try to make up an answer. Make your answer very detailed and long.
+    Use bullet points to explain when required.
+    Use only text found in the context as your knowledge source for the answer.
+    ----------------
+    {context}"""
+    app.run(host=ip, port=5000)

requirements.txt ADDED Viewed

Binary file (3.12 kB). View file