import os
import threading
import time
import subprocess
print("Expanding user path for Ollama")
OLLAMA = os.path.expanduser("~/ollama")
print("Checking if Ollama exists at the path")
if not os.path.exists(OLLAMA):
    print("Ollama not found, downloading it")
    subprocess.run("pwd", shell=True)
    subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64.tgz -o /home/user/app/ollama-linux-amd64.tgz", shell=True)
    subprocess.run("tar -xzf /home/user/app/ollama-linux-amd64.tgz", shell=True)
    subprocess.run("ls -lash", shell=True)
    time.sleep(10)
def ollama_service_thread():
    print("Starting Ollama service thread")
    subprocess.run(f"{OLLAMA} serve", shell=True)
print("Creating and starting Ollama service thread")
OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
OLLAMA_SERVICE_THREAD.start()
print("Giving Ollama serve a moment to start")
time.sleep(10)
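# The fixed sleep above can race with a slow cold start. As a hedged
# alternative, the sketch below polls the Ollama HTTP endpoint (it answers
# plain GET requests on its default port, 11434) until the server responds;
# the 60-second ceiling is an assumption, not an Ollama default.
import urllib.request
import urllib.error

def wait_for_ollama(url="http://localhost:11434", timeout=60):
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url) as resp:
                if resp.status == 200:  # Ollama replies "Ollama is running"
                    return True
        except (urllib.error.URLError, ConnectionError):
            time.sleep(1)  # server not up yet; retry shortly
    return False

if not wait_for_ollama():
    print("Warning: Ollama did not respond within the timeout")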
print("Setting model to 'llama3'")
model = "llama3"
print(f"Pulling model {model}")
subprocess.run(f"/home/user/app/bin/ollama pull {model}", shell=True)
subprocess.run(f"/home/user/app/bin/ollama pull mxbai-embed-large", shell=True)
import gradio as gr
import textwrap
# Import the LangChain components used below, aliasing a few with shorter names for convenience
from langchain_community.document_loaders import YoutubeLoader as YLoader
from langchain_community.document_loaders import RecursiveUrlLoader
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_core.documents import Document  # replaces the older langchain.schema.document / langchain.docstore.document paths
from langchain.text_splitter import RecursiveCharacterTextSplitter as RCTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains.summarize import load_summarize_chain
from langchain_ollama import OllamaLLM
from langchain_ollama import OllamaEmbeddings
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
import warnings
warnings.filterwarnings('ignore')
lm = OllamaLLM(
    model='llama3',
    base_url='http://localhost:11434',
    keep_alive=-1,  # seconds or a duration string like "5m"; -1 keeps the model loaded
)
# Use the embedding model pulled above; llama3 is a chat model, while
# mxbai-embed-large is purpose-built for embeddings.
embeddings = OllamaEmbeddings(
    model="mxbai-embed-large",
)
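# Quick hedged sanity check: embed a single string and report the vector width
# (mxbai-embed-large should produce 1024-dimensional vectors). This confirms
# the embedding endpoint is reachable before the heavier work below.
probe = embeddings.embed_query("hello world")
print(f"Embedding dimension: {len(probe)}")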
# Load video information from YouTube. The live loader call is left commented
# out; a hardcoded transcript Document is used below instead.
yt_loader = YLoader.from_youtube_url("https://www.youtube.com/watch?v=AyOnug-3OKM", add_video_info=False)
# video_data = yt_loader.load()
video_data = [Document(metadata={'source': 'AyOnug-3OKM'}, page_content="Imagine the wildest idea that you've\never had and you're curious about how it might scale to something that's\nhundred a thousand times bigger. This is a place where\nyou can get to do that. OpenAI is building the general\ntechnologies that can extend the reach of the human brain and our creativity. Dall-E really brings high\nquality creations to anyone. Our coding models serve as a coding\nassistant in tools like Co-Pilot. And GPT3 is a system that has a\nrobust understanding of language. Our mission is to create highly capable\nAI technologies and deploy them to the world, for the benefit of humanity in a\nway that is safe and beneficial for all. I think that AI will be a\ntechnological revolution on the scale of the agricultural, the\nindustrial, the computer revolution. And we have no goal other than\nthe creation and deployment of safe, beneficial AGI. We have a non-profit that governs\neverything, and the ability to block deployments that would make commercial\nsense but we think create safety issues. Our whole structure is around that. I really like the way OpenAI\nare trying to build AGI. Namely by harnessing large amounts\nof real world data to train powerful unsupervised models, and then steering\nthose with human alignment to create agents that can actually do useful things. OpenAI is truly unique in\nthat sense of being able to work in a very nimble fashion. At the same time having all the resources\nin the world to solve the problems. That mentality is what made it\nreally possible to ship Dall-E. At OpenAI we felt very strongly that\nengineering is an integral part to the success of any progress in AI. And that research needs to be infused\nwith engineering and engineering needs to be infused with research. In my first month I did a kind of an\noutrageously large scaling project. They were like yeah we have a\npiece of software that runs on one computer can you make it run\non 3000 and you have three weeks? So that was wild. I don't think you need a specific\nmachine learning background to come here to OpenAI and succeed. I had just taken one class in college. People were very willing\nto help and teach. And I worked on Co-Pilot, one of the\nmore successful products that are based on ML that people are using day-to-day. People are really both friendly\nand approachable and ambitious. There's this shared sense of we're\ngoing to do something very very big together, we've set our sights\nhigh, and also we're collaborating together to make that happen. The number of times I've been\nsitting at lunch and you overhear the other team talking about something\nand you learn a trick from that. Just kind of going on the signal of what\nis everyone else excited about is already a really strong sense of what do I need to\npay attention, to what do I need to learn. For the systems that we're developing\nto have a big impact we have to figure out how to make them\naccessible and how to do so safely. So it's very important to work with\na set of people that have diverse experiences, because we're really\nentering an era where we have to innovate in almost every aspect of society. I truly believe that the technology\nwe are creating is going to have a really profound impact. Can we create an environment\nwhere we have AI and technology broadly amplifying human wisdom? Giving humans another resource for\nconnecting with what's actually important to them, what actually gives them meaning. And we need a lot of different\nkinds of people to make that happen.")]
# video_id = 'W7ppd_RY-UE' # Replace with your video ID
# transcript = YouTubeTranscriptApi.get_transcript(video_id)
# Verify video data
if not video_data:
    raise ValueError("Failed to load video data. Please check the YouTube URL and loader.")
# Split text content into documents
text_splitter = RCTextSplitter(chunk_size=1024, chunk_overlap=64)
documents = text_splitter.split_documents(video_data)
# Check if documents are valid
if not documents:
    raise ValueError("No documents could be extracted. Check the text splitter configuration.")
print(f"# of documents = {len(documents)}")
transcript = documents[0].page_content
line_width = 120
print(textwrap.fill(transcript[:2000], line_width))
# Embed text documents using embeddings model
embedding_list = embeddings.embed_documents([doc.page_content for doc in documents])
# Handle empty embedding list
if not embedding_list:
    raise ValueError("Embeddings could not be generated. Please check the embedding model and document content.")
print(f"{len(embedding_list)} embeddings")
print(f"Here's a sample of one: {embedding_list[0][:10]}...")
# Create Chroma vector store from documents and embeddings
vector_store = Chroma.from_documents(documents, embeddings)
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 2})
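# Hedged usage sketch: the retriever is a Runnable, so invoke() returns the k
# most similar Document chunks for a query (the question text is illustrative).
hits = retriever.invoke("What is OpenAI's mission?")
print(f"Retrieved {len(hits)} chunks for a sample query")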
# Map-reduce summarization chain (built here but not wired into the Gradio UI below)
summary_chain = load_summarize_chain(llm=lm, chain_type='map_reduce', verbose=False)
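# Hedged usage sketch for the summary chain, left commented out to avoid an
# extra LLM pass at startup: map_reduce chains take documents under the
# "input_documents" key and return the summary as "output_text".
# summary = summary_chain.invoke({"input_documents": documents})["output_text"]
# print(textwrap.fill(summary, line_width))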
# Initialize RetrievalQA chain for question answering
qa_chain = RetrievalQA.from_chain_type(llm=lm, chain_type="stuff", retriever=retriever, return_source_documents=True)
# Define a function for semantic question answering
def semantic_ask(question):
    video_subset = qa_chain.invoke({"query": question})
    # The full chain result (answer plus source documents) is stringified into the prompt below
    context = video_subset
    prompt = f"""
    Answer the following question in a detailed manner, using information from the text below. If the answer is not in the text, say I don't know and do not generate your own response.
    Question:
    {question}
    Text:
    {context}
    Question:
    {question}
    Answer:
    """
    response = lm.invoke(prompt).strip()
    return {
        "answer": response
    }
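# Hedged usage sketch (the question is illustrative), commented out so the app
# does not spend two LLM calls at startup:
# print(semantic_ask("What is OpenAI's mission?")["answer"])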
# Gradio callback: unwrap the answer string for the text output
def get_response(input_text):
    response = semantic_ask(input_text)
    return response["answer"]
# Create Gradio interface for the question answering function
gr_interface = gr.Interface(fn=get_response, inputs="text", outputs="text")
gr_interface.launch()