import os
import chainlit as cl
from langchain.memory.buffer import ConversationBufferMemory
from langchain_openai import ChatOpenAI, OpenAI
from langchain.chains import LLMChain
from prompts import default_prompt_template, doctor_prompt_template, default_prompt_template_no_sources, doctor_prompt_template_no_sources
from dotenv import load_dotenv
from chainlit.input_widget import Select, Switch, Slider
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from math import exp
import numpy as np
from typing import Any, Dict, List, Tuple
from langchain_core.output_parsers import BaseOutputParser
from difflib import SequenceMatcher
import requests
from bs4 import BeautifulSoup
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
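# Assumption: credentials live in a local .env file. load_dotenv() is imported
# above but never called in this section, and ChatOpenAI below reads
# NEBIUS_API_KEY from os.environ at import time, so load the file here.
load_dotenv()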
llm = ChatOpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=os.environ.get("NEBIUS_API_KEY"),
    model="meta-llama/Llama-3.3-70B-Instruct",
    temperature=0.7,
).bind(logprobs=True)
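# Note: .bind(logprobs=True) asks the OpenAI-compatible Nebius endpoint to
# return per-token log-probabilities with each completion; LangChain exposes
# them on the AIMessage as response_metadata["logprobs"]["content"], which
# highest_log_prob() below consumes to score response confidence.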
def get_wikipedia_page_content(page_title):
    # Fetch the raw wikitext of a page via the MediaWiki Revisions API.
    # Wikipedia titles use underscores in place of spaces, so collapse
    # internal whitespace to underscores rather than deleting it
    # (deleting it would turn "New York" into the wrong title "NewYork").
    page_title = re.sub(r"\s+", "_", page_title.strip())
    url = f"https://en.wikipedia.org/w/api.php?action=query&format=json&prop=revisions&titles={page_title}&formatversion=2&rvprop=content&rvslots=*"
    response = requests.get(url)
    data = response.json()
    return data["query"]["pages"][0]["revisions"][0]["slots"]["main"]["content"]
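# Usage sketch (hypothetical title; the Revisions API returns raw wikitext
# markup, not rendered HTML, which is fine as TF-IDF input):
#   text = get_wikipedia_page_content("Allergic rhinitis")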
def test_scrape_sim(link, response):
    # Score how closely the bot's response matches a cited source by computing
    # TF-IDF cosine similarity between the source page text and the response.
    tfidf_vectorizer = TfidfVectorizer()
    try:
        # The last path segment of a Wikipedia URL is the page title.
        idx = link.rfind("/")
        title = link[idx+1:]
        tfidf_matrix = tfidf_vectorizer.fit_transform([get_wikipedia_page_content(title), response])
        # tfidf_matrix = tfidf_vectorizer.fit_transform([scrape_web_text(link), response])
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
        return cosine_sim * 100
    except Exception:
        # If the page cannot be fetched or vectorized, report zero similarity.
        return 0
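# Usage sketch (hypothetical arguments; returns a 0-100 similarity score,
# or 0 if the page can't be fetched):
#   score = test_scrape_sim(
#       "https://en.wikipedia.org/wiki/Allergic_rhinitis",
#       "Seasonal allergies are triggered by pollen from trees and grasses...",
#   )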
config_file = "config.json"
def get_config():
    with open(config_file, "r") as file:
        return json.load(file)
def update_config(new_value):
    config = get_config()
    config["num_sources"] = new_value
    with open(config_file, "w") as file:
        json.dump(config, file, indent=4)
def load_config():
    # Same behavior as get_config(); kept as a separate name for existing call sites.
    return get_config()
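# Assumed shape of config.json (only "num_sources" is read or written here;
# any other keys are preserved by update_config):
#   {"num_sources": 3}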
def generate_hypothetical_answer(question: str) -> str:
    """Have the LLM generate a hypothetical answer to assist with document retrieval."""
    prompt = PromptTemplate(
        input_variables=["question"],
        template="""
You are an AI assistant tasked with generating a hypothetical answer to the following question. Your answer should be detailed and comprehensive,
as if you had access to all relevant information. This hypothetical answer will be used to improve document retrieval, so include key terms and concepts
that might be relevant. Do not include phrases like "I think" or "It's possible that" - present the information as if it were factual.
Question: {question}
Hypothetical answer:
""",
    )
    # Use the module-level llm defined above; invoke() returns an AIMessage,
    # so .content extracts the text to match the declared -> str return type.
    return llm.invoke(prompt.format(question=question)).content
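# This mirrors the HyDE (Hypothetical Document Embeddings) idea: a detailed
# fabricated answer shares more vocabulary with relevant documents than the
# short question does, so it makes a better retrieval query.
# Usage sketch (hypothetical question):
#   hypo = generate_hypothetical_answer("What triggers seasonal allergies?")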
def highest_log_prob(vals):
    """Return a confidence score for a response: exp of the mean per-token
    log-probability (the reciprocal of perplexity), scaled to a percentage."""
    logprobs = [token["logprob"] for token in vals]
    if not logprobs:
        # No tokens means no evidence; report zero confidence.
        return 0.0
    average_log_prob = sum(logprobs) / len(logprobs)
    return np.round(np.exp(average_log_prob) * 100, 2)
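# Usage sketch (assumes the OpenAI-style logprobs layout noted above):
#   response = llm.invoke("What triggers seasonal allergies?")
#   tokens = response.response_metadata["logprobs"]["content"]
#   confidence = highest_log_prob(tokens)  # e.g. 87.42 (percent)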