# TransparentGPT / methods.py
import os
import chainlit as cl
from langchain.memory.buffer import ConversationBufferMemory
from langchain_openai import ChatOpenAI, OpenAI
from langchain.chains import LLMChain
from prompts import default_prompt_template, doctor_prompt_template, default_prompt_template_no_sources, doctor_prompt_template_no_sources
from dotenv import load_dotenv
from chainlit.input_widget import Select, Switch, Slider
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from math import exp
import numpy as np
from typing import Any, Dict, List, Tuple
from langchain_core.output_parsers import BaseOutputParser
from difflib import SequenceMatcher
import requests
from bs4 import BeautifulSoup
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json
# Load environment variables (e.g. NEBIUS_API_KEY) from a local .env file.
load_dotenv()

# Llama 3.3 70B served through Nebius's OpenAI-compatible endpoint; logprobs
# are bound so token-level confidence can be computed downstream.
llm = ChatOpenAI(
    base_url="https://api.studio.nebius.com/v1/",
    api_key=os.environ.get("NEBIUS_API_KEY"),
    model="meta-llama/Llama-3.3-70B-Instruct",
    temperature=0.7,
).bind(logprobs=True)
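
# Smoke-test sketch (hypothetical prompt; requires NEBIUS_API_KEY to be set):
#
#   reply = llm.invoke("Say hello in one word.")
#   print(reply.content, reply.response_metadata.get("logprobs"))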

def get_wikipedia_page_content(page_title):
    """Fetch the raw wikitext of a Wikipedia article via the Revisions API."""
    # MediaWiki page titles use underscores in place of spaces.
    page_title = re.sub(r"\s+", "_", page_title.strip())
    params = {
        "action": "query", "format": "json", "prop": "revisions",
        "titles": page_title, "formatversion": 2,
        "rvprop": "content", "rvslots": "*",
    }
    response = requests.get("https://en.wikipedia.org/w/api.php", params=params)
    data = response.json()
    return data["query"]["pages"][0]["revisions"][0]["slots"]["main"]["content"]
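
# Usage sketch ("Machine learning" is an arbitrary article title chosen for
# illustration):
#
#   wikitext = get_wikipedia_page_content("Machine learning")
#   print(wikitext[:300])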

def test_scrape_sim(link, response):
    """Score how closely `response` matches the linked Wikipedia article:
    TF-IDF cosine similarity between article text and response, scaled to 0-100."""
    tfidf_vectorizer = TfidfVectorizer()
    try:
        # The article title is the last path segment of the Wikipedia URL.
        title = link[link.rfind("/") + 1:]
        tfidf_matrix = tfidf_vectorizer.fit_transform(
            [get_wikipedia_page_content(title), response]
        )
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
        return cosine_sim * 100
    except Exception:
        # Treat unreachable or unparsable pages as zero similarity.
        return 0
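
# Usage sketch (the link and response text are hypothetical examples):
#
#   score = test_scrape_sim(
#       "https://en.wikipedia.org/wiki/Machine_learning",
#       "Machine learning is a field of study in artificial intelligence...",
#   )
#   print(f"Source similarity: {score:.1f}%")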

config_file = "config.json"

def get_config():
    """Read the current configuration from config.json."""
    with open(config_file, "r") as file:
        return json.load(file)

def update_config(new_value):
    """Persist a new number of retrieval sources to config.json."""
    config = get_config()
    config["num_sources"] = new_value
    with open(config_file, "w") as file:
        json.dump(config, file, indent=4)

def load_config():
    """Alias of get_config()."""
    return get_config()
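
# Minimal sketch of the config.json shape, inferred from the keys read and
# written above (the default value of 3 is an assumption): creates the file
# if it does not exist yet.
def ensure_config(default_num_sources=3):
    if not os.path.exists(config_file):
        with open(config_file, "w") as file:
            json.dump({"num_sources": default_num_sources}, file, indent=4)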

def generate_hypothetical_answer(question: str) -> str:
    """Have the LLM generate a hypothetical (HyDE-style) answer to improve retrieval."""
    prompt = PromptTemplate(
        input_variables=["question"],
        template="""
        You are an AI assistant tasked with generating a hypothetical answer to the following question. Your answer should be detailed and comprehensive,
        as if you had access to all relevant information. This hypothetical answer will be used to improve document retrieval, so include key terms and concepts
        that might be relevant. Do not include phrases like "I think" or "It's possible that" - present the information as if it were factual.
        Question: {question}
        Hypothetical answer:
        """,
    )
    # ChatOpenAI.invoke returns an AIMessage; return its text content.
    return llm.invoke(prompt.format(question=question)).content
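
# Usage sketch (hypothetical question, for illustration): the generated answer
# is embedded in place of the raw question during retrieval.
#
#   hyde_answer = generate_hypothetical_answer("What are common causes of fatigue?")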

def highest_log_prob(vals):
    """Turn token logprobs into a 0-100 confidence score: the exponential of the
    mean log-probability, i.e. the geometric mean token probability."""
    logprobs = [token["logprob"] for token in vals]
    average_log_prob = sum(logprobs) / len(logprobs)
    return np.round(np.exp(average_log_prob) * 100, 2)
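
# Sketch of wiring this to the bound llm above, assuming the OpenAI-compatible
# logprobs layout that langchain_openai exposes (a list of
# {"token": ..., "logprob": ...} dicts under
# response.response_metadata["logprobs"]["content"]); this helper is
# illustrative, not part of the original module.
def response_confidence(response):
    tokens = (response.response_metadata.get("logprobs") or {}).get("content", [])
    return highest_log_prob(tokens) if tokens else 0.0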