Update app.py
app.py
CHANGED
@@ -16,6 +16,8 @@ from astroquery.nasa_ads import ADS
 import pyvo as vo
 import pandas as pd
 from pinecone import Pinecone
+import logging
+import re

 # Load the NASA-specific bi-encoder model and tokenizer
 bi_encoder_model_name = "nasa-impact/nasa-smd-ibm-st-v2"

@@ -106,11 +108,10 @@ def retrieve_relevant_context(user_input, context_text, science_objectives="", t

     return retrieved_context

-def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
-
-    keyword_prompt = f"Extract
+def extract_keywords_with_gpt(context, max_tokens=100, temperature=0.3):
+
+    keyword_prompt = f"Extract 3 most important scientific keywords from the following user query:\n\n{context}"

-    # Call GPT-4 to extract keywords based on the user prompt
     response = client.chat.completions.create(
         model="gpt-4",
         messages=[

@@ -121,35 +122,40 @@ def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
         temperature=temperature
     )

-    # Extract the content from GPT-4's reply
     extracted_keywords = response.choices[0].message.content.strip()

-
+    cleaned_keywords = re.sub(r'\d+\.\s*', '', extracted_keywords)

-
-
-
-        simplified_query = prompt
+    keywords_list = [kw.strip() for kw in cleaned_keywords.split("\n") if kw.strip()]
+
+    return keywords_list

+def fetch_nasa_ads_references(ads_query):
+    """Fetch relevant NASA ADS papers and format them for readability."""
+    try:
         # Query NASA ADS for relevant papers
-        papers = ADS.query_simple(
-
+        papers = ADS.query_simple(ads_query)
+
         if not papers or len(papers) == 0:
-            return [("No results found", "N/A", "N/A")]
-
+            return [("No results found", "N/A", "N/A", "N/A", "N/A", "N/A")]
+
         # Include authors in the references
-        references = [
-
-
-
-
-        )
-
-
+        references = []
+        for paper in papers[:5]:  # Limit to 5 references
+            title = paper.get('title', ['Title not available'])[0]
+            abstract = paper.get('abstract', 'Abstract not available')
+            authors = ", ".join(paper.get('author', [])[:3]) + (" et al." if len(paper.get('author', [])) > 3 else "")
+            bibcode = paper.get('bibcode', 'N/A')
+            pub = paper.get('pub', 'Unknown Journal')
+            pubdate = paper.get('pubdate', 'Unknown Date')
+
+            references.append((title, abstract, authors, bibcode, pub, pubdate))
+
         return references
-
+
     except Exception as e:
-
+        logging.error(f"Error fetching ADS references: {str(e)}")
+        return [("Error fetching references", "See logs for details", "N/A", "N/A", "N/A", "N/A")]

 def fetch_exoplanet_data():
     # Connect to NASA Exoplanet Archive TAP Service

@@ -203,7 +209,7 @@ def generate_response(user_input, science_objectives="", relevant_context="", re
     if references:
         response_content = response.choices[0].message.content.strip()
         references_text = "\n\nADS References:\n" + "\n".join(
-            [f"- {title}
+            [f"- {title} {authors} (Bibcode: {bibcode}) {pub} {pubdate}" for title, abstract, authors, bibcode, pub, pubdate in references]
         )
         return f"{response_content}\n{references_text}"


@@ -378,8 +384,12 @@ def chatbot(user_input, science_objectives="", context="", subdomain="", max_tok

     yield "Context Retrieved successfully ✅ ", None, None, None, None

-
-
+    keywords = extract_keywords_with_gpt(context)
+
+    ads_query = " ".join(keywords)
+
+    # Fetch NASA ADS references using the user context
+    references = fetch_nasa_ads_references(ads_query)

     yield "🔄 Generating structured response using GPT-4o...", None, None, None, None
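
Taken together, the changes wire a small literature-retrieval chain into the chatbot: GPT-4 extracts three keywords from the retrieved context, re.sub(r'\d+\.\s*', '', ...) strips any "1." / "2." numbering from the reply, the keywords are joined into a single ADS query string, and fetch_nasa_ads_references returns up to five papers as (title, abstract, authors, bibcode, pub, pubdate) tuples that generate_response formats into the "ADS References" block. A minimal standalone sketch of that flow, assuming the OpenAI client and the astroquery ADS token are already configured as elsewhere in app.py; the example query string and the printed output are illustrative only:

# Illustrative use of the new helpers; assumes `client` (OpenAI) and the ADS API token
# are configured as in app.py. The query below is a made-up example.
user_context = "How do stellar flares affect the atmospheres of temperate exoplanets?"

keywords = extract_keywords_with_gpt(user_context)   # keyword list with any "1. " numbering stripped
ads_query = " ".join(keywords)                       # single free-text query for ADS.query_simple
references = fetch_nasa_ads_references(ads_query)    # up to 5 (title, abstract, authors, bibcode, pub, pubdate) tuples

# Same formatting generate_response uses for the "ADS References" section
for title, abstract, authors, bibcode, pub, pubdate in references:
    print(f"- {title} {authors} (Bibcode: {bibcode}) {pub} {pubdate}")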