aquibmoin committed on
Commit
1a1b0fb
·
verified ·
1 Parent(s): a6d305c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -27
app.py CHANGED
@@ -16,6 +16,8 @@ from astroquery.nasa_ads import ADS
16
  import pyvo as vo
17
  import pandas as pd
18
  from pinecone import Pinecone
 
 
19
 
20
  # Load the NASA-specific bi-encoder model and tokenizer
21
  bi_encoder_model_name = "nasa-impact/nasa-smd-ibm-st-v2"
@@ -106,11 +108,10 @@ def retrieve_relevant_context(user_input, context_text, science_objectives="", t
106
 
107
  return retrieved_context
108
 
109
- def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
110
- # Define a prompt to ask GPT-4 to extract keywords and important terms
111
- keyword_prompt = f"Extract the most important keywords, scientific concepts, and parameters from the following user query:\n\n{user_input}"
112
 
113
- # Call GPT-4 to extract keywords based on the user prompt
114
  response = client.chat.completions.create(
115
  model="gpt-4",
116
  messages=[
@@ -121,35 +122,40 @@ def extract_keywords_with_gpt(user_input, max_tokens=100, temperature=0.3):
121
  temperature=temperature
122
  )
123
 
124
- # Extract the content from GPT-4's reply
125
  extracted_keywords = response.choices[0].message.content.strip()
126
 
127
- return extracted_keywords
128
 
129
- def fetch_nasa_ads_references(prompt):
130
- try:
131
- # Use the entire prompt for the query
132
- simplified_query = prompt
133
 
 
 
 
134
  # Query NASA ADS for relevant papers
135
- papers = ADS.query_simple(simplified_query)
136
-
137
  if not papers or len(papers) == 0:
138
- return [("No results found", "N/A", "N/A")]
139
-
140
  # Include authors in the references
141
- references = [
142
- (
143
- paper['title'][0],
144
- ", ".join(paper['author'][:3]) + (" et al." if len(paper['author']) > 3 else ""),
145
- paper['bibcode']
146
- )
147
- for paper in papers[:5] # Limit to 5 references
148
- ]
 
 
 
149
  return references
150
-
151
  except Exception as e:
152
- return [("Error fetching references", str(e), "N/A")]
 
153
 
154
  def fetch_exoplanet_data():
155
  # Connect to NASA Exoplanet Archive TAP Service
@@ -203,7 +209,7 @@ def generate_response(user_input, science_objectives="", relevant_context="", re
203
  if references:
204
  response_content = response.choices[0].message.content.strip()
205
  references_text = "\n\nADS References:\n" + "\n".join(
206
- [f"- {title} by {authors} (Bibcode: {bibcode})" for title, authors, bibcode in references]
207
  )
208
  return f"{response_content}\n{references_text}"
209
 
@@ -378,8 +384,12 @@ def chatbot(user_input, science_objectives="", context="", subdomain="", max_tok
378
 
379
  yield "Context Retrieved successfully ✅ ", None, None, None, None
380
 
381
- # Fetch NASA ADS references using the full prompt
382
- references = fetch_nasa_ads_references(subdomain)
 
 
 
 
383
 
384
  yield "🔄 Generating structured response using GPT-4o...", None, None, None, None
385
 
 
16
  import pyvo as vo
17
  import pandas as pd
18
  from pinecone import Pinecone
19
+ import logging
20
+ import re
21
 
22
  # Load the NASA-specific bi-encoder model and tokenizer
23
  bi_encoder_model_name = "nasa-impact/nasa-smd-ibm-st-v2"
 
108
 
109
  return retrieved_context
110
 
111
+ def extract_keywords_with_gpt(context, max_tokens=100, temperature=0.3):
112
+
113
+ keyword_prompt = f"Extract 3 most important scientific keywords from the following user query:\n\n{context}"
114
 
 
115
  response = client.chat.completions.create(
116
  model="gpt-4",
117
  messages=[
 
122
  temperature=temperature
123
  )
124
 
 
125
  extracted_keywords = response.choices[0].message.content.strip()
126
 
127
+ cleaned_keywords = re.sub(r'\d+\.\s*', '', extracted_keywords)
128
 
129
+ keywords_list = [kw.strip() for kw in cleaned_keywords.split("\n") if kw.strip()]
130
+
131
+ return keywords_list
 
132
 
133
def fetch_nasa_ads_references(ads_query, max_results=5):
    """Fetch relevant NASA ADS papers and format them for readability.

    Args:
        ads_query: Free-text query string passed to ``ADS.query_simple``.
        max_results: Maximum number of papers to return. Defaults to 5,
            the limit that was previously hard-coded.

    Returns:
        A list of ``(title, abstract, authors, bibcode, pub, pubdate)``
        tuples. A blank query or an empty result set yields a single
        "No results found" placeholder tuple. If the lookup raises, the
        exception is logged and a single "Error fetching references"
        placeholder tuple is returned instead; this function does not
        propagate exceptions.
    """
    # A blank query cannot match anything useful; skip the remote call.
    if ads_query is None or not str(ads_query).strip():
        return [("No results found", "N/A", "N/A", "N/A", "N/A", "N/A")]

    try:
        # Query NASA ADS for relevant papers
        papers = ADS.query_simple(ads_query)

        # Explicit length check avoids relying on the truthiness of
        # whatever container type the query returns.
        if papers is None or len(papers) == 0:
            return [("No results found", "N/A", "N/A", "N/A", "N/A", "N/A")]

        return [_format_ads_paper(paper) for paper in papers[:max_results]]

    except Exception:
        # Boundary handler: callers render the placeholder rather than
        # crash, while the full traceback is kept in the log.
        logging.exception("Error fetching ADS references")
        return [("Error fetching references", "See logs for details", "N/A", "N/A", "N/A", "N/A")]


def _ads_field(paper, key, default):
    """Return ``paper[key]``, or ``default`` when it is missing or None."""
    # Item access works for dict-like records and for table rows alike.
    try:
        value = paper[key]
    except (KeyError, IndexError, TypeError, ValueError):
        return default
    return default if value is None else value


def _format_ads_paper(paper):
    """Build one ``(title, abstract, authors, bibcode, pub, pubdate)`` tuple."""
    raw_title = _ads_field(paper, 'title', None)
    if isinstance(raw_title, str):
        # A plain string must not be indexed, or only its first
        # character would be kept.
        title = raw_title or 'Title not available'
    else:
        try:
            title = raw_title[0]
        except (TypeError, IndexError, KeyError):
            title = 'Title not available'

    raw_authors = _ads_field(paper, 'author', [])
    if isinstance(raw_authors, str):
        raw_authors = [raw_authors]
    author_names = [str(name) for name in raw_authors]
    # Show at most three authors; mark the truncation with "et al."
    authors = ", ".join(author_names[:3])
    if len(author_names) > 3:
        authors += " et al."

    return (
        title,
        _ads_field(paper, 'abstract', 'Abstract not available'),
        authors,
        _ads_field(paper, 'bibcode', 'N/A'),
        _ads_field(paper, 'pub', 'Unknown Journal'),
        _ads_field(paper, 'pubdate', 'Unknown Date'),
    )
159
 
160
  def fetch_exoplanet_data():
161
  # Connect to NASA Exoplanet Archive TAP Service
 
209
  if references:
210
  response_content = response.choices[0].message.content.strip()
211
  references_text = "\n\nADS References:\n" + "\n".join(
212
+ [f"- {title} {authors} (Bibcode: {bibcode}) {pub} {pubdate}" for title, abstract, authors, bibcode, pub, pubdate in references])
213
  )
214
  return f"{response_content}\n{references_text}"
215
 
 
384
 
385
  yield "Context Retrieved successfully ✅ ", None, None, None, None
386
 
387
+ keywords = extract_keywords_with_gpt(context)
388
+
389
+ ads_query = " ".join(keywords)
390
+
391
+ # Fetch NASA ADS references using keywords extracted from the user context
392
+ references = fetch_nasa_ads_references(ads_query)
393
 
394
  yield "🔄 Generating structured response using GPT-4o...", None, None, None, None
395