Create data_insights.py
Browse files- utils/data_insights.py +50 -0
utils/data_insights.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pyvo as vo
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
def fetch_exoplanet_data(limit=10):
    """Fetch a sample of rows from the NASA Exoplanet Archive as a DataFrame.

    Queries the ``pscomppars`` table through the archive's TAP service and
    returns a fixed selection of columns (not the whole table).

    Args:
        limit: Maximum number of rows to request (ADQL ``TOP``). Defaults to
            10, the value that used to be hard-coded in the query.

    Returns:
        A pandas DataFrame with one column per selected field.

    Raises:
        ValueError: If ``limit`` is not a positive integer.
    """
    # Validate before touching the network: the value is interpolated into
    # the ADQL text, so only a plain positive int is acceptable. ``bool`` is
    # rejected explicitly because it is a subclass of ``int``.
    if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1:
        raise ValueError(f"limit must be a positive integer, got {limit!r}")

    # Columns requested from pscomppars, in the order they appear in the result.
    columns = (
        "pl_name",
        "hostname",
        "sy_snum",
        "sy_pnum",
        "discoverymethod",
        "disc_year",
        "disc_facility",
        "pl_controv_flag",
        "pl_orbper",
        "pl_orbsmax",
        "pl_rade",
        "pl_bmasse",
        "pl_orbeccen",
        "pl_eqt",
        "st_spectype",
        "st_teff",
        "st_rad",
        "st_mass",
        "ra",
        "dec",
        "sy_vmag",
    )
    column_list = ", ".join(columns)
    ex_query = f"SELECT TOP {limit} {column_list} FROM pscomppars"

    # Connect to the NASA Exoplanet Archive TAP service and run the query.
    tap_service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")
    qresult = tap_service.search(ex_query)

    # Convert the TAP result to a table, then to a pandas DataFrame.
    return qresult.to_table().to_pandas()
|
21 |
+
|
22 |
+
def generate_data_insights(
    user_input,
    client,
    exoplanet_data,
    max_tokens=500,
    temperature=0.3,
    model="gpt-4o",
):
    """Ask a chat model for insights about the exoplanet data for a user query.

    The DataFrame is serialized to CSV text and embedded in the prompt
    together with the user's query.

    Args:
        user_input: The user's question, inserted verbatim into the prompt.
        client: Object exposing ``chat.completions.create(...)``
            (presumably an OpenAI client; confirm against the caller).
        exoplanet_data: Object with a ``to_csv(index=False)`` method, such as
            a pandas DataFrame.
        max_tokens: Forwarded to the completion call as ``max_tokens``.
        temperature: Forwarded to the completion call as ``temperature``.
        model: Model name for the completion call. Defaults to ``"gpt-4o"``,
            the value that used to be hard-coded.

    Returns:
        The first choice's message text with surrounding whitespace removed,
        or an empty string if the message carries no text content.
    """
    # CSV without the index column keeps the embedded data compact.
    data_as_text = exoplanet_data.to_csv(index=False)

    # Build the prompt from the user query and the serialized data.
    insights_prompt = (
        "Analyze the following user query and provide relevant insights based on the provided exoplanet data.\n\n"
        f"User Query: {user_input}\n\n"
        f"Exoplanet Data:\n{data_as_text}\n\n"
        "Please provide insights that are relevant to the user's query."
    )

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an expert in analyzing astronomical data and generating insights."},
            {"role": "user", "content": insights_prompt},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # The message content may be None, so guard before calling .strip().
    content = response.choices[0].message.content
    return content.strip() if content else ""
|