Spaces:
Running
Running
Update app.py (#6)
Browse files- Update app.py (d0f2f5ad6b1531780cfdd6e5c81fb378dd2527cb)
Co-authored-by: Xilena Atenea Rojas Salazar <[email protected]>
app.py
CHANGED
@@ -1,46 +1,46 @@
|
|
1 |
# ---------------------------------------------------------------------------------
|
2 |
# Aplicación principal para cargar el modelo, generar prompts y explicar los datos
|
3 |
# ---------------------------------------------------------------------------------
|
|
|
4 |
import streamlit as st # type: ignore
|
5 |
import os
|
6 |
import re
|
7 |
import pandas as pd # type: ignore
|
8 |
-
from dotenv import load_dotenv # type: ignore
|
9 |
-
# Para cambios locales
|
10 |
from supabase import create_client, Client # type: ignore
|
|
|
11 |
# from pandasai import SmartDataframe # type: ignore
|
12 |
-
from pandasai import SmartDatalake # type: ignore
|
13 |
-
|
14 |
-
from pandasai.llm.local_llm import LocalLLM # type: ignore
|
15 |
from pandasai import Agent
|
16 |
-
import plotly.graph_objects as go
|
17 |
import matplotlib.pyplot as plt
|
18 |
import time
|
19 |
|
|
|
20 |
# ---------------------------------------------------------------------------------
|
21 |
# Funciones auxiliares
|
22 |
# ---------------------------------------------------------------------------------
|
23 |
|
|
|
24 |
def generate_graph_prompt(user_query):
|
25 |
prompt = f"""
|
26 |
You are a senior data scientist analyzing European labor force data.
|
|
|
27 |
Given the user's request: "{user_query}"
|
28 |
-
|
29 |
-
|
30 |
-
-
|
31 |
-
-
|
32 |
-
mode='lines+markers',
|
33 |
-
name='Country_name')))
|
34 |
- Include clear axis labels and a descriptive title.
|
35 |
- Save the plot as an image file (e.g., temp_chart.png).
|
36 |
-
|
37 |
-
2. After plotting, write a
|
38 |
-
-
|
39 |
-
-
|
40 |
-
-
|
41 |
-
- Avoid
|
42 |
-
|
43 |
-
3. Store the summary in a variable named
|
44 |
|
45 |
4. Return a result dictionary structured as follows:
|
46 |
result = {{
|
@@ -48,15 +48,15 @@ def generate_graph_prompt(user_query):
|
|
48 |
"value": "temp_chart.png",
|
49 |
"explanation": explanation
|
50 |
}}
|
51 |
-
|
52 |
IMPORTANT: Use only the data available in the input DataFrame.
|
53 |
"""
|
54 |
return prompt
|
55 |
|
56 |
-
|
57 |
# ---------------------------------------------------------------------------------
|
58 |
# Configuración de conexión a Supabase
|
59 |
# ---------------------------------------------------------------------------------
|
|
|
60 |
# Cargar variables de entorno desde archivo .env
|
61 |
load_dotenv()
|
62 |
|
@@ -67,6 +67,7 @@ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
|
|
67 |
# Crear cliente Supabase
|
68 |
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
69 |
|
|
|
70 |
# Función para cargar datos de una tabla de Supabase
|
71 |
# Tablas posibles: fertility, geo data, labor, population, predictions
|
72 |
def load_data(table):
|
@@ -89,6 +90,7 @@ def load_data(table):
|
|
89 |
else:
|
90 |
st.info("Response object does not have 'data' or known error attributes. Check the logs.")
|
91 |
return pd.DataFrame()
|
|
|
92 |
else:
|
93 |
st.error("Supabase client not initialized. Check environment variables.")
|
94 |
return pd.DataFrame()
|
@@ -96,41 +98,38 @@ def load_data(table):
|
|
96 |
st.error(f"An error occurred during data loading: {e}")
|
97 |
return pd.DataFrame()
|
98 |
|
|
|
99 |
# ---------------------------------------------------------------------------------
|
100 |
# Cargar datos iniciales
|
101 |
# ---------------------------------------------------------------------------------
|
102 |
|
103 |
-
# TODO: La idea es luego usar todas las tablas, cuando ya funcione.
|
104 |
-
# Se puede si el modelo funciona con las gráficas; si no, toca mejorarlo porque serían consultas más complejas.
|
105 |
-
|
106 |
labor_data = load_data("labor")
|
107 |
fertility_data = load_data("fertility")
|
108 |
# population_data = load_data("population")
|
109 |
-
# predictions_data = load_data("predictions")
|
110 |
-
|
111 |
-
# TODO: Buscar la forma de disminuir la latencia (muchos datos = mucha latencia)
|
112 |
|
113 |
# ---------------------------------------------------------------------------------
|
114 |
# Inicializar LLM desde Ollama con PandasAI
|
115 |
# ---------------------------------------------------------------------------------
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
|
|
|
|
121 |
|
122 |
-
lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # el modelo es gemma-3-12b-it-qat
|
123 |
# sdl = SmartDatalake([labor_data, fertility_data, population_data, predictions_data], config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
|
124 |
# sdl = SmartDatalake([labor_data, fertility_data], config={"llm": ollama_llm})
|
125 |
|
126 |
-
# agent = Agent([labor_data], config={"llm": lm_studio_llm})
|
127 |
agent = Agent(
|
128 |
[
|
129 |
labor_data,
|
130 |
fertility_data
|
131 |
],
|
132 |
config={
|
133 |
-
"llm":
|
134 |
"enable_cache": False,
|
135 |
"enable_filter_extraction": False # evita errores de parseo
|
136 |
}
|
@@ -140,67 +139,43 @@ agent = Agent(
|
|
140 |
# Configuración de la app en Streamlit
|
141 |
# ---------------------------------------------------------------------------------
|
142 |
|
143 |
-
|
144 |
-
st.title("
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
os.remove(answer) # Limpiar archivo temporal
|
184 |
-
|
185 |
-
if explanation:
|
186 |
-
st.markdown(f"**Explanation:** {explanation}")
|
187 |
-
else:
|
188 |
-
# Si no es una ruta válida, mostrar como texto
|
189 |
-
st.markdown(str(answer))
|
190 |
-
else:
|
191 |
-
st.markdown("No se pudo generar una respuesta estructurada.")
|
192 |
-
if full_result is not None:
|
193 |
-
print(f"Error: `full_result` no es un diccionario: {full_result}")
|
194 |
-
else:
|
195 |
-
print("Error: `full_result` es None.")
|
196 |
-
|
197 |
-
elapsed_time = time.time() - start_time
|
198 |
-
print(f"\nExecution time: {elapsed_time:.2f} seconds\n")
|
199 |
-
|
200 |
-
except Exception as e:
|
201 |
-
st.error(f"Error generating answer: {e}")
|
202 |
-
|
203 |
-
if st.button("Clear chat"):
|
204 |
-
st.session_state.messages = []
|
205 |
-
st.session_state.messages.append({"role": "assistant", "content": "Chat has been cleared. What graphic do you have in mind now?"})
|
206 |
-
st.rerun()
|
|
|
1 |
# ---------------------------------------------------------------------------------
|
2 |
# Aplicación principal para cargar el modelo, generar prompts y explicar los datos
|
3 |
# ---------------------------------------------------------------------------------
|
4 |
+
|
5 |
import streamlit as st # type: ignore
|
6 |
import os
|
7 |
import re
|
8 |
import pandas as pd # type: ignore
|
9 |
+
from dotenv import load_dotenv # type: ignore # Para cambios locales
|
|
|
10 |
from supabase import create_client, Client # type: ignore
|
11 |
+
|
12 |
# from pandasai import SmartDataframe # type: ignore
|
13 |
+
from pandasai import SmartDatalake # type: ignore # Porque ya usamos más de un df (más de una tabla de nuestra db)
|
14 |
+
from pandasai.llm.local_llm import LocalLLM # type: ignore
|
|
|
15 |
from pandasai import Agent
|
|
|
16 |
import matplotlib.pyplot as plt
|
17 |
import time
|
18 |
|
19 |
+
|
20 |
# ---------------------------------------------------------------------------------
|
21 |
# Funciones auxiliares
|
22 |
# ---------------------------------------------------------------------------------
|
23 |
|
24 |
+
|
25 |
def generate_graph_prompt(user_query):
|
26 |
prompt = f"""
|
27 |
You are a senior data scientist analyzing European labor force data.
|
28 |
+
|
29 |
Given the user's request: "{user_query}"
|
30 |
+
|
31 |
+
1. Plot the relevant data using matplotlib:
|
32 |
+
- Use pandas indexing with boolean conditions, not .query().
|
33 |
+
- For example: df[(df['geo'] == 'Germany') & (df['year'] >= 2018)]
|
|
|
|
|
34 |
- Include clear axis labels and a descriptive title.
|
35 |
- Save the plot as an image file (e.g., temp_chart.png).
|
36 |
+
|
37 |
+
2. After plotting, write a *concise analytical summary* of the trend based on those years. The summary should:
|
38 |
+
- Use .diff() followed by .idxmax() and .idxmin() to find where the largest change occurs.
|
39 |
+
- Use .loc[] to retrieve the corresponding year and value.
|
40 |
+
- Calculate percent changes safely (check for divide-by-zero).
|
41 |
+
- Avoid using .index() on float values.
|
42 |
+
|
43 |
+
3. Store the summary in a variable named explanation.
|
44 |
|
45 |
4. Return a result dictionary structured as follows:
|
46 |
result = {{
|
|
|
48 |
"value": "temp_chart.png",
|
49 |
"explanation": explanation
|
50 |
}}
|
|
|
51 |
IMPORTANT: Use only the data available in the input DataFrame.
|
52 |
"""
|
53 |
return prompt
|
54 |
|
55 |
+
|
56 |
# ---------------------------------------------------------------------------------
|
57 |
# Configuración de conexión a Supabase
|
58 |
# ---------------------------------------------------------------------------------
|
59 |
+
|
60 |
# Cargar variables de entorno desde archivo .env
|
61 |
load_dotenv()
|
62 |
|
|
|
67 |
# Crear cliente Supabase
|
68 |
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
|
69 |
|
70 |
+
|
71 |
# Función para cargar datos de una tabla de Supabase
|
72 |
# Tablas posibles: fertility, geo data, labor, population, predictions
|
73 |
def load_data(table):
|
|
|
90 |
else:
|
91 |
st.info("Response object does not have 'data' or known error attributes. Check the logs.")
|
92 |
return pd.DataFrame()
|
93 |
+
|
94 |
else:
|
95 |
st.error("Supabase client not initialized. Check environment variables.")
|
96 |
return pd.DataFrame()
|
|
|
98 |
st.error(f"An error occurred during data loading: {e}")
|
99 |
return pd.DataFrame()
|
100 |
|
101 |
+
|
102 |
# ---------------------------------------------------------------------------------
|
103 |
# Cargar datos iniciales
|
104 |
# ---------------------------------------------------------------------------------
|
105 |
|
|
|
|
|
|
|
106 |
labor_data = load_data("labor")
|
107 |
fertility_data = load_data("fertility")
|
108 |
# population_data = load_data("population")
|
109 |
+
# predictions_data = load_data("predictions")
|
|
|
|
|
110 |
|
111 |
# ---------------------------------------------------------------------------------
|
112 |
# Inicializar LLM desde Ollama con PandasAI
|
113 |
# ---------------------------------------------------------------------------------
|
114 |
|
115 |
+
ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
|
116 |
+
model="gemma3:12b",
|
117 |
+
temperature=0.1,
|
118 |
+
max_tokens=8000)
|
119 |
+
|
120 |
+
# lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # el modelo es gemma-3-12b-it-qat
|
121 |
|
|
|
122 |
# sdl = SmartDatalake([labor_data, fertility_data, population_data, predictions_data], config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
|
123 |
# sdl = SmartDatalake([labor_data, fertility_data], config={"llm": ollama_llm})
|
124 |
|
125 |
+
# agent = Agent([labor_data], config={"llm": lm_studio_llm})
|
126 |
agent = Agent(
|
127 |
[
|
128 |
labor_data,
|
129 |
fertility_data
|
130 |
],
|
131 |
config={
|
132 |
+
"llm": ollama_llm,
|
133 |
"enable_cache": False,
|
134 |
"enable_filter_extraction": False # evita errores de parseo
|
135 |
}
|
|
|
139 |
# Configuración de la app en Streamlit
|
140 |
# ---------------------------------------------------------------------------------
|
141 |
|
142 |
+
# Título de la app
|
143 |
+
st.title("Europe GraphGen :blue[Graph generator] :flag-eu:")
|
144 |
+
|
145 |
+
# Entrada de usuario para describir el gráfico
|
146 |
+
user_input = st.chat_input("What graphics do you have in mind")
|
147 |
+
|
148 |
+
if user_input:
|
149 |
+
with st.spinner('Generating answer...'):
|
150 |
+
try:
|
151 |
+
print(f"\nGenerating prompt...\n")
|
152 |
+
prompt = generate_graph_prompt(user_input)
|
153 |
+
print(f"\nPrompt generated: {prompt}\n")
|
154 |
+
|
155 |
+
start_time = time.time()
|
156 |
+
|
157 |
+
answer = agent.chat(prompt)
|
158 |
+
print(f"\nAnswer type: {type(answer)}\n") # Verificar tipo de objeto
|
159 |
+
print(f"\nAnswer content: {answer}\n") # Inspeccionar contenido de la respuesta
|
160 |
+
print(f"\nFull result: {agent.last_result}\n")
|
161 |
+
|
162 |
+
full_result = agent.last_result
|
163 |
+
explanation = full_result.get("explanation", "")
|
164 |
+
|
165 |
+
elapsed_time = time.time() - start_time
|
166 |
+
print(f"\nExecution time: {elapsed_time:.2f} seconds\n")
|
167 |
+
|
168 |
+
if isinstance(answer, str) and os.path.isfile(answer):
|
169 |
+
# Si el output es una ruta válida a imagen
|
170 |
+
im = plt.imread(answer)
|
171 |
+
st.image(im)
|
172 |
+
os.remove(answer) # Limpiar archivo temporal
|
173 |
+
|
174 |
+
if explanation:
|
175 |
+
st.markdown(f"*Explanation:* {explanation}")
|
176 |
+
else:
|
177 |
+
# Si no es una ruta válida, mostrar como texto
|
178 |
+
st.markdown(str(answer))
|
179 |
+
|
180 |
+
except Exception as e:
|
181 |
+
st.error(f"Error generating answer: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|