juancamval Xilena committed on
Commit
6f70ef7
verified
1 Parent(s): 3ad2a1a

Update app.py (#6)

Browse files

- Update app.py (d0f2f5ad6b1531780cfdd6e5c81fb378dd2527cb)


Co-authored-by: Xilena Atenea Rojas Salazar <[email protected]>

Files changed (1) hide show
  1. app.py +74 -99
app.py CHANGED
@@ -1,46 +1,46 @@
1
  # ---------------------------------------------------------------------------------
2
  # Aplicación principal para cargar el modelo, generar prompts y explicar los datos
3
  # ---------------------------------------------------------------------------------
 
4
  import streamlit as st # type: ignore
5
  import os
6
  import re
7
  import pandas as pd # type: ignore
8
- from dotenv import load_dotenv # type: ignore
9
- # Para cambios locales
10
  from supabase import create_client, Client # type: ignore
 
11
  # from pandasai import SmartDataframe # type: ignore
12
- from pandasai import SmartDatalake # type: ignore
13
- # Porque ya usamos más de un df (más de una tabla de nuestra db)
14
- from pandasai.llm.local_llm import LocalLLM # type: ignore
15
  from pandasai import Agent
16
- import plotly.graph_objects as go
17
  import matplotlib.pyplot as plt
18
  import time
19
 
 
20
  # ---------------------------------------------------------------------------------
21
  # Funciones auxiliares
22
  # ---------------------------------------------------------------------------------
23
 
 
24
  def generate_graph_prompt(user_query):
25
  prompt = f"""
26
  You are a senior data scientist analyzing European labor force data.
 
27
  Given the user's request: "{user_query}"
28
- 1. Plot the relevant data using graph_objects plotly:
29
- - Use `df.query("geo == 'X'")` to filter the country, instead of chained comparisons.
30
- - Avoid using filters like `df[df['geo'] == 'Germany']`.
31
- - Use traces with 'line+markers' mode. (e.g, fig.add_trace(go.Scatter(x='X', y='Y',
32
- mode='lines+markers',
33
- name='Country_name')))
34
  - Include clear axis labels and a descriptive title.
35
  - Save the plot as an image file (e.g., temp_chart.png).
36
-
37
- 2. After plotting, write a **concise analytical summary** of the trend based on those 5 years. The summary should:
38
- - Identify the **year with the largest increase** and the percent change.
39
- - Identify the **year with the largest decrease** and the percent change.
40
- - Provide a **brief overall trend interpretation** (e.g., steady growth, fluctuating, recovery, etc.).
41
- - Avoid listing every year individually, summarize intelligently.
42
-
43
- 3. Store the summary in a variable named `explanation`.
44
 
45
  4. Return a result dictionary structured as follows:
46
  result = {{
@@ -48,15 +48,15 @@ def generate_graph_prompt(user_query):
48
  "value": "temp_chart.png",
49
  "explanation": explanation
50
  }}
51
-
52
  IMPORTANT: Use only the data available in the input DataFrame.
53
  """
54
  return prompt
55
 
56
- #TODO: Continuar mejorando el prompt
57
  # ---------------------------------------------------------------------------------
58
  # Configuración de conexión a Supabase
59
  # ---------------------------------------------------------------------------------
 
60
  # Cargar variables de entorno desde archivo .env
61
  load_dotenv()
62
 
@@ -67,6 +67,7 @@ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
67
  # Crear cliente Supabase
68
  supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
69
 
 
70
  # Función para cargar datos de una tabla de Supabase
71
  # Tablas posibles: fertility, geo data, labor, population, predictions
72
  def load_data(table):
@@ -89,6 +90,7 @@ def load_data(table):
89
  else:
90
  st.info("Response object does not have 'data' or known error attributes. Check the logs.")
91
  return pd.DataFrame()
 
92
  else:
93
  st.error("Supabase client not initialized. Check environment variables.")
94
  return pd.DataFrame()
@@ -96,41 +98,38 @@ def load_data(table):
96
  st.error(f"An error occurred during data loading: {e}")
97
  return pd.DataFrame()
98
 
 
99
  # ---------------------------------------------------------------------------------
100
  # Cargar datos iniciales
101
  # ---------------------------------------------------------------------------------
102
 
103
- # TODO: La idea es luego usar todas las tablas, cuando ya funcione.
104
- # Se puede si el modelo funciona con las gráficas, sino que toca mejorarlo porque serían consultas más complejas.
105
-
106
  labor_data = load_data("labor")
107
  fertility_data = load_data("fertility")
108
  # population_data = load_data("population")
109
- # predictions_data = load_data("predictions")
110
-
111
- # TODO: Buscar la forma de disminuir la latencia (muchos datos = mucha latencia)
112
 
113
  # ---------------------------------------------------------------------------------
114
  # Inicializar LLM desde Ollama con PandasAI
115
  # ---------------------------------------------------------------------------------
116
 
117
- # ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
118
- # model="gemma3:12b",
119
- # temperature=0.1,
120
- # max_tokens=8000)
 
 
121
 
122
- lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # el modelo es gemma-3-12b-it-qat
123
  # sdl = SmartDatalake([labor_data, fertility_data, population_data, predictions_data], config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
124
  # sdl = SmartDatalake([labor_data, fertility_data], config={"llm": ollama_llm})
125
 
126
- # agent = Agent([labor_data], config={"llm": lm_studio_llm}) # TODO: Probar Agent con multiples dfs
127
  agent = Agent(
128
  [
129
  labor_data,
130
  fertility_data
131
  ],
132
  config={
133
- "llm": lm_studio_llm,
134
  "enable_cache": False,
135
  "enable_filter_extraction": False # evita errores de parseo
136
  }
@@ -140,67 +139,43 @@ agent = Agent(
140
  # Configuración de la app en Streamlit
141
  # ---------------------------------------------------------------------------------
142
 
143
- st.set_page_config(page_title="GraphGen", page_icon="🇪🇺")
144
- st.title("_Europe GraphGen_ :blue[Graph generator] :flag-eu:")
145
- st.caption("Mapping Europe's data: Your tool for custom demographic charts")
146
-
147
- if "messages" not in st.session_state:
148
- st.session_state.messages = []
149
- st.session_state.messages.append({"role": "assistant", "content": "What graphic do you have in mind?"})
150
-
151
- for message in st.session_state.messages:
152
- with st.chat_message(message["role"]):
153
- st.markdown(message["content"])
154
-
155
- prompt = st.chat_input("Type your message here...", key="chat_input_bottom")
156
-
157
- if prompt:
158
- st.session_state.messages.append({"role": "user", "content": prompt})
159
- with st.chat_message("user"):
160
- st.markdown(prompt)
161
-
162
- with st.chat_message("assistant"):
163
- with st.spinner('Generating answer...'):
164
- try:
165
- print(f"\nGenerating prompt...\n")
166
- graph_prompt = generate_graph_prompt(prompt)
167
- print(f"\nPrompt generated: {graph_prompt}\n")
168
-
169
- start_time = time.time()
170
- answer = agent.chat(graph_prompt)
171
- print(f"\nAnswer type: {type(answer)}\n") # Verificar tipo de objeto
172
- print(f"\nAnswer content: {answer}\n") # Inspeccionar contenido de la respuesta
173
- print(f"\nFull result: {agent.last_result}\n")
174
-
175
- full_result = agent.last_result
176
-
177
- if full_result is not None and isinstance(full_result, dict):
178
- explanation = full_result.get("explanation", "")
179
- if isinstance(answer, str) and os.path.isfile(answer):
180
- # Si el output es una ruta válida a imagen
181
- im = plt.imread(answer)
182
- st.image(im)
183
- os.remove(answer) # Limpiar archivo temporal
184
-
185
- if explanation:
186
- st.markdown(f"**Explanation:** {explanation}")
187
- else:
188
- # Si no es una ruta válida, mostrar como texto
189
- st.markdown(str(answer))
190
- else:
191
- st.markdown("No se pudo generar una respuesta estructurada.")
192
- if full_result is not None:
193
- print(f"Error: `full_result` no es un diccionario: {full_result}")
194
- else:
195
- print("Error: `full_result` es None.")
196
-
197
- elapsed_time = time.time() - start_time
198
- print(f"\nExecution time: {elapsed_time:.2f} seconds\n")
199
-
200
- except Exception as e:
201
- st.error(f"Error generating answer: {e}")
202
-
203
- if st.button("Clear chat"):
204
- st.session_state.messages = []
205
- st.session_state.messages.append({"role": "assistant", "content": "Chat has been cleared. What graphic do you have in mind now?"})
206
- st.rerun()
 
1
  # ---------------------------------------------------------------------------------
2
  # Aplicación principal para cargar el modelo, generar prompts y explicar los datos
3
  # ---------------------------------------------------------------------------------
4
+
5
  import streamlit as st # type: ignore
6
  import os
7
  import re
8
  import pandas as pd # type: ignore
9
+ from dotenv import load_dotenv # type: ignore # Para cambios locales
 
10
  from supabase import create_client, Client # type: ignore
11
+
12
  # from pandasai import SmartDataframe # type: ignore
13
+ from pandasai import SmartDatalake # type: ignore # Porque ya usamos más de un df (más de una tabla de nuestra db)
14
+ from pandasai.llm.local_llm import LocalLLM # type: ignore
 
15
  from pandasai import Agent
 
16
  import matplotlib.pyplot as plt
17
  import time
18
 
19
+
20
  # ---------------------------------------------------------------------------------
21
  # Funciones auxiliares
22
  # ---------------------------------------------------------------------------------
23
 
24
+
25
  def generate_graph_prompt(user_query):
26
  prompt = f"""
27
  You are a senior data scientist analyzing European labor force data.
28
+
29
  Given the user's request: "{user_query}"
30
+
31
+ 1. Plot the relevant data using matplotlib:
32
+ - Use pandas indexing with boolean conditions, not .query().
33
+ - For example: df[(df['geo'] == 'Germany') & (df['year'] >= 2018)]
 
 
34
  - Include clear axis labels and a descriptive title.
35
  - Save the plot as an image file (e.g., temp_chart.png).
36
+
37
+ 2. After plotting, write a *concise analytical summary* of the trend based on those years. The summary should:
38
+ - Use .diff() followed by .idxmax() and .idxmin() to find where the largest change occurs.
39
+ - Use .loc[] to retrieve the corresponding year and value.
40
+ - Calculate percent changes safely (check for divide-by-zero).
41
+ - Avoid using .index() on float values.
42
+
43
+ 3. Store the summary in a variable named explanation.
44
 
45
  4. Return a result dictionary structured as follows:
46
  result = {{
 
48
  "value": "temp_chart.png",
49
  "explanation": explanation
50
  }}
 
51
  IMPORTANT: Use only the data available in the input DataFrame.
52
  """
53
  return prompt
54
 
55
+
56
  # ---------------------------------------------------------------------------------
57
  # Configuración de conexión a Supabase
58
  # ---------------------------------------------------------------------------------
59
+
60
  # Cargar variables de entorno desde archivo .env
61
  load_dotenv()
62
 
 
67
  # Crear cliente Supabase
68
  supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
69
 
70
+
71
  # Funci贸n para cargar datos de una tabla de Supabase
72
  # Tablas posibles: fertility, geo data, labor, population, predictions
73
  def load_data(table):
 
90
  else:
91
  st.info("Response object does not have 'data' or known error attributes. Check the logs.")
92
  return pd.DataFrame()
93
+
94
  else:
95
  st.error("Supabase client not initialized. Check environment variables.")
96
  return pd.DataFrame()
 
98
  st.error(f"An error occurred during data loading: {e}")
99
  return pd.DataFrame()
100
 
101
+
102
  # ---------------------------------------------------------------------------------
103
  # Cargar datos iniciales
104
  # ---------------------------------------------------------------------------------
105
 
 
 
 
106
  labor_data = load_data("labor")
107
  fertility_data = load_data("fertility")
108
  # population_data = load_data("population")
109
+ # predictions_data = load_data("predictions")
 
 
110
 
111
  # ---------------------------------------------------------------------------------
112
  # Inicializar LLM desde Ollama con PandasAI
113
  # ---------------------------------------------------------------------------------
114
 
115
+ ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
116
+ model="gemma3:12b",
117
+ temperature=0.1,
118
+ max_tokens=8000)
119
+
120
+ # lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # el modelo es gemma-3-12b-it-qat
121
 
 
122
  # sdl = SmartDatalake([labor_data, fertility_data, population_data, predictions_data], config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
123
  # sdl = SmartDatalake([labor_data, fertility_data], config={"llm": ollama_llm})
124
 
125
+ # agent = Agent([labor_data], config={"llm": lm_studio_llm})
126
  agent = Agent(
127
  [
128
  labor_data,
129
  fertility_data
130
  ],
131
  config={
132
+ "llm": ollama_llm,
133
  "enable_cache": False,
134
  "enable_filter_extraction": False # evita errores de parseo
135
  }
 
139
  # Configuración de la app en Streamlit
140
  # ---------------------------------------------------------------------------------
141
 
142
+ # Título de la app
143
+ st.title("Europe GraphGen :blue[Graph generator] :flag-eu:")
144
+
145
+ # Entrada de usuario para describir el gráfico
146
+ user_input = st.chat_input("What graphics do you have in mind")
147
+
148
+ if user_input:
149
+ with st.spinner('Generating answer...'):
150
+ try:
151
+ print(f"\nGenerating prompt...\n")
152
+ prompt = generate_graph_prompt(user_input)
153
+ print(f"\nPrompt generated: {prompt}\n")
154
+
155
+ start_time = time.time()
156
+
157
+ answer = agent.chat(prompt)
158
+ print(f"\nAnswer type: {type(answer)}\n") # Verificar tipo de objeto
159
+ print(f"\nAnswer content: {answer}\n") # Inspeccionar contenido de la respuesta
160
+ print(f"\nFull result: {agent.last_result}\n")
161
+
162
+ full_result = agent.last_result
163
+ explanation = full_result.get("explanation", "")
164
+
165
+ elapsed_time = time.time() - start_time
166
+ print(f"\nExecution time: {elapsed_time:.2f} seconds\n")
167
+
168
+ if isinstance(answer, str) and os.path.isfile(answer):
169
+ # Si el output es una ruta válida a imagen
170
+ im = plt.imread(answer)
171
+ st.image(im)
172
+ os.remove(answer) # Limpiar archivo temporal
173
+
174
+ if explanation:
175
+ st.markdown(f"*Explanation:* {explanation}")
176
+ else:
177
+ # Si no es una ruta válida, mostrar como texto
178
+ st.markdown(str(answer))
179
+
180
+ except Exception as e:
181
+ st.error(f"Error generating answer: {e}")