Spaces:

juancamval
/

graph_generator

Running

App Files Files Community

juancamval

Xilena committed 1 day ago

Commit

6f70ef7

verified ·

1 Parent(s): 3ad2a1a

Update app.py (#6)

Browse files

- Update app.py (d0f2f5ad6b1531780cfdd6e5c81fb378dd2527cb)

Co-authored-by: Xilena Atenea Rojas Salazar <[email protected]>

Files changed (1) hide show

app.py +74 -99

app.py CHANGED Viewed

@@ -1,46 +1,46 @@
 # ---------------------------------------------------------------------------------
 # Aplicación principal para cargar el modelo, generar prompts y explicar los datos
 # ---------------------------------------------------------------------------------
 import streamlit as st  # type: ignore
 import os
 import re
 import pandas as pd  # type: ignore
-from dotenv import load_dotenv  # type: ignore
-# Para cambios locales
 from supabase import create_client, Client  # type: ignore
 # from pandasai import SmartDataframe  # type: ignore
-from pandasai import SmartDatalake  # type: ignore
-# Porque ya usamos más de un df (más de una tabla de nuestra db)
-from pandasai.llm.local_llm import LocalLLM # type: ignore
 from pandasai import Agent
-import plotly.graph_objects as go
 import matplotlib.pyplot as plt
 import time
 # ---------------------------------------------------------------------------------
 # Funciones auxiliares
 # ---------------------------------------------------------------------------------
 def generate_graph_prompt(user_query):
     prompt = f"""
             You are a senior data scientist analyzing European labor force data.
             Given the user's request: "{user_query}"
-            1. Plot the relevant data using graph_objects plotly:
-            - Use `df.query("geo == 'X'")` to filter the country, instead of chained comparisons.
-            - Avoid using filters like `df[df['geo'] == 'Germany']`.
-            - Use traces with 'line+markers' mode. (e.g, fig.add_trace(go.Scatter(x='X', y='Y',
-                    mode='lines+markers',
-                    name='Country_name')))
             - Include clear axis labels and a descriptive title.
             - Save the plot as an image file (e.g., temp_chart.png).
-            2. After plotting, write a **concise analytical summary** of the trend based on those 5 years. The summary should:
-            - Identify the **year with the largest increase** and the percent change.
-            - Identify the **year with the largest decrease** and the percent change.
-            - Provide a **brief overall trend interpretation** (e.g., steady growth, fluctuating, recovery, etc.).
-            - Avoid listing every year individually, summarize intelligently.
-            3. Store the summary in a variable named `explanation`.
             4. Return a result dictionary structured as follows:
             result = {{
@@ -48,15 +48,15 @@ def generate_graph_prompt(user_query):
                 "value": "temp_chart.png",
                 "explanation": explanation
             }}
             IMPORTANT: Use only the data available in the input DataFrame.
             """
     return prompt
-#TODO: Continuar mejorando el prompt
 # ---------------------------------------------------------------------------------
 # Configuración de conexión a Supabase
 # ---------------------------------------------------------------------------------
 # Cargar variables de entorno desde archivo .env
 load_dotenv()
@@ -67,6 +67,7 @@ SUPABASE_KEY = os.getenv("SUPABASE_KEY")
 # Crear cliente Supabase
 supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
 # Función para cargar datos de una tabla de Supabase
 # Tablas posibles: fertility, geo data, labor, population, predictions
 def load_data(table):
@@ -89,6 +90,7 @@ def load_data(table):
             else:
                 st.info("Response object does not have 'data' or known error attributes. Check the logs.")
                 return pd.DataFrame()
         else:
             st.error("Supabase client not initialized. Check environment variables.")
             return pd.DataFrame()
@@ -96,41 +98,38 @@ def load_data(table):
         st.error(f"An error occurred during data loading: {e}")
         return pd.DataFrame()
 # ---------------------------------------------------------------------------------
 # Cargar datos iniciales
 # ---------------------------------------------------------------------------------
-# TODO: La idea es luego usar todas las tablas, cuando ya funcione.
-# Se puede si el modelo funciona con las gráficas, sino que toca mejorarlo  porque serían consultas más complejas.
 labor_data = load_data("labor")
 fertility_data = load_data("fertility")
 # population_data = load_data("population")
-# predictions_data = load_data("predictions")
-# TODO: Buscar la forma de disminuir la latencia (muchos datos = mucha latencia)
 # ---------------------------------------------------------------------------------
 # Inicializar LLM desde Ollama con PandasAI
 # ---------------------------------------------------------------------------------
-# ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
-#                       model="gemma3:12b",
-#                       temperature=0.1,
-#                       max_tokens=8000)
-lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # el modelo es gemma-3-12b-it-qat
 # sdl = SmartDatalake([labor_data, fertility_data, population_data, predictions_data], config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
 # sdl = SmartDatalake([labor_data, fertility_data], config={"llm": ollama_llm})
-# agent = Agent([labor_data], config={"llm": lm_studio_llm}) # TODO: Probar Agent con multiples dfs
 agent = Agent(
     [
         labor_data,
         fertility_data
     ],
     config={
-        "llm": lm_studio_llm,
         "enable_cache": False,
         "enable_filter_extraction": False  # evita errores de parseo
     }
@@ -140,67 +139,43 @@ agent = Agent(
 # Configuración de la app en Streamlit
 # ---------------------------------------------------------------------------------
-st.set_page_config(page_title="GraphGen", page_icon="🇪🇺")
-st.title("_Europe GraphGen_  :blue[Graph generator] :flag-eu:")
-st.caption("Mapping Europe's data: Your tool for custom demographic charts")
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-    st.session_state.messages.append({"role": "assistant", "content": "What graphic do you have in mind?"})
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-prompt = st.chat_input("Type your message here...", key="chat_input_bottom")
-if prompt:
-    st.session_state.messages.append({"role": "user", "content": prompt})
-    with st.chat_message("user"):
-        st.markdown(prompt)
-    with st.chat_message("assistant"):
-        with st.spinner('Generating answer...'):
-            try:
-                print(f"\nGenerating prompt...\n")
-                graph_prompt = generate_graph_prompt(prompt)
-                print(f"\nPrompt generated: {graph_prompt}\n")
-                start_time = time.time()
-                answer = agent.chat(graph_prompt)
-                print(f"\nAnswer type: {type(answer)}\n")  # Verificar tipo de objeto
-                print(f"\nAnswer content: {answer}\n")  # Inspeccionar contenido de la respuesta
-                print(f"\nFull result: {agent.last_result}\n")
-                full_result = agent.last_result
-                if full_result is not None and isinstance(full_result, dict):
-                    explanation = full_result.get("explanation", "")
-                    if isinstance(answer, str) and os.path.isfile(answer):
-                        # Si el output es una ruta válida a imagen
-                        im = plt.imread(answer)
-                        st.image(im)
-                        os.remove(answer)  # Limpiar archivo temporal
-                        if explanation:
-                            st.markdown(f"**Explanation:** {explanation}")
-                    else:
-                        # Si no es una ruta válida, mostrar como texto
-                        st.markdown(str(answer))
-                else:
-                    st.markdown("No se pudo generar una respuesta estructurada.")
-                    if full_result is not None:
-                        print(f"Error: `full_result` no es un diccionario: {full_result}")
-                    else:
-                        print("Error: `full_result` es None.")
-                elapsed_time = time.time() - start_time
-                print(f"\nExecution time: {elapsed_time:.2f} seconds\n")
-            except Exception as e:
-                st.error(f"Error generating answer: {e}")
-if st.button("Clear chat"):
-    st.session_state.messages = []
-    st.session_state.messages.append({"role": "assistant", "content": "Chat has been cleared. What graphic do you have in mind now?"})
-    st.rerun()

 # ---------------------------------------------------------------------------------
 # Aplicación principal para cargar el modelo, generar prompts y explicar los datos
 # ---------------------------------------------------------------------------------
 import streamlit as st  # type: ignore
 import os
 import re
 import pandas as pd  # type: ignore
+from dotenv import load_dotenv  # type: ignore # Para cambios locales
 from supabase import create_client, Client  # type: ignore
 # from pandasai import SmartDataframe  # type: ignore
+from pandasai import SmartDatalake  # type: ignore # Porque ya usamos más de un df (más de una tabla de nuestra db)
+from pandasai.llm.local_llm import LocalLLM  # type: ignore
 from pandasai import Agent
 import matplotlib.pyplot as plt
 import time
 # ---------------------------------------------------------------------------------
 # Funciones auxiliares
 # ---------------------------------------------------------------------------------
 def generate_graph_prompt(user_query):
     prompt = f"""
             You are a senior data scientist analyzing European labor force data.
             Given the user's request: "{user_query}"
+            1. Plot the relevant data using matplotlib:
+            - Use pandas indexing with boolean conditions, not .query().
+            - For example: df[(df['geo'] == 'Germany') & (df['year'] >= 2018)]
             - Include clear axis labels and a descriptive title.
             - Save the plot as an image file (e.g., temp_chart.png).
+            2. After plotting, write a *concise analytical summary* of the trend based on those years. The summary should:
+            - Use .diff() followed by .idxmax() and .idxmin() to find where the largest change occurs.
+            - Use .loc[] to retrieve the corresponding year and value.
+            - Calculate percent changes safely (check for divide-by-zero).
+            - Avoid using .index() on float values.
+            3. Store the summary in a variable named explanation.
             4. Return a result dictionary structured as follows:
             result = {{
                 "value": "temp_chart.png",
                 "explanation": explanation
             }}
             IMPORTANT: Use only the data available in the input DataFrame.
             """
     return prompt
 # ---------------------------------------------------------------------------------
 # Configuración de conexión a Supabase
 # ---------------------------------------------------------------------------------
 # Cargar variables de entorno desde archivo .env
 load_dotenv()
 # Crear cliente Supabase
 supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
 # Función para cargar datos de una tabla de Supabase
 # Tablas posibles: fertility, geo data, labor, population, predictions
 def load_data(table):
             else:
                 st.info("Response object does not have 'data' or known error attributes. Check the logs.")
                 return pd.DataFrame()
         else:
             st.error("Supabase client not initialized. Check environment variables.")
             return pd.DataFrame()
         st.error(f"An error occurred during data loading: {e}")
         return pd.DataFrame()
 # ---------------------------------------------------------------------------------
 # Cargar datos iniciales
 # ---------------------------------------------------------------------------------
 labor_data = load_data("labor")
 fertility_data = load_data("fertility")
 # population_data = load_data("population")
+# predictions_data = load_data("predictions")
 # ---------------------------------------------------------------------------------
 # Inicializar LLM desde Ollama con PandasAI
 # ---------------------------------------------------------------------------------
+ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
+                      model="gemma3:12b",
+                      temperature=0.1,
+                      max_tokens=8000)
+# lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1")  # el modelo es gemma-3-12b-it-qat
 # sdl = SmartDatalake([labor_data, fertility_data, population_data, predictions_data], config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
 # sdl = SmartDatalake([labor_data, fertility_data], config={"llm": ollama_llm})
+# agent = Agent([labor_data], config={"llm": lm_studio_llm})
 agent = Agent(
     [
         labor_data,
         fertility_data
     ],
     config={
+        "llm": ollama_llm,
         "enable_cache": False,
         "enable_filter_extraction": False  # evita errores de parseo
     }
 # Configuración de la app en Streamlit
 # ---------------------------------------------------------------------------------
+# Título de la app
+st.title("Europe GraphGen  :blue[Graph generator] :flag-eu:")
+# Entrada de usuario para describir el gráfico
+user_input = st.chat_input("What graphics do you have in mind")
+if user_input:
+    with st.spinner('Generating answer...'):
+        try:
+            print(f"\nGenerating prompt...\n")
+            prompt = generate_graph_prompt(user_input)
+            print(f"\nPrompt generated: {prompt}\n")
+            start_time = time.time()
+            answer = agent.chat(prompt)
+            print(f"\nAnswer type: {type(answer)}\n")  # Verificar tipo de objeto
+            print(f"\nAnswer content: {answer}\n")  # Inspeccionar contenido de la respuesta
+            print(f"\nFull result: {agent.last_result}\n")
+            full_result = agent.last_result
+            explanation = full_result.get("explanation", "")
+            elapsed_time = time.time() - start_time
+            print(f"\nExecution time: {elapsed_time:.2f} seconds\n")
+            if isinstance(answer, str) and os.path.isfile(answer):
+                # Si el output es una ruta válida a imagen
+                im = plt.imread(answer)
+                st.image(im)
+                os.remove(answer)  # Limpiar archivo temporal
+                if explanation:
+                    st.markdown(f"*Explanation:* {explanation}")
+            else:
+                # Si no es una ruta válida, mostrar como texto
+                st.markdown(str(answer))
+        except Exception as e:
+            st.error(f"Error generating answer: {e}")