Spaces:

juancamval
/

graph_generator

Running

App Files Files Community

Added plot explanation - App running locally (w/ gemma3:12b from LMStudio) (#4)

by angelicaporto - opened 14 days ago

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

+168

-173

Files changed (1) hide show

app.py +168 -173

app.py CHANGED Viewed

@@ -1,173 +1,168 @@
-# ---------------------------------------------------------------------------------
-# Aplicación principal para cargar el modelo, generar prompts y explicar los datos
-# ---------------------------------------------------------------------------------
-import streamlit as st  # type: ignore
-import os
-import re
-import pandas as pd  # type: ignore
-from dotenv import load_dotenv  # type: ignore # Para cambios locales
-from supabase import create_client, Client  # type: ignore
-# from transformers import pipeline
-from pandasai import SmartDataframe  # type: ignore
-from pandasai.llm.local_llm import LocalLLM
-# ---------------------------------------------------------------------------------
-# Funciones auxiliares
-# ---------------------------------------------------------------------------------
-# Función para extracción de código Python del output del modelo
-def extract_code(llm_output):
-    code_match = re.search(r"```python\n(.*?)\n```", llm_output, re.DOTALL)
-    if code_match:
-        return code_match.group(1)
-    return None
-# Función para generar prompts de gráficos comparativos
-# Ejemplo de prompt generado:
-# generate_graph_prompt("Germany", "France", "fertility rate", 2020, 2030)
-def generate_graph_prompt(country1, country2, metric, start_year, end_year):
-    prompt = f"""
-    You have access to a database of European countries with data on {metric}, labor force participation, population, and their predictions for future years.
-    Generate Python code using matplotlib to create a line graph showing the trend of {metric} for {country1} and {country2} from {start_year} to {end_year}.
-    Also, provide a concise explanation of what this graph represents for an end user who might not be familiar with the data.
-    """
-    return prompt
-# ---------------------------------------------------------------------------------
-# Configuración de conexión a Supabase
-# ---------------------------------------------------------------------------------
-# Cargar variables de entorno desde archivo .env
-load_dotenv()
-# Conectar las credenciales de Supabase (ubicadas en "Secrets" en Streamlit)
-SUPABASE_URL = os.getenv("SUPABASE_URL")
-SUPABASE_KEY = os.getenv("SUPABASE_KEY")
-# Crear cliente Supabase
-supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
-# Función para cargar datos de una tabla de Supabase
-# Tablas posibles: fertility, geo data, labor, population, predictions
-def load_data(table):
-    try:
-        if supabase:
-            response = supabase.from_(table).select("*").execute()
-            print(f"Response object: {response}")  # Inspeccionar objeto completo
-            print(f"Response type: {type(response)}")  # Verificar tipo de objeto
-            # Acceder a atributos relacionados a error o data
-            if hasattr(response, 'data'):
-                print(f"Response data: {response.data}")
-                return pd.DataFrame(response.data)
-            elif hasattr(response, 'status_code'):
-                print(f"Response status code: {response.status_code}")
-            elif hasattr(response, '_error'):  # Versiones antiguas
-                print(f"Older error attribute: {response._error}")
-                st.error(f"Error fetching data: {response._error}")
-                return pd.DataFrame()
-            else:
-                st.info("Response object does not have 'data' or known error attributes. Check the logs.")
-                return pd.DataFrame()
-        else:
-            st.error("Supabase client not initialized. Check environment variables.")
-            return pd.DataFrame()
-    except Exception as e:
-        st.error(f"An error occurred during data loading: {e}")
-        return pd.DataFrame()
-# ---------------------------------------------------------------------------------
-# Cargar datos iniciales
-# ---------------------------------------------------------------------------------
-# # Cargar datos desde la tabla "labor"
-data = load_data("labor")
-# TODO: La idea es luego usar todas las tablas, cuando ya funcione.
-# Se puede si el modelo funciona con las gráficas, sino que toca mejorarlo
-# porque serían consultas más complejas.
-# labor_data = load_data("labor")
-# fertility_data = load_data("fertility")
-# population_data = load_data("population")
-# predictions_data = load_data("predictions")
-"""
-# Ej:
-# import os
-# import pandas as pd
-# from pandasai import SmartDatalake
-# employees_data = {
-#     'EmployeeID': [1, 2, 3, 4, 5],
-#     'Name': ['John', 'Emma', 'Liam', 'Olivia', 'William'],
-#     'Department': ['HR', 'Sales', 'IT', 'Marketing', 'Finance']
-# }
-# salaries_data = {
-#     'EmployeeID': [1, 2, 3, 4, 5],
-#     'Salary': [5000, 6000, 4500, 7000, 5500]
-# }
-# employees_df = pd.DataFrame(employees_data)
-# salaries_df = pd.DataFrame(salaries_data)
-# # By default, unless you choose a different LLM, it will use BambooLLM.
-# # You can get your free API key signing up at https://pandabi.ai (you can also configure it in your .env file)
-# os.environ["PANDASAI_API_KEY"] = "YOUR_API_KEY"
-# lake = SmartDatalake([employees_df, salaries_df])
-# lake.chat("Who gets paid the most?")
-# # Output: Olivia gets paid the most
-"""
-# ---------------------------------------------------------------------------------
-# Inicializar modelo LLM
-# ---------------------------------------------------------------------------------
-# # Pendiente cambiar Keys dependiendo del modelo que escojamos
-# model_name = "google/flan-t5-small"  # Probando modelos
-# generator = pipeline("text-generation", model=model_name)
-# ---------------------------------------------------------------------------------
-# Inicializar PandasAI con StarCoder
-# ---------------------------------------------------------------------------------
-# # Definir el modelo StarCoder desde Hugging Face
-# huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-# llm = Starcoder(api_token=huggingface_token)
-ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
-                      model="gemma3:12b",
-                      temperature=0.1,
-                      max_tokens=8000)
-sdf = SmartDataframe(data, config={"llm": ollama_llm}) # DataFrame PandasAI-ready.
-# ---------------------------------------------------------------------------------
-# Configuración de la app en Streamlit
-# ---------------------------------------------------------------------------------
-# Título de la app
-st.title("_Europe GraphGen_  :blue[Graph generator] :flag-eu:")
-# Entrada de usuario para describir el gráfico
-user_input = st.text_input("What graphics do you have in mind")
-generate_button = st.button("Generate")
-# Procesar el input del usuario con PandasAI
-if generate_button and user_input:
-    st.dataframe(data.head())
-    with st.spinner('Generating answer...'):
-        try:
-            answer = sdf.chat(user_input)
-            st.write(answer)
-        except Exception as e:
-            st.error(f"Error generating answer: {e}")
-# TODO: Output estructurado si vemos que es necesario.

+# ---------------------------------------------------------------------------------
+# Aplicación principal para cargar el modelo, generar prompts y explicar los datos
+# ---------------------------------------------------------------------------------
+import streamlit as st  # type: ignore
+import os
+import re
+import pandas as pd  # type: ignore
+from dotenv import load_dotenv  # type: ignore # Para cambios locales
+from supabase import create_client, Client  # type: ignore
+from pandasai import Agent
+# from pandasai import SmartDataframe  # type: ignore
+from pandasai.llm.local_llm import LocalLLM
+from pandasai import Agent
+import matplotlib.pyplot as plt
+# ---------------------------------------------------------------------------------
+# Funciones auxiliares
+# ---------------------------------------------------------------------------------
+# Example call:
+# generate_graph_prompt("Plot the fertility rate of Germany and France from 2020 to 2030")
+def generate_graph_prompt(user_query: str) -> str:
+    """Wrap a free-text chart request in the instruction template sent to the agent.
+
+    Args:
+        user_query: The user's plain-language description of the chart they want.
+
+    Returns:
+        The prompt text: a fixed set of instructions with ``user_query`` quoted
+        on the "Given the user's request" line.
+    """
+    # The doubled braces ({{ }}) are f-string escapes: they render as single
+    # literal braces, so the example below reaches the model as a plain dict.
+    prompt = f"""
+    You are a highly skilled data scientist working with European demographic data.
+    Given the user's request: "{user_query}"
+    1. Plot the relevant data according to the user's request.
+    2. After generating the plot, write a clear, human-readable explanation of the plot (no code).
+    3. Save the explanation in a variable called "explanation".
+    VERY IMPORTANT:
+    - Declare a result variable as a dictionary that includes:
+      - type = "plot"
+      - value = the path to the saved plot
+      - explanation = the explanation text you wrote
+    Example of expected result dictionary:
+    result = {{
+        "type": "plot",
+        "value": "temp_chart.png",
+        "explanation": explanation
+    }}
+    Only respond with valid Python code.
+    IMPORTANT: Stick strictly to using the data available in the database.
+    """
+    return prompt
+# TODO: Improve the prompt.
+# ---------------------------------------------------------------------------------
+# Supabase connection setup
+# ---------------------------------------------------------------------------------
+# Load environment variables from the .env file
+load_dotenv()
+# Read the Supabase credentials (stored under "Secrets" when running on Streamlit)
+SUPABASE_URL = os.getenv("SUPABASE_URL")
+SUPABASE_KEY = os.getenv("SUPABASE_KEY")
+# Create the Supabase client only when both credentials are present.
+# create_client rejects an empty URL or key, which would abort the script before
+# load_data() could show its "client not initialized" message; leaving the client
+# as None lets that message reach the user instead.
+supabase: "Client | None" = (
+    create_client(SUPABASE_URL, SUPABASE_KEY) if SUPABASE_URL and SUPABASE_KEY else None
+)
+# Load the contents of one Supabase table.
+# Possible tables: fertility, geo data, labor, population, predictions
+def load_data(table: str) -> pd.DataFrame:
+    """Fetch every row of ``table`` from Supabase and return it as a DataFrame.
+
+    Every failure path reports the problem in the Streamlit UI and returns an
+    empty DataFrame, so callers always receive a DataFrame and never ``None``.
+
+    Args:
+        table: Name of the Supabase table to read.
+
+    Returns:
+        A DataFrame built from ``response.data``, or an empty DataFrame when the
+        client is missing, the response carries no data, or an exception occurs.
+    """
+    try:
+        if not supabase:
+            st.error("Supabase client not initialized. Check environment variables.")
+            return pd.DataFrame()
+        response = supabase.from_(table).select("*").execute()
+        print(f"Response object: {response}")  # Inspect the whole object
+        print(f"Response type: {type(response)}")  # Check the object type
+        # Probe the attributes that carry either the data or an error
+        if hasattr(response, 'data'):
+            print(f"Response data: {response.data}")
+            return pd.DataFrame(response.data)
+        if hasattr(response, 'status_code'):
+            # This branch used to fall through and return None; report the
+            # status and return an empty frame like the other failure paths.
+            print(f"Response status code: {response.status_code}")
+            st.error(f"Error fetching data: status code {response.status_code}")
+            return pd.DataFrame()
+        if hasattr(response, '_error'):  # Older client versions
+            print(f"Older error attribute: {response._error}")
+            st.error(f"Error fetching data: {response._error}")
+            return pd.DataFrame()
+        st.info("Response object does not have 'data' or known error attributes. Check the logs.")
+        return pd.DataFrame()
+    except Exception as e:
+        st.error(f"An error occurred during data loading: {e}")
+        return pd.DataFrame()
+# ---------------------------------------------------------------------------------
+# Load initial data
+# ---------------------------------------------------------------------------------
+# # Load data from the "labor" table
+data = load_data("labor")
+# TODO: The idea is to use all the tables later, once this works.
+# That is possible if the model copes with the charts; otherwise it needs improving,
+# because the queries would be more complex.
+# labor_data = load_data("labor")
+# fertility_data = load_data("fertility")
+# population_data = load_data("population")
+# predictions_data = load_data("predictions")
+# ---------------------------------------------------------------------------------
+# Initialize the model
+# ---------------------------------------------------------------------------------
+# Earlier Ollama configuration, kept for reference:
+# ollama_llm = LocalLLM(api_base="http://localhost:11434/v1",
+#                       model="gemma3:12b",
+#                       temperature=0.1,
+#                       max_tokens=8000)
+lm_studio_llm = LocalLLM(api_base="http://localhost:1234/v1") # the model is gemma-3-12b-it-qat
+# Pass `data`, the frame loaded from the "labor" table. `labor_data` is only
+# assigned in commented-out code, so referencing it raised NameError at startup.
+agent = Agent([data], config={"llm": lm_studio_llm}) # Initialize the agent
+# ---------------------------------------------------------------------------------
+# Streamlit app setup
+# ---------------------------------------------------------------------------------
+# App title
+st.title("_Europe GraphGen_  :blue[Graph generator] :flag-eu:")
+# TODO: Give the user instructions on how to write a very good prompt (no jargon, with the end user in mind)
+# User input describing the chart
+user_input = st.text_input("What graphics do you have in mind")
+generate_button = st.button("Generate")
+# Process the user's input with PandasAI
+if generate_button and user_input:
+    with st.spinner('Generating answer...'):
+        try:
+            prompt = generate_graph_prompt(user_input)
+            answer = agent.chat(prompt)
+            explanation = agent.explain()
+            print(f"\nAnswer type: {type(answer)}\n")  # Check the object type
+            print(f"\nAnswer content: {answer}\n")  # Inspect the answer content
+            print(f"\n explanation type: {type(explanation)}\n")  # Check the object type
+            print(f"\n explanation content: {explanation}\n")
+            if isinstance(answer, str) and os.path.isfile(answer):
+                # The output is a path to an existing image file
+                try:
+                    im = plt.imread(answer)
+                    st.image(im)
+                finally:
+                    # Remove the temporary file even when reading or rendering
+                    # fails, so failed runs do not leave stale charts behind.
+                    os.remove(answer)
+                st.markdown(str(explanation))
+            else:
+                # Not a valid file path: show the answer as text
+                st.markdown(str(answer))
+        except Exception as e:
+            st.error(f"Error generating answer: {e}")
+# TODO: Structured output if we find it is necessary.