"""
OpenSearchAgent - A web search agent for answering questions using the internet

This module implements a powerful agent that can search the web, navigate
pages, and analyze content to answer complex questions. It uses the smolagents
library to create a hierarchical agent system with a manager agent and a web
browser agent.

The agent can:
- Search the web using Google (via SerpAPI)
- Visit and navigate web pages
- Find and analyze text content
- Process PDF files and other document formats
- Visualize content when needed

Environment variables required:
- SERPAPI_API_KEY: API key for SerpAPI (for web search)
- OPENAI_API_KEY: API key for OpenAI (for the language model)
- HF_TOKEN: Hugging Face token (for accessing HF resources)
"""

import os
import threading

from dotenv import load_dotenv
from huggingface_hub import login
from scripts.text_inspector_tool import TextInspectorTool
from scripts.text_web_browser import (
    ArchiveSearchTool,
    FinderTool,
    FindNextTool,
    PageDownTool,
    PageUpTool,
    SimpleTextBrowser,
    VisitTool,
)
from scripts.visual_qa import visualizer
from smolagents import (
    CodeAgent,
    GoogleSearchTool,
    # InferenceClientModel,  # Uncomment if you want to use InferenceClientModel
    LiteLLMModel,
    ToolCallingAgent,
    OpenAIServerModel,
)

# Load environment variables and authenticate with Hugging Face.
# NOTE(review): os.getenv returns None when HF_TOKEN is unset, and that None is
# passed straight to login() -- confirm this is the desired behavior.
load_dotenv(override=True)
login(os.getenv("HF_TOKEN"))

# Global configurations for the agent
custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}

# User agent string for web requests to avoid being blocked by websites
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"

# Browser configuration for web navigation
BROWSER_CONFIG = {
    "viewport_size": 1024 * 5,  # Large viewport for capturing more content
    "downloads_folder": "downloads_folder",  # Where to store downloaded files
    "request_kwargs": {
        "headers": {"User-Agent": user_agent},
        "timeout": 300,  # Generous timeout for slow websites
    },
    "serpapi_key": os.getenv("SERPAPI_API_KEY"),  # API key for web search
}

# Create downloads folder if it doesn't exist
os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)


class OpenSearchAgent:
    """
    A portable agent that can search the web and answer questions.

    This class encapsulates the functionality of the web search agent: it
    builds a manager ``CodeAgent`` that delegates browsing work to a managed
    ``ToolCallingAgent`` and exposes the whole system as a callable.
    """

    def __init__(self, model_id: str = "o1") -> None:
        """
        Initialize the OpenSearchAgent with the specified model.

        Args:
            model_id (str): The model ID to use for the agent. Default is "o1".
                Other options include "gpt-4o", "claude-3-opus", etc.
        """
        self.model_id = model_id
        self.agent = self._create_agent()

    def _create_agent(self) -> CodeAgent:
        """
        Create and configure the agent with the appropriate tools and models.

        This is where you can customize the agent by adding new tools or
        changing the configuration of existing ones.

        Returns:
            CodeAgent: The configured agent ready to answer questions.
        """
        # Configure the model parameters
        model_params = {
            "model_id": self.model_id,
            "custom_role_conversions": custom_role_conversions,
            "max_completion_tokens": 8192,
        }
        if self.model_id == "o1":
            model_params["reasoning_effort"] = "high"

        print(f"Using model parameters: {model_params}")

        # Initialize the model
        # You can switch between different model providers here
        # model = LiteLLMModel(**model_params)  # For using LiteLLM
        #
        # Build the model from model_params so the requested model_id (and the
        # parameters printed above) are the ones actually used; previously the
        # model was hard-coded to "gpt-4o" and model_params was never applied.
        model = OpenAIServerModel(**model_params)  # For using OpenAI directly

        # Configure text browser and tools
        text_limit = 100000  # Maximum text length to process
        browser = SimpleTextBrowser(**BROWSER_CONFIG)

        # ===== TOOL CONFIGURATION =====
        # This is where you can add new tools to enhance the agent's capabilities
        WEB_TOOLS = [
            GoogleSearchTool(provider="serpapi"),  # Web search tool
            VisitTool(browser),  # Visit URLs
            PageUpTool(browser),  # Navigate up in a page
            PageDownTool(browser),  # Navigate down in a page
            FinderTool(browser),  # Find text in a page
            FindNextTool(browser),  # Find next occurrence of text
            ArchiveSearchTool(browser),  # Search web archives
            TextInspectorTool(model, text_limit),  # Analyze text content
            # ===== ADD YOUR CUSTOM TOOLS HERE =====
            # Example:
            # CustomTool(),  # Your custom tool implementation
            # ImageAnalysisTool(),  # Tool for analyzing images
            # DataExtractionTool(),  # Tool for extracting structured data
        ]

        # Create the web browser agent that handles web interactions
        text_webbrowser_agent = ToolCallingAgent(
            model=model,
            tools=WEB_TOOLS,
            max_steps=20,  # Maximum steps before stopping
            verbosity_level=2,  # Level of logging detail
            planning_interval=4,  # How often to re-plan
            name="search_agent",
            description=(
                "A team member that will search the internet to answer your question. "
                "Ask him for all your questions that require browsing the web. "
                "Provide him as much context as possible, in particular if you need to search on a specific timeframe! "
                "And don't hesitate to provide him with a complex search task, like finding a difference between two webpages. "
                'Your request must be a real sentence, not a google search! Like "Find me this information (...)" rather than a few keywords. '
            ),
            provide_run_summary=True,  # Provide summary of actions taken
        )

        # Add additional instructions to the web browser agent
        text_webbrowser_agent.prompt_templates["managed_agent"]["task"] += (
            "You can navigate to .txt online files. "
            "If a non-html page is in another format, especially .pdf or a Youtube video, use tool 'inspect_file_as_text' to inspect it. \n"
            "Additionally, if after some searching you find out that you need more information to answer the question, you can use `final_answer` with your request for clarification as argument to request for more information."
        )

        # ===== MANAGER AGENT CONFIGURATION =====
        # Create the manager agent that oversees the web browser agent
        # You can add more managed agents here for different specialized tasks
        manager_agent = CodeAgent(
            model=model,
            tools=[
                visualizer,  # Tool for visualization tasks
                TextInspectorTool(model, text_limit),  # Text analysis tool
                # ===== ADD YOUR CUSTOM MANAGER TOOLS HERE =====
                # Example:
                # DataAnalysisTool(),  # Tool for analyzing data
                # ReportGeneratorTool(),  # Tool for generating reports
            ],
            max_steps=12,  # Maximum steps before stopping
            verbosity_level=2,  # Level of logging detail
            # Allow all imports.
            # NOTE(review): the wildcard appears to lift every import
            # restriction on agent-written code -- confirm this is acceptable
            # for the environment the agent runs in.
            additional_authorized_imports=["*"],
            planning_interval=4,  # How often to re-plan
            managed_agents=[
                text_webbrowser_agent,  # The web browser agent
                # ===== ADD YOUR CUSTOM MANAGED AGENTS HERE =====
                # Example:
                # data_analysis_agent,  # An agent specialized in data analysis
                # image_processing_agent,  # An agent specialized in image processing
            ],
        )

        return manager_agent

    def __call__(self, question: str) -> str:
        """
        Run the agent on the given question.

        Args:
            question (str): The question to answer.

        Returns:
            str: The agent's answer to the question.
        """
        print(f"OpenSearchAgent received question: {question[:50]}...")
        answer = self.agent.run(question)

        # Convert answer to string to ensure it's subscriptable
        answer_str = str(answer)
        print(f"OpenSearchAgent found answer: {answer_str[:100]}...")
        return answer_str


def main() -> None:
    """
    Example usage of the OpenSearchAgent.

    This function demonstrates how to create and use the OpenSearchAgent.
    You can modify the question or model_id to test different configurations.
    """
    # Define your question here
    question = "How many studio albums did Mercedes Sosa release before 2007?"

    # Create the agent
    agent = OpenSearchAgent(model_id="o1")

    # Run the agent
    answer = agent(question)
    print(f"Got this answer: {answer}")


if __name__ == "__main__":
    main()