# Final_Assignment_Agent_Course / agent_open_search.py
# memonkeyv01's picture
# Initial commit for Final Assignment Agent
# 539dfc6
# raw
# history blame contribute delete
# 9.24 kB
"""
OpenSearchAgent - A web search agent for answering questions using the internet
This module implements a powerful agent that can search the web, navigate pages,
and analyze content to answer complex questions. It uses the smolagents library
to create a hierarchical agent system with a manager agent and a web browser agent.
The agent can:
- Search the web using Google (via SerpAPI)
- Visit and navigate web pages
- Find and analyze text content
- Process PDF files and other document formats
- Visualize content when needed
Environment variables required:
- SERPAPI_API_KEY: API key for SerpAPI (for web search)
- OPENAI_API_KEY: API key for OpenAI (for the language model)
- HF_TOKEN: Hugging Face token (for accessing HF resources)
"""
import os
import threading
from dotenv import load_dotenv
from huggingface_hub import login
from scripts.text_inspector_tool import TextInspectorTool
from scripts.text_web_browser import (
ArchiveSearchTool,
FinderTool,
FindNextTool,
PageDownTool,
PageUpTool,
SimpleTextBrowser,
VisitTool,
)
from scripts.visual_qa import visualizer
from smolagents import (
CodeAgent,
GoogleSearchTool,
# InferenceClientModel, # Uncomment if you want to use InferenceClientModel
LiteLLMModel,
ToolCallingAgent,
OpenAIServerModel,
)
# Pull settings from a local .env file (values there override variables that
# are already exported), then log in to the Hugging Face Hub.
# NOTE(review): when HF_TOKEN is unset, None is handed to login() -- confirm
# that is acceptable for non-interactive runs.
load_dotenv(override=True)
login(os.environ.get("HF_TOKEN"))

# Role remapping shared by every model instance the agent creates
custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}

# Desktop-browser User-Agent sent with web requests so sites are less likely to block them
user_agent = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
)

# Keyword arguments later unpacked into SimpleTextBrowser(...)
BROWSER_CONFIG = dict(
    viewport_size=5 * 1024,  # large viewport so each page view captures more content
    downloads_folder="downloads_folder",  # where downloaded files are stored
    request_kwargs=dict(
        headers={"User-Agent": user_agent},
        timeout=300,  # generous timeout for slow websites
    ),
    serpapi_key=os.environ.get("SERPAPI_API_KEY"),  # API key for web search
)

# Make sure the downloads folder exists before any browsing happens
os.makedirs("./" + BROWSER_CONFIG["downloads_folder"], exist_ok=True)
class OpenSearchAgent:
    """
    A portable agent that can search the web and answer questions.

    The class wires a manager ``CodeAgent`` to a managed ``ToolCallingAgent``
    (named "search_agent") that does the web browsing, and exposes the pair
    as a simple callable: ``answer = OpenSearchAgent()(question)``.
    """

    def __init__(self, model_id="o1"):
        """
        Initialize the OpenSearchAgent with the specified model.

        Args:
            model_id (str): The model ID to use for the agent. Default is "o1".
                Other options include "gpt-4o". The ID is passed to
                ``OpenAIServerModel``.
                NOTE(review): non-OpenAI IDs such as "claude-3-opus" presumably
                need the ``LiteLLMModel`` path in ``_create_model`` -- confirm.
        """
        self.model_id = model_id
        self.agent = self._create_agent()

    def _create_model(self):
        """
        Build the language model shared by both agents and their tools.

        Returns:
            OpenAIServerModel: A model configured from ``self.model_id``.
        """
        model_params = {
            "model_id": self.model_id,
            "custom_role_conversions": custom_role_conversions,
            "max_completion_tokens": 8192,
        }
        if self.model_id == "o1":
            model_params["reasoning_effort"] = "high"
        print(f"Using model parameters: {model_params}")

        # Build the model from the parameters assembled (and logged) above.
        # Previously a hard-coded "gpt-4o" was used here, which silently
        # ignored the ``model_id`` given to the constructor.
        # To route through LiteLLM instead, return LiteLLMModel(**model_params).
        return OpenAIServerModel(**model_params)

    def _create_search_agent(self, model, text_limit):
        """
        Build the web-browsing agent that the manager delegates to.

        Args:
            model: The language model driving the agent and its text inspector.
            text_limit (int): Maximum text length passed to ``TextInspectorTool``.

        Returns:
            ToolCallingAgent: The configured "search_agent".
        """
        browser = SimpleTextBrowser(**BROWSER_CONFIG)

        # ===== TOOL CONFIGURATION =====
        # Add new tools to this list to enhance the search agent's capabilities.
        web_tools = [
            GoogleSearchTool(provider="serpapi"),  # Web search tool
            VisitTool(browser),  # Visit URLs
            PageUpTool(browser),  # Navigate up in a page
            PageDownTool(browser),  # Navigate down in a page
            FinderTool(browser),  # Find text in a page
            FindNextTool(browser),  # Find next occurrence of text
            ArchiveSearchTool(browser),  # Search web archives
            TextInspectorTool(model, text_limit),  # Analyze text content
            # ===== ADD YOUR CUSTOM TOOLS HERE =====
        ]

        search_agent = ToolCallingAgent(
            model=model,
            tools=web_tools,
            max_steps=20,  # Maximum steps before stopping
            verbosity_level=2,  # Level of logging detail
            planning_interval=4,  # How often to re-plan
            name="search_agent",
            description=(
                "A team member that will search the internet to answer your question.\n"
                "Ask him for all your questions that require browsing the web.\n"
                "Provide him as much context as possible, in particular if you need to search on a specific timeframe!\n"
                "And don't hesitate to provide him with a complex search task, like finding a difference between two webpages.\n"
                'Your request must be a real sentence, not a google search! Like "Find me this information (...)" rather than a few keywords.\n'
            ),
            provide_run_summary=True,  # Provide summary of actions taken
        )

        # Extra instructions appended to the task prompt the managed agent receives
        search_agent.prompt_templates["managed_agent"]["task"] += (
            "You can navigate to .txt online files.\n"
            "If a non-html page is in another format, especially .pdf or a Youtube video, use tool 'inspect_file_as_text' to inspect it.\n"
            "Additionally, if after some searching you find out that you need more information to answer the question, you can use `final_answer` with your request for clarification as argument to request for more information."
        )
        return search_agent

    def _create_agent(self):
        """
        Create and configure the agent with the appropriate tools and models.

        This is where you can customize the agent by adding new tools or
        changing the configuration of existing ones.

        Returns:
            CodeAgent: The configured agent ready to answer questions.
        """
        model = self._create_model()
        text_limit = 100000  # Maximum text length to process

        text_webbrowser_agent = self._create_search_agent(model, text_limit)

        # ===== MANAGER AGENT CONFIGURATION =====
        # The manager oversees the web browser agent; more managed agents can
        # be added to ``managed_agents`` for other specialized tasks.
        return CodeAgent(
            model=model,
            tools=[
                visualizer,  # Tool for visualization tasks
                TextInspectorTool(model, text_limit),  # Text analysis tool
                # ===== ADD YOUR CUSTOM MANAGER TOOLS HERE =====
            ],
            max_steps=12,  # Maximum steps before stopping
            verbosity_level=2,  # Level of logging detail
            # NOTE(review): "*" authorizes every import in agent-written code;
            # flagging for review -- consider an explicit allow-list or sandbox.
            additional_authorized_imports=["*"],
            planning_interval=4,  # How often to re-plan
            managed_agents=[
                text_webbrowser_agent,  # The web browser agent
                # ===== ADD YOUR CUSTOM MANAGED AGENTS HERE =====
            ],
        )

    def __call__(self, question: str) -> str:
        """
        Run the agent on the given question.

        Args:
            question (str): The question to answer.

        Returns:
            str: The agent's answer to the question, converted with ``str()``.
        """
        print(f"OpenSearchAgent received question: {question[:50]}...")
        answer = self.agent.run(question)
        # The agent may return a non-string object; stringify it so it can be
        # sliced for the log line and returned with a consistent type.
        answer_str = str(answer)
        print(f"OpenSearchAgent found answer: {answer_str[:100]}...")
        return answer_str
def main():
    """
    Demonstrate the OpenSearchAgent on a single sample question.

    Builds an agent, asks it one hard-coded question and prints the reply.
    Edit the question or the model_id below to try other configurations.
    """
    sample_question = "How many studio albums did Mercedes Sosa release before 2007?"
    search_agent = OpenSearchAgent(model_id="o1")
    print(f"Got this answer: {search_agent(sample_question)}")


# Run the demo only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()