MHamdan committed
Commit b35bc08 · verified · 1 Parent(s): 808069a

Upload tool

Files changed (3):
  1. app.py +6 -0
  2. requirements.txt +4 -0
  3. tool.py +161 -0
app.py ADDED
@@ -0,0 +1,6 @@
+ from smolagents import launch_gradio_demo
+ from tool import SimpleTool
+
+ tool = SimpleTool()
+
+ launch_gradio_demo(tool)
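This six-line app.py is the whole Space entry point: smolagents' launch_gradio_demo builds a Gradio form from the tool's inputs schema, so no UI code is needed. With the dependencies installed, running python app.py serves the demo locally.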
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ bs4
+ requests
+ transformers
+ smolagents
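One caveat worth flagging: the transformers pipelines in tool.py need a deep-learning backend such as torch at runtime, and none is listed here. If one does not arrive transitively through another dependency, the models will fail to load until torch is added to this file.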
tool.py ADDED
@@ -0,0 +1,161 @@
+ from smolagents import Tool
+ from typing import Any, Optional
+
+ class SimpleTool(Tool):
+     name = "analyze_content"
+     description = "Enhanced web content analyzer with multiple analysis modes."
+     inputs = {
+         "input_text": {"type": "string", "description": "URL or direct text to analyze."},
+         "mode": {"type": "string", "nullable": True, "description": "Analysis mode ('analyze', 'summarize', 'sentiment', 'topics')."},
+     }
+     output_type = "string"
+
+     def forward(self, input_text: str, mode: str = "analyze") -> str:
+         """Enhanced web content analyzer with multiple analysis modes.
+
+         Args:
+             input_text: URL or direct text to analyze.
+             mode: Analysis mode ('analyze', 'summarize', 'sentiment', 'topics').
+
+         Returns:
+             str: JSON-formatted analysis results.
+         """
+         import requests
+         from bs4 import BeautifulSoup
+         import re
+         from transformers import pipeline
+         import json
+
+         try:
+             # Set up request headers
+             headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
+
+             # Process input: fetch a URL, or treat the input as raw text
+             if input_text.startswith(('http://', 'https://')):
+                 response = requests.get(input_text, headers=headers, timeout=10)
+                 response.raise_for_status()
+                 soup = BeautifulSoup(response.text, 'html.parser')
+
+                 # Strip non-content tags
+                 for tag in soup(['script', 'style', 'meta']):
+                     tag.decompose()
+
+                 title = soup.title.string if soup.title else "No title found"
+                 content = soup.get_text()
+             else:
+                 title = "Text Analysis"
+                 content = input_text
+
+             # Split into paragraphs before collapsing whitespace: the \s+
+             # substitution below removes the newlines paragraphs depend on
+             paragraphs = [re.sub(r'\s+', ' ', p).strip() for p in content.split("\n") if p.strip()]
+             clean_text = re.sub(r'\s+', ' ', content).strip()
+
+             if len(clean_text) < 100:
+                 return json.dumps({
+                     "status": "error",
+                     "message": "Content too short for analysis (minimum 100 characters)"
+                 })
+
+             # Initialize models
+             summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+             classifier = pipeline("text-classification",
+                                   model="nlptown/bert-base-multilingual-uncased-sentiment")
+
+             # Basic stats
+             stats = {
+                 "title": title,
+                 "characters": len(clean_text),
+                 "words": len(clean_text.split()),
+                 "paragraphs": len(paragraphs),
+                 "reading_time": f"{len(clean_text.split()) // 200} minutes"
+             }
+
+             result = {"status": "success", "stats": stats}
+
+             # The nlptown model emits labels like "4 stars"; the leading
+             # digit is a 1-5 rating that indexes into this scale
+             sentiment_labels = ["Very Negative", "Negative", "Neutral", "Positive", "Very Positive"]
+
+             # Mode-specific processing
+             if mode == "analyze":
+                 # Summarize the first 1024 characters
+                 summary = summarizer(clean_text[:1024], max_length=100, min_length=30)[0]['summary_text']
+
+                 # Overall sentiment of the first 512 characters
+                 sentiment = classifier(clean_text[:512])[0]
+                 score = int(sentiment['label'][0])
+
+                 result.update({
+                     "summary": summary,
+                     "sentiment": {
+                         "overall": sentiment_labels[score - 1],
+                         "score": score,
+                         # Report the model's probability, not the star rating
+                         "confidence": f"{sentiment['score'] * 100:.1f}%"
+                     }
+                 })
+
+             elif mode == "sentiment":
+                 # Analyze up to five substantial paragraphs
+                 sections = [p for p in paragraphs if len(p) > 50]
+                 sentiments = []
+
+                 for i, para in enumerate(sections[:5]):
+                     sent = classifier(para[:512])[0]
+                     score = int(sent['label'][0])
+                     sentiments.append({
+                         "section": i + 1,
+                         "text": para[:100] + "...",
+                         "sentiment": sentiment_labels[score - 1],
+                         "score": score
+                     })
+
+                 result.update({
+                     "sentiment_analysis": {
+                         "sections": sentiments,
+                         "total_sections": len(sentiments)
+                     }
+                 })
+
+             elif mode == "summarize":
+                 # Summarize the first 3072 characters in 1024-character chunks
+                 chunks = [clean_text[i:i + 1024] for i in range(0, min(len(clean_text), 3072), 1024)]
+                 summaries = []
+
+                 for chunk in chunks:
+                     if len(chunk) > 100:
+                         summary = summarizer(chunk, max_length=100, min_length=30)[0]['summary_text']
+                         summaries.append(summary)
+
+                 result.update({
+                     "summaries": summaries,
+                     "chunks_analyzed": len(summaries)
+                 })
+
+             elif mode == "topics":
+                 # Basic keyword-based topic categorization
+                 categories = {
+                     "Technology": r"tech|software|hardware|digital|computer|AI|data",
+                     "Business": r"business|market|finance|economy|industry",
+                     "Science": r"science|research|study|discovery",
+                     "Health": r"health|medical|medicine|wellness",
+                     "General": r"news|world|people|life"
+                 }
+
+                 topic_scores = {}
+                 for topic, pattern in categories.items():
+                     # Case-insensitive so keywords like "AI" still match
+                     topic_scores[topic] = len(re.findall(pattern, clean_text, re.IGNORECASE))
+
+                 result.update({
+                     "topic_analysis": {
+                         "detected_topics": topic_scores,
+                         "primary_topic": max(topic_scores.items(), key=lambda x: x[1])[0]
+                     }
+                 })
+
+             return json.dumps(result, indent=2)
+
+         except requests.exceptions.RequestException as e:
+             return json.dumps({
+                 "status": "error",
+                 "message": f"Failed to fetch content: {str(e)}",
+                 "type": "request_error"
+             })
+         except Exception as e:
+             return json.dumps({
+                 "status": "error",
+                 "message": f"Analysis failed: {str(e)}",
+                 "type": "general_error"
+             })
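For a quick smoke test outside the Gradio UI, the tool can be called directly. A minimal sketch, assuming the packages in requirements.txt plus a torch backend are installed (both models download from the Hub on first run); the sample text and URL below are purely illustrative:

from tool import SimpleTool

tool = SimpleTool()

# Direct text input; must be at least 100 characters, or the tool
# returns its "Content too short" error payload instead
text = (
    "smolagents tools expose a single typed forward() call, which lets "
    "agents and Gradio demos invoke the same analysis logic uniformly "
    "across modes such as analyze, summarize, sentiment, and topics."
)
print(tool.forward(text, mode="analyze"))

# URL input with keyword-based topic detection
print(tool.forward("https://example.com", mode="topics"))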