Spaces:
Running
Running
Update files
Browse filesTo simplify the agent
- app.py +90 -204
- requirements.txt +4 -8
- smart_web_analyzer.py +184 -0
- space.yml +3 -0
app.py
CHANGED
@@ -1,228 +1,114 @@
|
|
|
|
|
|
|
|
1 |
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
import tempfile
|
10 |
-
import os
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
def
|
17 |
-
"""
|
18 |
-
|
19 |
-
scores = []
|
20 |
-
|
21 |
-
for item in sentiment_data['sections']:
|
22 |
-
sections.append(f"Section {item['section']}")
|
23 |
-
scores.append(item['score'])
|
24 |
-
|
25 |
-
fig = go.Figure(data=[
|
26 |
-
go.Bar(
|
27 |
-
x=sections,
|
28 |
-
y=scores,
|
29 |
-
marker_color='rgb(55, 83, 109)',
|
30 |
-
text=scores,
|
31 |
-
textposition='auto'
|
32 |
-
)
|
33 |
-
])
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
yaxis_title='Sentiment Score (1-5)',
|
39 |
-
yaxis_range=[0, 5]
|
40 |
-
)
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
pdf = FPDF()
|
47 |
-
pdf.add_page()
|
48 |
|
49 |
-
#
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
53 |
|
54 |
-
#
|
55 |
-
|
56 |
-
pdf.cell(0, 10, f'Generated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}', 0, 1)
|
57 |
|
58 |
-
#
|
59 |
-
|
60 |
-
|
61 |
-
pdf.set_font('Arial', '', 10)
|
62 |
|
63 |
-
|
64 |
-
|
65 |
-
pdf.cell(0, 10, f'{key.title()}: {value}', 0, 1)
|
66 |
|
67 |
-
if
|
68 |
-
|
69 |
-
pdf.cell(0, 10, 'Summary:', 0, 1)
|
70 |
-
pdf.set_font('Arial', '', 10)
|
71 |
-
pdf.multi_cell(0, 10, analysis_result['summary'])
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
return
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
#
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
analysis_cache[cache_key],
|
88 |
-
"Content preview unavailable for cached results",
|
89 |
-
"Using cached results",
|
90 |
-
None
|
91 |
-
)
|
92 |
-
|
93 |
-
# Process in steps
|
94 |
-
progress(0, desc="Initializing analysis...")
|
95 |
-
time.sleep(0.5) # Simulate processing
|
96 |
-
|
97 |
-
progress(0.3, desc="Fetching content...")
|
98 |
-
result = analyzer(input_text, mode)
|
99 |
-
analysis_result = json.loads(result)
|
100 |
-
|
101 |
-
progress(0.6, desc="Analyzing content...")
|
102 |
-
|
103 |
-
# Create visualization if sentiment mode
|
104 |
-
chart = None
|
105 |
-
if mode == "sentiment" and analysis_result.get('status') == 'success':
|
106 |
-
progress(0.8, desc="Generating visualizations...")
|
107 |
-
chart = create_sentiment_chart(analysis_result['sentiment_analysis'])
|
108 |
-
|
109 |
-
# Cache results
|
110 |
-
analysis_cache[cache_key] = analysis_result
|
111 |
-
|
112 |
-
# Generate preview text
|
113 |
-
preview = analysis_result.get('stats', {}).get('title', '')
|
114 |
-
if 'summary' in analysis_result:
|
115 |
-
preview += f"\n\nSummary:\n{analysis_result['summary']}"
|
116 |
-
|
117 |
-
progress(1.0, desc="Complete!")
|
118 |
-
return analysis_result, preview, "Analysis complete!", chart
|
119 |
-
|
120 |
-
except Exception as e:
|
121 |
-
return (
|
122 |
-
{"status": "error", "message": str(e)},
|
123 |
-
"Error occurred",
|
124 |
-
f"Error: {str(e)}",
|
125 |
-
None
|
126 |
)
|
127 |
-
|
128 |
-
def create_interface():
|
129 |
-
with gr.Blocks(title="Smart Web Analyzer Plus", theme=gr.themes.Base()) as iface:
|
130 |
-
# Header
|
131 |
-
gr.Markdown("# 🚀 Smart Web Analyzer Plus")
|
132 |
-
gr.Markdown("""
|
133 |
-
Advanced content analysis with AI-powered insights:
|
134 |
-
* 📊 Comprehensive Analysis
|
135 |
-
* 😊 Detailed Sentiment Analysis
|
136 |
-
* 📝 Smart Summarization
|
137 |
-
* 🎯 Topic Detection
|
138 |
-
""")
|
139 |
|
140 |
-
# Theme toggle
|
141 |
with gr.Row():
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
|
|
|
|
|
|
|
|
147 |
)
|
148 |
|
149 |
-
|
150 |
-
with gr.Tabs():
|
151 |
-
# Analysis Tab
|
152 |
-
with gr.Tab("Analysis"):
|
153 |
-
with gr.Row():
|
154 |
-
with gr.Column():
|
155 |
-
input_text = gr.Textbox(
|
156 |
-
label="URL or Text to Analyze",
|
157 |
-
placeholder="Enter URL or paste text",
|
158 |
-
lines=5
|
159 |
-
)
|
160 |
-
mode = gr.Radio(
|
161 |
-
choices=["analyze", "summarize", "sentiment", "topics"],
|
162 |
-
value="analyze",
|
163 |
-
label="Analysis Mode"
|
164 |
-
)
|
165 |
-
analyze_btn = gr.Button("🔍 Analyze", variant="primary")
|
166 |
-
status = gr.Markdown("Status: Ready")
|
167 |
-
|
168 |
-
with gr.Column():
|
169 |
-
results = gr.JSON(label="Analysis Results")
|
170 |
-
chart = gr.Plot(label="Visualization", visible=False)
|
171 |
-
|
172 |
-
# Show/hide chart based on mode
|
173 |
-
mode.change(
|
174 |
-
lambda m: gr.update(visible=(m == "sentiment")),
|
175 |
-
inputs=[mode],
|
176 |
-
outputs=[chart]
|
177 |
-
)
|
178 |
-
|
179 |
-
# Preview Tab
|
180 |
-
with gr.Tab("Preview"):
|
181 |
-
preview = gr.Textbox(
|
182 |
-
label="Content Preview",
|
183 |
-
lines=10,
|
184 |
-
interactive=False
|
185 |
-
)
|
186 |
-
|
187 |
-
# Report Tab
|
188 |
-
with gr.Tab("Report"):
|
189 |
-
download_btn = gr.Button("📥 Download PDF Report")
|
190 |
-
pdf_output = gr.File(label="Generated Report")
|
191 |
|
192 |
-
#
|
193 |
-
gr.
|
194 |
-
examples=[
|
195 |
-
["https://www.artificialintelligence-news.com/2024/02/14/openai-anthropic-google-white-house-red-teaming/", "analyze", "light"],
|
196 |
-
["https://www.artificialintelligence-news.com/2024/02/13/ai-21-labs-wordtune-chatgpt-plugin/", "sentiment", "light"]
|
197 |
-
],
|
198 |
-
inputs=[input_text, mode, theme],
|
199 |
-
outputs=[results, preview, status, chart],
|
200 |
-
fn=process_content,
|
201 |
-
cache_examples=True
|
202 |
-
)
|
203 |
-
|
204 |
-
# Handle theme changes
|
205 |
-
theme.change(
|
206 |
-
lambda t: gr.update(theme=gr.themes.Base() if t == "light" else gr.themes.Soft()),
|
207 |
-
inputs=[theme],
|
208 |
-
outputs=[iface]
|
209 |
-
)
|
210 |
|
211 |
-
#
|
212 |
-
|
213 |
-
fn=
|
214 |
-
inputs=[
|
215 |
-
outputs=
|
216 |
)
|
217 |
|
218 |
-
#
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
|
|
|
|
|
|
|
|
223 |
)
|
224 |
-
|
225 |
-
return iface
|
226 |
|
227 |
-
|
228 |
-
|
|
|
|
1 |
+
# app.py
|
2 |
+
"""
|
3 |
+
Gradio App for Smart Web Analyzer Plus
|
4 |
|
5 |
+
Key Features:
|
6 |
+
- Accepts a URL
|
7 |
+
- Lets users select analysis modes (Clean Text, Summarization, Sentiment, Topic)
|
8 |
+
- Fetches and processes content
|
9 |
+
- Displays JSON output with results
|
10 |
+
- Includes example URLs
|
11 |
+
"""
|
|
|
|
|
12 |
|
13 |
+
import gradio as gr
|
14 |
+
from smart_web_analyzer import (
|
15 |
+
fetch_web_content,
|
16 |
+
clean_text,
|
17 |
+
summarize_text,
|
18 |
+
analyze_sentiment,
|
19 |
+
detect_topic,
|
20 |
+
preview_clean_text,
|
21 |
+
)
|
22 |
|
23 |
+
def analyze_url(url, modes):
    """
    Fetch a web page and run the user-selected analyses on it.

    Parameters:
        url (str): URL to analyze
        modes (list): list of selected mode names

    Returns:
        dict: per-mode results, or a single {"error": ...} entry on fetch failure
    """
    # Fetch first; any failure is surfaced as an error entry in the JSON output.
    try:
        html_content = fetch_web_content(url)
    except Exception as exc:
        return {"error": str(exc)}

    # Strip the HTML down to plain text once, then reuse it for every mode.
    cleaned = clean_text(html_content)
    results = {}

    if "Clean Text Preview" in modes:
        results["Clean Text Preview"] = preview_clean_text(cleaned, max_chars=500)

    if "Summarization" in modes:
        results["Summarization"] = summarize_text(cleaned)

    if "Sentiment Analysis" in modes:
        results["Sentiment Analysis"] = analyze_sentiment(cleaned)

    if "Topic Detection" in modes:
        topics = detect_topic(cleaned)
        if isinstance(topics, dict) and "error" in topics:
            results["Topic Detection"] = topics["error"]
        else:
            # Render "label: score" pairs, one per line, for readability.
            results["Topic Detection"] = "\n".join(
                f"{label}: {score:.2f}" for label, score in topics.items()
            )

    return results
|
66 |
|
67 |
+
# Build Gradio Interface
def build_app():
    """Construct and return the Gradio Blocks UI for the analyzer."""
    all_modes = ["Clean Text Preview", "Summarization", "Sentiment Analysis", "Topic Detection"]

    with gr.Blocks(title="Smart Web Analyzer Plus") as demo:
        gr.Markdown("# Smart Web Analyzer Plus")
        gr.Markdown(
            "Analyze web content for summarization, sentiment, and topics. "
            "Choose your analysis modes and enter a URL below."
        )

        with gr.Row():
            url_input = gr.Textbox(
                label="Enter URL",
                placeholder="https://example.com",
                lines=1,
            )
            # All modes are pre-selected by default.
            mode_selector = gr.CheckboxGroup(
                label="Select Analysis Modes",
                choices=all_modes,
                value=all_modes,
            )

        output_box = gr.JSON(label="Analysis Results")

        # Button to run analysis
        analyze_button = gr.Button("Analyze")

        # On click, run the analysis function
        analyze_button.click(
            fn=analyze_url,
            inputs=[url_input, mode_selector],
            outputs=output_box,
        )

        # Example URLs
        gr.Markdown("### Example URLs")
        gr.Examples(
            examples=[
                ["https://www.artificialintelligence-news.com/2024/02/14/openai-anthropic-google-white-house-red-teaming/"],
                ["https://www.artificialintelligence-news.com/2024/02/13/ai-21-labs-wordtune-chatgpt-plugin/"]
            ],
            inputs=url_input,
            label="Click an example to analyze",
        )

    return demo
|
|
|
111 |
|
112 |
+
if __name__ == "__main__":
    # Build and launch the Gradio app when run as a script.
    build_app().launch()
|
requirements.txt
CHANGED
@@ -1,9 +1,5 @@
|
|
1 |
-
|
2 |
gradio>=4.0.0
|
3 |
-
beautifulsoup4>=4.
|
4 |
-
requests>=2.
|
5 |
-
|
6 |
-
|
7 |
-
torch
|
8 |
-
plotly
|
9 |
-
fpdf
|
|
|
|
|
1 |
gradio>=4.0.0
|
2 |
+
beautifulsoup4>=4.12.0
|
3 |
+
requests>=2.31.0
|
4 |
+
transformers>=4.40.0
|
5 |
+
torch>=2.2.0
|
|
|
|
|
|
smart_web_analyzer.py
ADDED
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# smart_web_analyzer.py
|
2 |
+
"""
|
3 |
+
Smart Web Analyzer Plus - Core Functionality
|
4 |
+
|
5 |
+
Features:
|
6 |
+
- Web content fetching with custom User-Agent (to avoid 403 errors)
|
7 |
+
- Basic HTML cleaning (no removal of script/style)
|
8 |
+
- Summarization using "facebook/bart-large-cnn"
|
9 |
+
- Sentiment analysis using "nlptown/bert-base-multilingual-uncased-sentiment"
|
10 |
+
- Topic detection via zero-shot classification ("facebook/bart-large-mnli")
|
11 |
+
- Preview text for display
|
12 |
+
"""
|
13 |
+
|
14 |
+
import requests
|
15 |
+
from bs4 import BeautifulSoup
|
16 |
+
from transformers import pipeline
|
17 |
+
|
18 |
+
# Model pipelines are loaded eagerly at import time; a failed load leaves the
# corresponding module-level name as None so the dependent function can degrade
# gracefully instead of crashing.
def _load_pipeline(task, model, label):
    """Load a transformers pipeline; return None (and print the error) on failure."""
    try:
        return pipeline(task, model=model)
    except Exception as e:
        print(f"Error loading {label} model:", e)
        return None

# 1) Summarization Pipeline
summarizer = _load_pipeline("summarization", "facebook/bart-large-cnn", "summarization")

# 2) Sentiment Analysis Pipeline
sentiment_analyzer = _load_pipeline(
    "sentiment-analysis",
    "nlptown/bert-base-multilingual-uncased-sentiment",
    "sentiment analysis",
)

# 3) Zero-Shot Topic Detection Pipeline
zero_shot_classifier = _load_pipeline(
    "zero-shot-classification",
    "facebook/bart-large-mnli",
    "topic detection",
)
|
38 |
+
|
39 |
+
|
40 |
+
def fetch_web_content(url):
    """
    Fetches the HTML content of a given URL, using a spoofed User-Agent.

    Parameters:
        url (str): The URL to fetch.

    Returns:
        str: HTML content if successful.

    Raises:
        ValueError: if the URL is invalid.
        Exception: if the request fails (network error, 4xx/5xx, etc.).
    """
    # Validate input URL (startswith accepts a tuple — one call for both schemes)
    if not url.startswith(("http://", "https://")):
        raise ValueError("Invalid URL. URL must start with http:// or https://")

    # Spoof common browser User-Agent to reduce 403 errors
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
        )
    }

    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raises HTTPError for 4XX or 5XX
        return response.text
    except requests.exceptions.RequestException as e:
        # Chain the original exception ("from e") so the root cause stays
        # visible in tracebacks instead of being swallowed.
        raise Exception(f"Error fetching the URL: {e}") from e
|
73 |
+
|
74 |
+
|
75 |
+
def clean_text(html_content):
    """
    Cleans HTML content to extract raw text (keeps <script> and <style>).

    Parameters:
        html_content (str): The raw HTML content.

    Returns:
        str: Cleaned text extracted from the HTML.
    """
    parsed = BeautifulSoup(html_content, "html.parser")
    # Deliberately NOT stripping <script>/<style> tags, per the module's
    # documented behavior ("no removal of script/style").
    raw = parsed.get_text(separator=" ")
    # Collapse every run of whitespace into a single space.
    return " ".join(raw.split())
|
94 |
+
|
95 |
+
|
96 |
+
def summarize_text(text, max_length=130, min_length=30):
    """
    Summarizes text using the facebook/bart-large-cnn model.

    Parameters:
        text (str): The text to summarize.
        max_length (int): Maximum length for the summary.
        min_length (int): Minimum length for the summary.

    Returns:
        str: The summarized text or an error message.
    """
    if not summarizer:
        return "Summarization model is not available."

    try:
        # truncation=True keeps long web pages within BART's ~1024-token input
        # window; without it, full pages raise and this returned an error
        # string instead of a summary.
        summary_list = summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,
        )
        return summary_list[0]["summary_text"]
    except Exception as e:
        return f"Error during summarization: {e}"
|
116 |
+
|
117 |
+
|
118 |
+
def analyze_sentiment(text):
    """
    Analyzes sentiment using nlptown/bert-base-multilingual-uncased-sentiment.

    Parameters:
        text (str): Text for sentiment analysis.

    Returns:
        str: A label describing sentiment (e.g., '4 stars') or an error message.
    """
    if not sentiment_analyzer:
        return "Sentiment analysis model is not available."

    try:
        # truncation=True avoids failures on texts longer than the model's
        # 512-token input limit (typical for full web pages).
        results = sentiment_analyzer(text, truncation=True)
        # Typically returns a list of results; we grab the first
        label = results[0]["label"]
        return label
    except Exception as e:
        return f"Error during sentiment analysis: {e}"
|
138 |
+
|
139 |
+
|
140 |
+
def detect_topic(text):
    """
    Detects topics in text using zero-shot classification via facebook/bart-large-mnli.

    Parameters:
        text (str): The text to analyze.

    Returns:
        dict or str: Dictionary of topics & confidence scores OR an error string.
    """
    if not zero_shot_classifier:
        return {"error": "Topic detection model is not available."}

    # Candidate labels the page content is scored against.
    candidate_labels = ["Politics", "Technology", "Business", "Entertainment", "Science", "Health", "Sports", "Education"]

    try:
        output = zero_shot_classifier(text, candidate_labels)
        # output['labels'] is sorted by confidence; pair each label with
        # its matching score.
        return dict(zip(output["labels"], output["scores"]))
    except Exception as e:
        return {"error": f"Error during topic detection: {e}"}
|
167 |
+
|
168 |
+
|
169 |
+
def preview_clean_text(text, max_chars=500):
    """
    Returns a preview slice of the cleaned text for display.

    Parameters:
        text (str): The text to preview.
        max_chars (int): Maximum number of characters in the preview.

    Returns:
        str: The truncated text plus ellipsis if it's longer than max_chars.
    """
    # Append an ellipsis only when the text actually overflows the limit.
    return text[:max_chars] + "..." if len(text) > max_chars else text
|
183 |
+
|
184 |
+
# End of smart_web_analyzer.py
|
space.yml
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
title: Smart Web Analyzer Plus
|
2 |
+
sdk: gradio
|
3 |
+
python_version: "3.10"
|