Spaces:
Running
Running
Commit
·
e78d617
0
Parent(s):
Initial Commit
Browse files- README.md +10 -0
- app.py +586 -0
- model/analyzer.py +245 -0
- requirements.txt +11 -0
- script_search_api.py +279 -0
README.md
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: TREAT-EXAONE
|
3 |
+
emoji: 🥤
|
4 |
+
colorFrom: amber
|
5 |
+
colorTo: pink
|
6 |
+
sdk: gradio
|
7 |
+
sdk_version: "5.11.0"
|
8 |
+
app_file: app.py
|
9 |
+
pinned: true
|
10 |
+
---
|
app.py
ADDED
@@ -0,0 +1,586 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from model.analyzer import analyze_content
|
3 |
+
import asyncio
|
4 |
+
import time
|
5 |
+
import httpx
|
6 |
+
import subprocess
|
7 |
+
import atexit
|
8 |
+
|
9 |
+
# Start the API server
|
10 |
+
def start_api_server():
    """Launch the script-search API under uvicorn as a child process.

    Returns:
        The ``subprocess.Popen`` handle of the spawned uvicorn process.
    """
    uvicorn_command = ["uvicorn", "script_search_api:app", "--reload"]
    return subprocess.Popen(uvicorn_command)
|
14 |
+
|
15 |
+
# Stop the API server
|
16 |
+
def stop_api_server(process):
    """Stop the API server process started by ``start_api_server``.

    Asks the process to terminate, waits briefly so it is reaped instead of
    being left behind, and force-kills it if it has not stopped in time.
    Does nothing when the process has already exited.

    Args:
        process: A ``subprocess.Popen``-like handle (needs ``poll``,
            ``terminate``, ``wait`` and ``kill``).
    """
    if process.poll() is not None:
        # Already exited; there is nothing left to shut down.
        return

    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # The server ignored the terminate request; do not leave it running.
        process.kill()
        process.wait()
|
18 |
+
|
19 |
+
# Register the exit handler
|
20 |
+
# Launch the API server at import time and make sure it is shut down again
# when the interpreter exits.
api_process = start_api_server()
atexit.register(stop_api_server, api_process)
|
22 |
+
|
23 |
+
|
24 |
+
custom_css = """
|
25 |
+
* {
|
26 |
+
font-family: 'Inter', system-ui, sans-serif;
|
27 |
+
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
|
28 |
+
}
|
29 |
+
|
30 |
+
.gradio-container {
|
31 |
+
background: #0a0a0f !important;
|
32 |
+
color: #fff !important;
|
33 |
+
min-height: 100vh;
|
34 |
+
position: relative;
|
35 |
+
overflow: hidden;
|
36 |
+
}
|
37 |
+
|
38 |
+
/* Animated Background */
|
39 |
+
.gradio-container::before {
|
40 |
+
content: '';
|
41 |
+
position: fixed;
|
42 |
+
top: 0;
|
43 |
+
left: 0;
|
44 |
+
right: 0;
|
45 |
+
bottom: 0;
|
46 |
+
background:
|
47 |
+
linear-gradient(125deg,
|
48 |
+
#0a0a0f 0%,
|
49 |
+
rgba(99, 102, 241, 0.05) 30%,
|
50 |
+
rgba(99, 102, 241, 0.1) 50%,
|
51 |
+
rgba(99, 102, 241, 0.05) 70%,
|
52 |
+
#0a0a0f 100%);
|
53 |
+
animation: gradientMove 15s ease infinite;
|
54 |
+
background-size: 400% 400%;
|
55 |
+
z-index: 0;
|
56 |
+
}
|
57 |
+
|
58 |
+
/* Floating Particles */
|
59 |
+
.gradio-container::after {
|
60 |
+
content: '';
|
61 |
+
position: fixed;
|
62 |
+
top: 0;
|
63 |
+
left: 0;
|
64 |
+
width: 100%;
|
65 |
+
height: 100%;
|
66 |
+
background: radial-gradient(circle at center, transparent 0%, #0a0a0f 70%),
|
67 |
+
url("data:image/svg+xml,%3Csvg width='100' height='100' viewBox='0 0 100 100' xmlns='http://www.w3.org/2000/svg'%3E%3Ccircle cx='50' cy='50' r='1' fill='rgba(99, 102, 241, 0.15)'/%3E%3C/svg%3E");
|
68 |
+
opacity: 0.5;
|
69 |
+
animation: floatingParticles 20s linear infinite;
|
70 |
+
z-index: 1;
|
71 |
+
}
|
72 |
+
|
73 |
+
/* Futuristic Header */
|
74 |
+
.treat-title {
|
75 |
+
text-align: center;
|
76 |
+
padding: 3rem 1rem;
|
77 |
+
position: relative;
|
78 |
+
overflow: hidden;
|
79 |
+
z-index: 2;
|
80 |
+
background: linear-gradient(180deg,
|
81 |
+
rgba(99, 102, 241, 0.1),
|
82 |
+
transparent 70%);
|
83 |
+
}
|
84 |
+
|
85 |
+
.treat-title::before {
|
86 |
+
content: '';
|
87 |
+
position: absolute;
|
88 |
+
top: 0;
|
89 |
+
left: 50%;
|
90 |
+
width: 80%;
|
91 |
+
height: 1px;
|
92 |
+
background: linear-gradient(90deg,
|
93 |
+
transparent,
|
94 |
+
rgba(99, 102, 241, 0.5),
|
95 |
+
transparent);
|
96 |
+
transform: translateX(-50%);
|
97 |
+
animation: scanline 3s ease-in-out infinite;
|
98 |
+
}
|
99 |
+
|
100 |
+
.treat-title h1 {
|
101 |
+
font-size: 4.5rem;
|
102 |
+
font-weight: 800;
|
103 |
+
background: linear-gradient(135deg,
|
104 |
+
#2a2b55 0%,
|
105 |
+
#6366f1 50%,
|
106 |
+
#2a2b55 100%);
|
107 |
+
background-size: 200% auto;
|
108 |
+
-webkit-background-clip: text;
|
109 |
+
-webkit-text-fill-color: transparent;
|
110 |
+
margin-bottom: 0.5rem;
|
111 |
+
letter-spacing: -0.05em;
|
112 |
+
animation: gradientFlow 8s ease infinite;
|
113 |
+
position: relative;
|
114 |
+
}
|
115 |
+
|
116 |
+
.treat-title h1::after {
|
117 |
+
content: attr(data-text);
|
118 |
+
position: absolute;
|
119 |
+
left: 0;
|
120 |
+
top: 0;
|
121 |
+
width: 100%;
|
122 |
+
height: 100%;
|
123 |
+
background: linear-gradient(135deg,
|
124 |
+
transparent 0%,
|
125 |
+
rgba(99, 102, 241, 0.4) 50%,
|
126 |
+
transparent 100%);
|
127 |
+
background-size: 200% auto;
|
128 |
+
-webkit-background-clip: text;
|
129 |
+
-webkit-text-fill-color: transparent;
|
130 |
+
opacity: 0.5;
|
131 |
+
animation: textGlow 4s ease-in-out infinite;
|
132 |
+
}
|
133 |
+
|
134 |
+
.treat-title p {
|
135 |
+
font-size: 1.1rem;
|
136 |
+
color: rgba(255, 255, 255, 0.7);
|
137 |
+
max-width: 600px;
|
138 |
+
margin: 0 auto;
|
139 |
+
position: relative;
|
140 |
+
animation: fadeInUp 1s ease-out;
|
141 |
+
}
|
142 |
+
|
143 |
+
/* Tabs Styling */
|
144 |
+
.tabs {
|
145 |
+
background: rgba(17, 17, 27, 0.7);
|
146 |
+
border: 1px solid rgba(99, 102, 241, 0.2);
|
147 |
+
border-radius: 16px;
|
148 |
+
padding: 1rem;
|
149 |
+
margin: 0 1rem 2rem 1rem;
|
150 |
+
position: relative;
|
151 |
+
z-index: 2;
|
152 |
+
backdrop-filter: blur(10px);
|
153 |
+
box-shadow: 0 0 30px rgba(99, 102, 241, 0.1);
|
154 |
+
animation: floatIn 1s ease-out;
|
155 |
+
}
|
156 |
+
|
157 |
+
.tabs::before {
|
158 |
+
content: '';
|
159 |
+
position: absolute;
|
160 |
+
top: -1px;
|
161 |
+
left: -1px;
|
162 |
+
right: -1px;
|
163 |
+
bottom: -1px;
|
164 |
+
background: linear-gradient(45deg,
|
165 |
+
rgba(99, 102, 241, 0.1),
|
166 |
+
transparent,
|
167 |
+
rgba(99, 102, 241, 0.1));
|
168 |
+
border-radius: 16px;
|
169 |
+
z-index: -1;
|
170 |
+
animation: borderGlow 4s ease-in-out infinite;
|
171 |
+
}
|
172 |
+
|
173 |
+
/* Content Area */
|
174 |
+
.content-area {
|
175 |
+
background: rgba(17, 17, 27, 0.7) !important;
|
176 |
+
border: 1px solid rgba(99, 102, 241, 0.2) !important;
|
177 |
+
border-radius: 12px !important;
|
178 |
+
padding: 1.5rem !important;
|
179 |
+
backdrop-filter: blur(10px);
|
180 |
+
position: relative;
|
181 |
+
overflow: hidden;
|
182 |
+
animation: fadeScale 0.5s ease-out;
|
183 |
+
}
|
184 |
+
|
185 |
+
.content-area::before {
|
186 |
+
content: '';
|
187 |
+
position: absolute;
|
188 |
+
top: -50%;
|
189 |
+
left: -50%;
|
190 |
+
width: 200%;
|
191 |
+
height: 200%;
|
192 |
+
background: radial-gradient(circle at center,
|
193 |
+
rgba(99, 102, 241, 0.1) 0%,
|
194 |
+
transparent 70%);
|
195 |
+
animation: rotateGradient 10s linear infinite;
|
196 |
+
}
|
197 |
+
|
198 |
+
/* Input Fields */
|
199 |
+
.gradio-textbox textarea {
|
200 |
+
background: rgba(17, 17, 27, 0.6) !important;
|
201 |
+
border: 1px solid rgba(99, 102, 241, 0.3) !important;
|
202 |
+
border-radius: 8px !important;
|
203 |
+
color: rgba(255, 255, 255, 0.9) !important;
|
204 |
+
font-size: 0.95rem !important;
|
205 |
+
line-height: 1.6 !important;
|
206 |
+
padding: 1rem !important;
|
207 |
+
transition: all 0.3s ease;
|
208 |
+
position: relative;
|
209 |
+
z-index: 2;
|
210 |
+
}
|
211 |
+
|
212 |
+
.gradio-textbox textarea:focus {
|
213 |
+
border-color: #6366f1 !important;
|
214 |
+
box-shadow: 0 0 20px rgba(99, 102, 241, 0.2) !important;
|
215 |
+
background: rgba(17, 17, 27, 0.8) !important;
|
216 |
+
transform: translateY(-2px);
|
217 |
+
}
|
218 |
+
|
219 |
+
/* Buttons */
|
220 |
+
.gradio-button {
|
221 |
+
background: linear-gradient(45deg,
|
222 |
+
#6366f1,
|
223 |
+
#818cf8,
|
224 |
+
#6366f1) !important;
|
225 |
+
background-size: 200% auto !important;
|
226 |
+
border: none !important;
|
227 |
+
border-radius: 8px !important;
|
228 |
+
color: white !important;
|
229 |
+
font-weight: 600 !important;
|
230 |
+
font-size: 0.95rem !important;
|
231 |
+
padding: 0.75rem 1.5rem !important;
|
232 |
+
letter-spacing: 0.025em !important;
|
233 |
+
position: relative;
|
234 |
+
overflow: hidden;
|
235 |
+
transition: all 0.3s ease !important;
|
236 |
+
animation: gradientFlow 3s ease infinite;
|
237 |
+
}
|
238 |
+
|
239 |
+
.gradio-button::before {
|
240 |
+
content: '';
|
241 |
+
position: absolute;
|
242 |
+
top: -50%;
|
243 |
+
left: -50%;
|
244 |
+
width: 200%;
|
245 |
+
height: 200%;
|
246 |
+
background: radial-gradient(circle at center,
|
247 |
+
rgba(255, 255, 255, 0.2) 0%,
|
248 |
+
transparent 70%);
|
249 |
+
transform: scale(0);
|
250 |
+
transition: transform 0.5s ease;
|
251 |
+
}
|
252 |
+
|
253 |
+
.gradio-button:hover {
|
254 |
+
transform: translateY(-2px);
|
255 |
+
box-shadow: 0 5px 20px rgba(99, 102, 241, 0.4) !important;
|
256 |
+
}
|
257 |
+
|
258 |
+
.gradio-button:hover::before {
|
259 |
+
transform: scale(1);
|
260 |
+
}
|
261 |
+
|
262 |
+
/* Results Area */
|
263 |
+
.results-area {
|
264 |
+
background: rgba(17, 17, 27, 0.7) !important;
|
265 |
+
border: 1px solid rgba(99, 102, 241, 0.2) !important;
|
266 |
+
border-radius: 12px !important;
|
267 |
+
margin-top: 2rem !important;
|
268 |
+
backdrop-filter: blur(10px);
|
269 |
+
animation: slideUp 0.5s ease-out;
|
270 |
+
position: relative;
|
271 |
+
overflow: hidden;
|
272 |
+
}
|
273 |
+
|
274 |
+
.footer {
|
275 |
+
text-align: center;
|
276 |
+
padding: 2rem 0;
|
277 |
+
margin-top: 3rem;
|
278 |
+
font-size: 1.0rem;
|
279 |
+
position: relative;
|
280 |
+
z-index: 2;
|
281 |
+
}
|
282 |
+
|
283 |
+
.footer p {
|
284 |
+
color: rgba(255, 255, 255, 0.8);
|
285 |
+
display: flex;
|
286 |
+
align-items: center;
|
287 |
+
justify-content: center;
|
288 |
+
gap: 0.5rem;
|
289 |
+
}
|
290 |
+
|
291 |
+
.footer .heart {
|
292 |
+
color: #6366f1;
|
293 |
+
display: inline-block;
|
294 |
+
position: relative;
|
295 |
+
font-size: 1.0rem;
|
296 |
+
transform-origin: center;
|
297 |
+
animation: heartbeat 1.5s ease infinite;
|
298 |
+
}
|
299 |
+
|
300 |
+
.footer .heart::before,
|
301 |
+
.footer .heart::after {
|
302 |
+
content: '✦';
|
303 |
+
position: absolute;
|
304 |
+
opacity: 0;
|
305 |
+
font-size: 0.6rem;
|
306 |
+
animation: sparkle 1.5s ease infinite;
|
307 |
+
}
|
308 |
+
|
309 |
+
.footer .heart::before {
|
310 |
+
top: -8px;
|
311 |
+
left: -8px;
|
312 |
+
animation-delay: 0.2s;
|
313 |
+
}
|
314 |
+
|
315 |
+
.footer .heart::after {
|
316 |
+
top: -8px;
|
317 |
+
right: -8px;
|
318 |
+
animation-delay: 0.4s;
|
319 |
+
}
|
320 |
+
|
321 |
+
.footer .name {
|
322 |
+
color: #6366f1;
|
323 |
+
text-decoration: none;
|
324 |
+
position: relative;
|
325 |
+
transition: all 0.3s ease;
|
326 |
+
padding: 0 4px;
|
327 |
+
}
|
328 |
+
|
329 |
+
.footer .name:hover {
|
330 |
+
color: #818cf8;
|
331 |
+
}
|
332 |
+
|
333 |
+
footer {
|
334 |
+
visibility: hidden;
|
335 |
+
}
|
336 |
+
|
337 |
+
/* Animations */
|
338 |
+
@keyframes gradientMove {
|
339 |
+
0% { background-position: 0% 50%; }
|
340 |
+
50% { background-position: 100% 50%; }
|
341 |
+
100% { background-position: 0% 50%; }
|
342 |
+
}
|
343 |
+
|
344 |
+
@keyframes floatingParticles {
|
345 |
+
0% { transform: translateY(0); }
|
346 |
+
100% { transform: translateY(-100%); }
|
347 |
+
}
|
348 |
+
|
349 |
+
@keyframes scanline {
|
350 |
+
0% { transform: translateX(-150%) scaleX(0.5); opacity: 0; }
|
351 |
+
50% { transform: translateX(-50%) scaleX(1); opacity: 1; }
|
352 |
+
100% { transform: translateX(50%) scaleX(0.5); opacity: 0; }
|
353 |
+
}
|
354 |
+
|
355 |
+
@keyframes gradientFlow {
|
356 |
+
0% { background-position: 0% 50%; }
|
357 |
+
50% { background-position: 100% 50%; }
|
358 |
+
100% { background-position: 0% 50%; }
|
359 |
+
}
|
360 |
+
|
361 |
+
@keyframes textGlow {
|
362 |
+
0% { opacity: 0.3; transform: scale(1); }
|
363 |
+
50% { opacity: 0.5; transform: scale(1.02); }
|
364 |
+
100% { opacity: 0.3; transform: scale(1); }
|
365 |
+
}
|
366 |
+
|
367 |
+
@keyframes borderGlow {
|
368 |
+
0% { opacity: 0.5; }
|
369 |
+
50% { opacity: 1; }
|
370 |
+
100% { opacity: 0.5; }
|
371 |
+
}
|
372 |
+
|
373 |
+
@keyframes rotateGradient {
|
374 |
+
0% { transform: rotate(0deg); }
|
375 |
+
100% { transform: rotate(360deg); }
|
376 |
+
}
|
377 |
+
|
378 |
+
@keyframes fadeScale {
|
379 |
+
0% { opacity: 0; transform: scale(0.95); }
|
380 |
+
100% { opacity: 1; transform: scale(1); }
|
381 |
+
}
|
382 |
+
|
383 |
+
@keyframes slideUp {
|
384 |
+
0% { opacity: 0; transform: translateY(20px); }
|
385 |
+
100% { opacity: 1; transform: translateY(0); }
|
386 |
+
}
|
387 |
+
|
388 |
+
@keyframes floatIn {
|
389 |
+
0% { opacity: 0; transform: translateY(20px); }
|
390 |
+
100% { opacity: 1; transform: translateY(0); }
|
391 |
+
}
|
392 |
+
|
393 |
+
@keyframes fadeInUp {
|
394 |
+
0% { opacity: 0; transform: translateY(10px); }
|
395 |
+
100% { opacity: 1; transform: translateY(0); }
|
396 |
+
}
|
397 |
+
|
398 |
+
@keyframes heartbeat {
|
399 |
+
0% { transform: scale(1); }
|
400 |
+
10% { transform: scale(1.2); }
|
401 |
+
20% { transform: scale(0.9); }
|
402 |
+
30% { transform: scale(1.1); }
|
403 |
+
40% { transform: scale(0.95); }
|
404 |
+
50% { transform: scale(1); }
|
405 |
+
100% { transform: scale(1); }
|
406 |
+
}
|
407 |
+
|
408 |
+
@keyframes sparkle {
|
409 |
+
0% { transform: scale(0); opacity: 0; }
|
410 |
+
50% { transform: scale(1.2); opacity: 1; }
|
411 |
+
100% { transform: scale(0); opacity: 0; }
|
412 |
+
}
|
413 |
+
"""
|
414 |
+
# Start the API server
|
415 |
+
def start_api_server():
    """Launch the script-search API under uvicorn as a child process.

    Returns:
        The ``subprocess.Popen`` handle of the spawned uvicorn process.
    """
    return subprocess.Popen(["uvicorn", "script_search_api:app", "--reload"])


def stop_api_server(process):
    """Ask the given API server process to terminate."""
    process.terminate()


# This module already starts the API server (and registers its exit handler)
# near the top of the file. Starting it unconditionally a second time here
# spawned another uvicorn process competing for the same port, so only start
# one when no live server process exists.
_existing_api_process = globals().get("api_process")
if _existing_api_process is None or _existing_api_process.poll() is not None:
    api_process = start_api_server()
    atexit.register(stop_api_server, api_process)
|
427 |
+
|
428 |
+
async def analyze_with_progress(movie_name, progress=gr.Progress()):
    """Run a movie analysis through the local API and mirror its progress.

    Starts an analysis task for ``movie_name`` on the API at
    ``http://localhost:8000``, polls the task's progress endpoint every half
    second, forwards the reported progress to the Gradio progress bar, and
    returns a display string once the task reports completion.

    Args:
        movie_name: Movie title entered in the search box.
        progress: Gradio progress tracker.

    Returns:
        A human-readable summary: the detected triggers, a "no triggers"
        message, or an ``Error: ...`` message when the request or the
        analysis failed.
    """
    movie_name = (movie_name or "").strip()
    if not movie_name:
        # Do not start a backend task for an empty search box.
        return "Error: Please enter a movie title to search."

    api_base = "http://localhost:8000/api"
    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Start the analysis
            response = await client.get(
                f"{api_base}/start_analysis",
                params={"movie_name": movie_name},
            )
            response.raise_for_status()
            task_id = response.json()["task_id"]

            # Poll for progress until the task reports completion
            while True:
                progress_response = await client.get(
                    f"{api_base}/progress/{task_id}"
                )
                progress_response.raise_for_status()
                status = progress_response.json()

                # Update Gradio progress
                progress(status["progress"], desc=status["status"])

                if status["is_complete"]:
                    if status.get("error"):
                        return f"Error: {status['error']}"

                    result = status.get("result")
                    if not result:
                        # A finished task with neither an error nor a result
                        # used to fall out of the loop and return None, which
                        # left the output box blank.
                        return "Error: Analysis finished without returning a result."

                    triggers = result.get("detected_triggers", [])
                    if not triggers or triggers == ["None"]:
                        return "✓ No triggers detected in the content."

                    trigger_list = "\n".join(f"• {trigger}" for trigger in triggers)
                    return f"⚠ Triggers Detected:\n{trigger_list}"

                await asyncio.sleep(0.5)

    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"Error: {str(e)}"
|
467 |
+
|
468 |
+
def _advance_progress(progress, start, stop, delay, desc):
    """Step the progress bar from ``start``% to ``stop``%, pausing ``delay`` seconds per step."""
    for percent in range(start, stop):
        time.sleep(delay)
        progress((percent + 1) / 100, desc=desc)


def analyze_with_loading(text, progress=gr.Progress()):
    """Analyze pasted text synchronously while animating the progress bar.

    Synchronous wrapper around the async ``analyze_content`` function. The
    progress bar is advanced in simulated phases around the real analysis
    call, which runs to completion via ``asyncio.run``.

    Args:
        text: Content to analyze.
        progress: Gradio progress tracker.

    Returns:
        A display string listing the detected triggers, a "no triggers"
        message, or an ``Error during analysis: ...`` message.
    """
    if not text or not text.strip():
        # Nothing to analyze; skip the progress animation and the model call.
        return "Error during analysis: no content was provided."

    # Initialize progress
    progress(0, desc="Starting analysis...")

    # Simulated setup and pre-processing phases (purely cosmetic)
    _advance_progress(progress, 0, 25, 0.04, "Initializing analysis...")
    _advance_progress(progress, 25, 45, 0.03, "Pre-processing content...")

    # Perform analysis
    progress(0.45, desc="Analyzing content...")
    try:
        result = asyncio.run(analyze_content(text))
    except Exception as e:
        return f"Error during analysis: {str(e)}"

    # analyze_content reports failures through an "error" entry in its result
    # rather than by raising; show that as an error, not as a detected trigger.
    error = result.get("error")
    if error:
        return f"Error during analysis: {error}"

    # Simulated post-processing phases
    _advance_progress(progress, 45, 75, 0.03, "Processing results...")
    _advance_progress(progress, 75, 100, 0.02, "Finalizing results...")

    # Format the results
    triggers = result.get("detected_triggers") or []
    if not triggers or triggers == ["None"]:
        return "✓ No triggers detected in the content."

    trigger_list = "\n".join(f"• {trigger}" for trigger in triggers)
    return f"⚠ Triggers Detected:\n{trigger_list}"
|
510 |
+
|
511 |
+
# Update the Gradio interface with new styling
|
512 |
+
import gradio as gr
|
513 |
+
from model.analyzer import analyze_content
|
514 |
+
import asyncio
|
515 |
+
import time
|
516 |
+
import httpx
|
517 |
+
import subprocess
|
518 |
+
import atexit
|
519 |
+
|
520 |
+
# Keep your existing CSS and server setup code...
|
521 |
+
# [Previous code until the interface definition remains the same]
|
522 |
+
|
523 |
+
# Update the Gradio interface with fixed button handling
|
524 |
+
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as iface:
    # Page header
    gr.HTML("""
    <div class="treat-title">
    <h1 data-text="TREAT">TREAT</h1>
    <p>Trigger Recognition for Enjoyable and Appropriate Television</p>
    </div>
    """)

    with gr.Tabs() as tabs:
        # Tab 1: analyze text pasted by the user
        with gr.Tab("Content Analysis"):
            with gr.Column():
                input_text = gr.Textbox(
                    label="ANALYZE CONTENT",
                    placeholder="Enter the content you want to analyze...",
                    lines=8,
                )
                analyze_btn = gr.Button("✨ Analyze")

        # Tab 2: look a movie up by title and analyze its script
        with gr.Tab("Movie Search"):
            with gr.Column():
                search_query = gr.Textbox(
                    label="SEARCH MOVIES",
                    placeholder="Type a movie title to search...",
                    lines=1,
                )
                search_button = gr.Button("🔍 Search")

    # Both tabs write their result into the same read-only box
    output_text = gr.Textbox(label="ANALYSIS RESULTS", lines=5, interactive=False)

    status_text = gr.Markdown(value="")

    # Wire each button to its handler
    analyze_btn.click(
        analyze_with_loading,
        inputs=input_text,
        outputs=output_text,
    )
    search_button.click(
        analyze_with_progress,
        inputs=search_query,
        outputs=output_text,
    )

    # Page footer
    gr.HTML("""
    <div class="footer">
    <p>Made with <span class="heart">💖</span> by <a href="https://www.linkedin.com/in/kubermehta/" target="_blank">Kuber Mehta</a></p>
    </div>
    """)
|
580 |
+
|
581 |
+
if __name__ == "__main__":
    # Serve the UI locally (no public share link), with debug output and
    # errors shown in the browser.
    launch_options = {"share": False, "debug": True, "show_error": True}
    iface.launch(**launch_options)
|
model/analyzer.py
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
3 |
+
import torch
|
4 |
+
from datetime import datetime
|
5 |
+
import gradio as gr
|
6 |
+
from typing import Dict, List, Union, Optional
|
7 |
+
import logging
|
8 |
+
import traceback
|
9 |
+
|
10 |
+
# Configure logging
|
11 |
+
logging.basicConfig(level=logging.INFO)
|
12 |
+
logger = logging.getLogger(__name__)
|
13 |
+
|
14 |
+
class ContentAnalyzer:
    """Detect sensitive-content categories ("triggers") in a script.

    The script is split into overlapping word chunks. For every trigger
    category, each chunk is put to a language model as a YES/NO/MAYBE
    question; YES counts 1 and MAYBE counts 0.5. Categories whose total
    score reaches a threshold derived from the number of chunks are reported.
    """

    # Hugging Face id used for both the tokenizer and the model.
    MODEL_NAME = "LGAI-EXAONE/EXAONE-Deep-2.4B"

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.tokenizer = None
        self.batch_size = 2  # Reduced batch size for deeper thinking
        self.max_thinking_time = 30  # Maximum seconds per batch for reasoning
        self.trigger_categories = {
            "Violence": {
                "mapped_name": "Violence",
                "description": "Physical force, aggression, or actions causing harm to living beings or property."
            },
            "Death": {
                "mapped_name": "Death References",
                "description": "Direct or implied loss of life, mortality discussions, or death-related events."
            },
            "Substance_Use": {
                "mapped_name": "Substance Use",
                "description": "Usage or discussion of drugs, alcohol, or addictive substances."
            },
            "Gore": {
                "mapped_name": "Gore",
                "description": "Graphic depictions of injuries, blood, or severe bodily harm."
            },
            "Sexual_Content": {
                "mapped_name": "Sexual Content",
                "description": "Sexual activity, intimacy, or explicit sexual references."
            },
            "Sexual_Abuse": {
                "mapped_name": "Sexual Abuse",
                "description": "Non-consensual sexual acts, exploitation, or sexual violence."
            },
            "Self_Harm": {
                "mapped_name": "Self-Harm",
                "description": "Self-inflicted injury, suicidal thoughts, or destructive behaviors."
            },
            "Mental_Health": {
                "mapped_name": "Mental Health Issues",
                "description": "Psychological distress, mental disorders, or emotional trauma."
            }
        }
        logger.info("Initialized analyzer with device: %s", self.device)

    async def load_model(self, progress=None) -> None:
        """Load the tokenizer and model, reporting progress if a callback is given.

        Args:
            progress: Optional callable taking ``(fraction, message)``.

        Raises:
            Exception: Whatever the underlying loaders raise; it is logged
                and re-raised.
        """
        try:
            if progress:
                progress(0.1, "Loading tokenizer...")

            self.tokenizer = AutoTokenizer.from_pretrained(
                self.MODEL_NAME,
                use_fast=True
            )

            if progress:
                progress(0.3, "Loading model...")

            # NOTE(review): EXAONE-Deep appears to be published as a
            # decoder-only (causal) model; AutoModelForSeq2SeqLM may reject
            # its config. Confirm, and if so switch to AutoModelForCausalLM
            # and strip the prompt tokens before decoding.
            self.model = AutoModelForSeq2SeqLM.from_pretrained(
                self.MODEL_NAME,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                device_map="auto"
            )

            # Inference only, on every device.
            self.model.eval()
            if self.device == "cuda":
                torch.cuda.empty_cache()

            if progress:
                progress(0.5, "Model loaded successfully")

        except Exception:
            logger.exception("Error loading model")
            raise

    def _chunk_text(self, text: str, chunk_size: int = 20000, overlap: int = 100) -> List[str]:
        """Split text into chunks of whitespace-separated words.

        Consecutive chunks share ``overlap`` words. Chunking stops as soon as
        a chunk reaches the end of the text, so no trailing chunk consisting
        only of already-covered overlap words is produced.

        Args:
            text: Text to split.
            chunk_size: Maximum number of words per chunk.
            overlap: Number of words shared by consecutive chunks.

        Returns:
            The list of chunks; empty when ``text`` contains no words.

        Raises:
            ValueError: If ``chunk_size`` is not positive or ``overlap`` is
                negative or not smaller than ``chunk_size``.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= overlap < chunk_size:
            raise ValueError("overlap must be non-negative and smaller than chunk_size")

        words = text.split()
        chunks = []
        for start in range(0, len(words), chunk_size - overlap):
            chunks.append(' '.join(words[start:start + chunk_size]))
            if start + chunk_size >= len(words):
                # This chunk already reaches the last word.
                break
        return chunks

    def _validate_response(self, response: str) -> str:
        """Reduce a raw model response to ``"YES"``, ``"NO"`` or ``"MAYBE"``.

        Only the first word is considered, ignoring case and surrounding
        punctuation (so ``"Yes."`` counts as YES). Anything unrecognised,
        including an empty response, is treated as ``"NO"``.
        """
        valid_responses = {"YES", "NO", "MAYBE"}
        words = response.strip().upper().split()
        if not words:
            return "NO"
        first_word = words[0].strip(".,;:!?\"'()[]*")
        return first_word if first_word in valid_responses else "NO"

    def _generate_responses(self, prompts: List[str]) -> List[str]:
        """Run one batch of prompts through the model and decode the outputs."""
        # NOTE(review): prompts are truncated to 512 tokens, while the default
        # chunk size is 20000 words, so most of each chunk (and the answer
        # instruction at the end of the prompt) is likely cut off — confirm
        # the intended chunk size.
        inputs = self.tokenizer(
            prompts,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=20,
                temperature=0.2,
                top_p=0.85,
                num_beams=3,
                early_stopping=True,
                pad_token_id=self.tokenizer.eos_token_id,
                do_sample=True,
                # Time budget enforced by generate() itself. The previous
                # SIGALRM-based timeout only works in the main thread and was
                # never cancelled, so a stale alarm could fire later.
                max_time=self.max_thinking_time
            )

        return [
            self.tokenizer.decode(output, skip_special_tokens=True)
            for output in outputs
        ]

    async def analyze_chunks_batch(
        self,
        chunks: List[str],
        progress: "Optional[gr.Progress]" = None,
        current_progress: float = 0,
        progress_step: float = 0
    ) -> Dict[str, float]:
        """Score every trigger category over all chunks, batch by batch.

        Args:
            chunks: Text chunks to analyze.
            progress: Optional progress callback taking ``(fraction, message)``.
            current_progress: Progress fraction to start from.
            progress_step: Progress added per analyzed chunk and category.

        Returns:
            Mapping from the category's display name to its accumulated score
            (1 per YES answer, 0.5 per MAYBE). Categories without any YES or
            MAYBE answer are absent. A batch that fails is logged and skipped.
        """
        answer_scores = {"YES": 1, "MAYBE": 0.5}
        all_triggers: Dict[str, float] = {}

        for info in self.trigger_categories.values():
            mapped_name = info["mapped_name"]
            description = info["description"]

            for start in range(0, len(chunks), self.batch_size):
                batch_chunks = chunks[start:start + self.batch_size]
                prompts = [
                    f"Analyze text for {mapped_name}. Definition: {description}. Content: \"{chunk}\". Answer YES/NO/MAYBE based on clear evidence."
                    for chunk in batch_chunks
                ]

                try:
                    responses = self._generate_responses(prompts)
                except Exception:
                    # Best effort: one failing batch must not abort the analysis.
                    logger.exception("Error processing batch for %s", mapped_name)
                else:
                    for response in responses:
                        score = answer_scores.get(self._validate_response(response))
                        if score:
                            all_triggers[mapped_name] = all_triggers.get(mapped_name, 0) + score

                if progress:
                    # progress_step is sized per chunk, so advance by the
                    # number of chunks in this batch (also after a failure).
                    current_progress += progress_step * len(batch_chunks)
                    progress(min(current_progress, 0.9), f"Analyzing {mapped_name}...")

        return all_triggers

    async def analyze_script(self, script: str, progress: "Optional[gr.Progress]" = None) -> List[str]:
        """Analyze the entire script and return the detected trigger names.

        Args:
            script: Full text to analyze.
            progress: Optional progress callback taking ``(fraction, message)``.

        Returns:
            Display names of the categories whose score reaches the threshold
            ``max(1, 10% of the chunk count)``, or ``["None"]`` when nothing
            is detected or the script contains no words.
        """
        chunks = self._chunk_text(script or "")
        if not chunks:
            # Empty input: nothing to analyze. This also avoids dividing by
            # zero below and loading the model for no reason.
            return ["None"]

        if self.model is None or self.tokenizer is None:
            await self.load_model(progress)

        identified_triggers = await self.analyze_chunks_batch(
            chunks,
            progress,
            current_progress=0.5,
            progress_step=0.4 / (len(chunks) * len(self.trigger_categories))
        )

        if progress:
            progress(0.95, "Finalizing results...")

        chunk_threshold = max(1, len(chunks) * 0.1)
        final_triggers = [
            mapped_name
            for mapped_name, count in identified_triggers.items()
            if count >= chunk_threshold
        ]

        return final_triggers if final_triggers else ["None"]
|
200 |
+
|
201 |
+
# Model actually loaded by ContentAnalyzer; reported in the analysis result.
_ANALYSIS_MODEL_ID = "LGAI-EXAONE/EXAONE-Deep-2.4B"

# Analyzer shared across calls so the model is loaded once, not per request.
_shared_analyzer = None


def _get_analyzer():
    """Return the shared ContentAnalyzer, creating it on first use."""
    global _shared_analyzer
    if _shared_analyzer is None:
        _shared_analyzer = ContentAnalyzer()
    return _shared_analyzer


async def analyze_content(
    script: str,
    progress: Optional[gr.Progress] = None
) -> Dict[str, Union[List[str], str]]:
    """Main analysis function for the Gradio interface.

    Args:
        script: Text to analyze.
        progress: Optional progress callback taking ``(fraction, message)``.

    Returns:
        A dict with ``detected_triggers`` (list of names, ``["None"]`` when
        nothing is found), ``confidence``, ``model`` and
        ``analysis_timestamp``. When the analysis fails the dict additionally
        carries an ``error`` message and ``detected_triggers`` holds a
        placeholder entry; no exception is raised.
    """
    logger.info("Starting content analysis")

    try:
        analyzer = _get_analyzer()
        triggers = await analyzer.analyze_script(script, progress)

        if progress:
            progress(1.0, "Analysis complete!")

        result = {
            "detected_triggers": triggers,
            "confidence": "High - Content detected" if triggers != ["None"] else "High - No concerning content detected",
            # Previously reported unrelated T5 model names.
            "model": _ANALYSIS_MODEL_ID,
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        logger.info("Analysis complete: %s", result)
        return result

    except Exception as e:
        logger.exception("Analysis error")
        return {
            "detected_triggers": ["Error occurred during analysis"],
            "confidence": "Error",
            "model": _ANALYSIS_MODEL_ID,
            "analysis_timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "error": str(e)
        }
|
236 |
+
|
237 |
+
if __name__ == "__main__":
    # Stand-alone demo: a single text box in, the raw result dict out as JSON.
    script_box = gr.Textbox(lines=8, label="Input Text")
    result_view = gr.JSON()
    iface = gr.Interface(
        analyze_content,
        script_box,
        result_view,
        title="Content Trigger Analysis",
        description="Analyze text content for sensitive topics and trigger warnings",
    )
    iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
flask
|
2 |
+
flask_cors
|
3 |
+
torch
|
4 |
+
gradio
|
5 |
+
transformers
|
6 |
+
accelerate
|
7 |
+
safetensors
|
8 |
+
huggingface-hub
|
9 |
+
beautifulsoup4
|
10 |
+
protobuf
|
11 |
+
fastapi
|
script_search_api.py
ADDED
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# script_search_api.py
|
2 |
+
from fastapi import FastAPI, HTTPException
|
3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
4 |
+
import asyncio
|
5 |
+
from datetime import datetime, timedelta
|
6 |
+
from typing import Dict, Optional
|
7 |
+
from pydantic import BaseModel
|
8 |
+
from dataclasses import dataclass
|
9 |
+
import logging
|
10 |
+
import requests
|
11 |
+
from bs4 import BeautifulSoup
|
12 |
+
from difflib import get_close_matches
|
13 |
+
from model.analyzer import analyze_content
|
14 |
+
|
15 |
+
# Configure root logging once at import time and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

# NOTE(review): every origin, method and header is allowed while credentials
# are enabled - confirm this is intended before exposing the API publicly.
_CORS_SETTINGS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_SETTINGS)
|
27 |
+
|
28 |
+
@dataclass
class ProgressState:
    """Snapshot of one analysis run as stored in the progress tracker."""

    # Completion fraction reported by the most recent update.
    progress: float
    # Human-readable description of the current step.
    status: str
    # Moment this snapshot was written.
    timestamp: datetime
    # Key the snapshot was stored under.
    task_id: str
    # Optional outcome fields; they keep their defaults until supplied.
    is_complete: bool = False
    result: Optional[dict] = None
    error: Optional[str] = None
|
37 |
+
|
38 |
+
class ProgressResponse(BaseModel):
    """Response schema for progress lookups by task ID."""

    # Required fields describing the current state of the task.
    progress: float
    status: str
    is_complete: bool
    # Optional fields; None unless a value is supplied.
    result: Optional[dict] = None
    error: Optional[str] = None
|
44 |
+
|
45 |
+
# Process-wide, in-memory store holding the latest ProgressState per key.
progress_tracker: Dict[str, ProgressState] = dict()

# Root of the script site and the index page that is scanned for titles.
BASE_URL = "https://imsdb.com"
ALL_SCRIPTS_URL = BASE_URL + "/all-scripts.html"
|
50 |
+
|
51 |
+
def create_task_id(movie_name: str) -> str:
    """Create a unique, URL-path-safe task ID for a movie analysis request.

    The ID is used as the ``{task_id}`` segment of ``/api/progress/{task_id}``,
    so it must not contain characters such as ``/``, ``?`` or spaces that
    would stop the route from matching. The movie name is therefore reduced
    to a slug (kept only for log readability) and uniqueness comes from a
    random UUID rather than the wall clock.

    Args:
        movie_name: Title supplied by the client; may contain any characters.

    Returns:
        A string made only of ASCII letters, digits, ``_`` and ``-``.
    """
    import re
    import uuid

    slug = re.sub(r"[^A-Za-z0-9_-]+", "-", movie_name).strip("-") or "task"
    return f"{slug}-{uuid.uuid4().hex}"
|
54 |
+
|
55 |
+
async def cleanup_old_tasks():
    """Purge tracker entries older than one hour, forever, every five minutes."""
    max_age = timedelta(hours=1)
    while True:
        now = datetime.now()
        # Collect the stale keys first so the dict is not resized while
        # it is being iterated.
        stale_keys = []
        for key, snapshot in progress_tracker.items():
            if now - snapshot.timestamp > max_age:
                stale_keys.append(key)
        for key in stale_keys:
            del progress_tracker[key]
        await asyncio.sleep(300)  # Cleanup every 5 minutes
|
66 |
+
|
67 |
+
@app.on_event("startup")
async def startup_event():
    """Initialize the server and start the periodic cleanup task."""
    progress_tracker.clear()
    # asyncio keeps only weak references to tasks, so an unreferenced task
    # may be garbage-collected mid-run; park it on the application state to
    # keep it alive for the lifetime of the server.
    app.state.cleanup_task = asyncio.create_task(cleanup_old_tasks())
    logger.info("Server started, progress tracker initialized")
|
73 |
+
|
74 |
+
def update_progress(task_id: str, progress: float, status: str, result: Optional[dict] = None, error: Optional[str] = None):
    """Replace the stored snapshot for ``task_id`` and log the new status.

    Args:
        task_id: Key under which the snapshot is stored.
        progress: Completion fraction; 1.0 or more marks the task complete.
        status: Human-readable description of the current step.
        result: Optional result payload to store with the snapshot.
        error: Optional error message to store with the snapshot.
    """
    finished = progress >= 1.0
    snapshot = ProgressState(
        progress=progress,
        status=status,
        timestamp=datetime.now(),
        task_id=task_id,
        is_complete=finished,
        result=result,
        error=error,
    )
    progress_tracker[task_id] = snapshot
    logger.info(f"Task {task_id}: {status} (Progress: {progress * 100:.0f}%)")
|
87 |
+
|
88 |
+
@app.get("/api/start_analysis")
async def start_analysis(movie_name: str):
    """Start a new analysis task and return its ID immediately.

    Args:
        movie_name: Title of the movie whose script should be analyzed.

    Returns:
        ``{"task_id": <id>}``; the ID is the key under which progress for
        this run is recorded.
    """
    task_id = create_task_id(movie_name)
    update_progress(task_id, 0.0, "Starting analysis...")

    # Start the analysis in the background. asyncio holds tasks only weakly,
    # so keep a strong reference until the task finishes; otherwise it may be
    # garbage-collected before it completes.
    running = getattr(app.state, "analysis_tasks", None)
    if running is None:
        running = app.state.analysis_tasks = set()
    job = asyncio.create_task(run_analysis(task_id, movie_name))
    running.add(job)
    job.add_done_callback(running.discard)

    return {"task_id": task_id}
|
98 |
+
|
99 |
+
@app.get("/api/progress/{task_id}")
async def get_progress(task_id: str) -> ProgressResponse:
    """Return the stored snapshot for ``task_id``.

    Raises:
        HTTPException: 404 when no snapshot exists for ``task_id``.
    """
    state = progress_tracker.get(task_id)
    if state is None:
        raise HTTPException(status_code=404, detail="Task not found")

    # Copy only the fields that belong to the response schema.
    exposed = ("progress", "status", "is_complete", "result", "error")
    return ProgressResponse(**{name: getattr(state, name) for name in exposed})
|
113 |
+
|
114 |
+
def find_movie_link(movie_name: str, soup: BeautifulSoup) -> str | None:
    """Find the closest matching movie link from the script database.

    Args:
        movie_name: Title typed by the user; matched case-insensitively.
        soup: Parsed index page whose ``<a href>`` elements are searched.

    Returns:
        Absolute URL of the best fuzzy match (similarity cutoff 0.6), or
        None when the query is blank or nothing is close enough.
    """
    from urllib.parse import urljoin

    query = movie_name.strip().lower()
    if not query:
        # A blank query would otherwise "match" any anchor with empty text.
        logger.info("No close match found.")
        return None

    # Map lower-cased link text to href; anchors without text (e.g. image
    # links) cannot be a title and are skipped.
    movie_links = {}
    for link in soup.find_all('a', href=True):
        title = link.text.strip().lower()
        if title:
            movie_links[title] = link['href']

    close_matches = get_close_matches(query, movie_links.keys(), n=1, cutoff=0.6)
    if not close_matches:
        logger.info("No close match found.")
        return None

    logger.info(f"Close match found: {close_matches[0]}")
    # urljoin gives the same result as concatenation for root-relative
    # hrefs, and stays correct for absolute or slash-less hrefs.
    return urljoin(BASE_URL, movie_links[close_matches[0]])
|
125 |
+
|
126 |
+
def find_script_link(soup: BeautifulSoup, movie_name: str) -> str | None:
    """Find the script download link for a given movie.

    A link whose text names the movie (``Read "<movie_name>" Script``, in any
    letter case, or any text containing "read", "script" and the name) is
    preferred. Because the movie page itself may have been chosen by fuzzy
    matching, the user's spelling can differ from the title in the link; in
    that case the first link of the general form ``Read "<anything>" Script``
    is returned instead.

    Args:
        soup: Parsed movie page whose ``<a href>`` elements are searched.
        movie_name: Title as typed by the user.

    Returns:
        The ``href`` value of the chosen link, or None if no link qualifies.
    """
    import re

    # Every comparison is case-insensitive, so the title/upper/lower
    # spellings collapse into a small set of lower-cased needles.
    spellings = (movie_name, movie_name.title(), movie_name.upper(), movie_name.lower())
    needles = {f'read "{spelling}" script'.lower() for spelling in spellings}
    required_words = ("read", "script", movie_name.lower())
    generic_link = re.compile(r'^read\s+".+"\s+script$', re.IGNORECASE)

    fallback = None
    for link in soup.find_all('a', href=True):
        link_text = link.text.strip()
        lowered = link_text.lower()
        names_movie = any(needle in lowered for needle in needles) or all(
            word in lowered for word in required_words
        )
        if names_movie:
            return link['href']
        if fallback is None and generic_link.match(link_text):
            # Remember the first generic script link; only used when no
            # link names the movie explicitly.
            fallback = link['href']
    return fallback
|
142 |
+
|
143 |
+
# Upper bound, in seconds, for each HTTP request made while fetching a script.
_REQUEST_TIMEOUT_SECONDS = 30


def _download_page(url: str, what: str) -> str | None:
    """Return the body of ``url``, or None (after logging) if the request fails."""
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to load {what}: {str(e)}")
        return None
    return response.text


def fetch_script(movie_name: str, task_id: str | None = None) -> str | None:
    """Fetch and extract the script content for a given movie.

    Args:
        movie_name: Title to look up in the script index.
        task_id: Key under which progress is reported. Defaults to
            ``movie_name``, which is the key the ``/api/progress`` endpoint
            reads, so existing callers behave as before.

    Returns:
        The text of the script's ``<pre>`` element, or None if any download
        fails, the movie or its script link cannot be found, or the script
        page has no ``<pre>`` element. Failures are logged, not raised.
    """
    from urllib.parse import urljoin

    progress_key = movie_name if task_id is None else task_id

    # Initial page load
    update_progress(progress_key, 0.1, "Fetching the script database...")
    index_html = _download_page(ALL_SCRIPTS_URL, "the main page")
    if index_html is None:
        return None

    # Search for movie
    update_progress(progress_key, 0.2, "Searching for the movie...")
    movie_link = find_movie_link(movie_name, BeautifulSoup(index_html, 'html.parser'))
    if not movie_link:
        logger.error(f"Script for '{movie_name}' not found.")
        return None

    # Fetch movie page
    update_progress(progress_key, 0.3, "Loading movie details...")
    movie_html = _download_page(movie_link, "the movie page")
    if movie_html is None:
        return None

    # Find script link
    update_progress(progress_key, 0.4, "Locating script download...")
    script_link = find_script_link(BeautifulSoup(movie_html, 'html.parser'), movie_name)
    if not script_link:
        logger.error(f"Unable to find script link for '{movie_name}'.")
        return None

    # Fetch script content; urljoin matches plain concatenation for
    # root-relative hrefs and also copes with absolute ones.
    script_page_url = urljoin(BASE_URL, script_link)
    update_progress(progress_key, 0.5, "Downloading script content...")
    script_html = _download_page(script_page_url, "the script")
    if script_html is None:
        return None

    # Extract script text
    update_progress(progress_key, 0.6, "Extracting script text...")
    script_content = BeautifulSoup(script_html, 'html.parser').find('pre')
    if not script_content:
        logger.error("Failed to extract script content.")
        return None

    update_progress(progress_key, 0.7, "Script extracted successfully")
    return script_content.get_text()


async def run_analysis(task_id: str, movie_name: str):
    """Run the actual analysis task and record its outcome under ``task_id``.

    Any failure is logged and stored as the task's ``error``; nothing is
    raised to the caller, because this coroutine runs as a background task.
    """
    try:
        # Fetch script. fetch_script performs blocking HTTP requests, so it
        # runs in a worker thread to keep the event loop free to answer
        # progress polls. It reports 0.1-0.7 under task_id, so the values
        # written here stay below and above that range.
        update_progress(task_id, 0.05, "Fetching script...")
        script_text = await asyncio.to_thread(fetch_script, movie_name, task_id)
        if not script_text:
            raise Exception("Script not found")

        # Analyze content
        update_progress(task_id, 0.8, "Analyzing content...")
        result = await analyze_content(script_text)

        # Complete
        update_progress(task_id, 1.0, "Analysis complete", result=result)

    except Exception as e:
        logger.error(f"Error in analysis: {str(e)}", exc_info=True)
        update_progress(task_id, 1.0, "Error occurred", error=str(e))
|
223 |
+
|
224 |
+
@app.get("/api/fetch_and_analyze")
async def fetch_and_analyze(movie_name: str):
    """Fetch and analyze a movie script, with progress tracking.

    Progress is recorded under ``movie_name``, the key that
    ``/api/progress?movie_name=...`` reads and that ``fetch_script`` uses by
    default, so a polling client sees every step through to completion.

    Args:
        movie_name: Title of the movie whose script should be analyzed.

    Returns:
        The analysis result produced by ``analyze_content``.

    Raises:
        HTTPException: 404 when the script cannot be fetched; 500 for any
            other failure. The progress entry is removed in both cases.
    """
    try:
        # Initialize progress
        update_progress(movie_name, 0.0, "Starting script search...")

        # Fetch script in a worker thread: it performs blocking HTTP
        # requests that would otherwise stall the event loop.
        script_text = await asyncio.to_thread(fetch_script, movie_name)
        if not script_text:
            raise HTTPException(status_code=404, detail="Script not found or error occurred")

        # Analyze content
        update_progress(movie_name, 0.8, "Analyzing script content...")
        result = await analyze_content(script_text)

        # Finalize
        update_progress(movie_name, 1.0, "Analysis complete!")
        return result

    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must keep their status
        # code instead of being rewrapped as a 500 below.
        progress_tracker.pop(movie_name, None)
        raise
    except Exception as e:
        logger.error(f"Error in fetch_and_analyze: {str(e)}", exc_info=True)
        # Clean up progress tracker in case of error
        progress_tracker.pop(movie_name, None)
        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}") from e
|
251 |
+
|
252 |
+
@app.get("/api/progress")
def get_progress(movie_name: str):
    """Get the current progress and status for a movie analysis.

    Returns a dict with ``progress`` and ``status``. An unknown movie yields
    a "waiting" placeholder; an entry older than one hour is removed and
    reported as expired.
    """
    progress_info = progress_tracker.get(movie_name)
    if progress_info is None:
        return {"progress": 0, "status": "Waiting to start..."}

    # Clean up old entries (optional)
    age_seconds = (datetime.now() - progress_info.timestamp).total_seconds()
    if age_seconds > 3600:  # 1 hour timeout
        del progress_tracker[movie_name]
        return {"progress": 0, "status": "Session expired. Please try again."}

    return {"progress": progress_info.progress, "status": progress_info.status}
|
276 |
+
|
277 |
+
if __name__ == "__main__":
    # uvicorn is imported here so it is only needed when this file is run
    # directly as a script.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)
|