Spaces:
Running
on
Zero
Running
on
Zero
update
Browse files- app.py +4 -3
- demo/__init__.py +1 -0
- demo/binary_classifier_demo.py +39 -17
- model_utils.py +40 -0
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
-
|
|
|
2 |
|
3 |
if __name__ == "__main__":
|
4 |
-
# Launch
|
5 |
-
print("Starting
|
6 |
binary_app.launch(show_api=False, debug=True, share=True)
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from demo import binary_app
|
3 |
|
4 |
if __name__ == "__main__":
|
5 |
+
# Launch the classifier demo
|
6 |
+
print("Starting AI Text Classifier demo...")
|
7 |
binary_app.launch(show_api=False, debug=True, share=True)
|
demo/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .binary_classifier_demo import binary_app
|
demo/binary_classifier_demo.py
CHANGED
@@ -6,7 +6,7 @@ import os
|
|
6 |
import spaces
|
7 |
import gc
|
8 |
|
9 |
-
from model_utils import load_model, classify_text
|
10 |
from binoculars_utils import compute_scores, cleanup_model, cleanup_models
|
11 |
|
12 |
MINIMUM_TOKENS = 200
|
@@ -30,6 +30,14 @@ css = """
|
|
30 |
border-radius: 0.5rem;
|
31 |
font-weight: bold;
|
32 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
.analysis-block {
|
34 |
background: #f5f5f5;
|
35 |
padding: 15px;
|
@@ -46,7 +54,7 @@ css = """
|
|
46 |
"""
|
47 |
|
48 |
@spaces.GPU
|
49 |
-
def run_binary_classifier(text, show_analysis=False):
|
50 |
# Check GPU status at the beginning
|
51 |
if torch.cuda.is_available():
|
52 |
print(f"Starting classification with GPU: {torch.cuda.get_device_name(0)}")
|
@@ -59,10 +67,13 @@ def run_binary_classifier(text, show_analysis=False):
|
|
59 |
return gr.Markdown(f"Текст слишком короткий. Требуется минимум {MINIMUM_TOKENS} символов."), None, None
|
60 |
|
61 |
try:
|
62 |
-
# Load
|
63 |
-
|
|
|
|
|
|
|
64 |
|
65 |
-
# Compute scores
|
66 |
scores = compute_scores(text, use_chat=True, use_coder=True)
|
67 |
|
68 |
# Run classification
|
@@ -87,7 +98,7 @@ def run_binary_classifier(text, show_analysis=False):
|
|
87 |
scores_str += f"- Score Coder: {scores['score_coder']:.4f}\n"
|
88 |
|
89 |
# Result markdown
|
90 |
-
class_style = "human-text" if predicted_class == "Human" else "ai-text"
|
91 |
result_md = f"""
|
92 |
## Результат классификации
|
93 |
|
@@ -314,7 +325,7 @@ def reset_outputs():
|
|
314 |
with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
|
315 |
with gr.Row():
|
316 |
with gr.Column(scale=3):
|
317 |
-
gr.HTML("<h1
|
318 |
|
319 |
with gr.Row():
|
320 |
with gr.Column():
|
@@ -322,7 +333,15 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
|
|
322 |
lines=10, label="Текст для анализа")
|
323 |
|
324 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
analysis_checkbox = gr.Checkbox(label="Показать детальный анализ текста", value=False)
|
|
|
|
|
326 |
submit_button = gr.Button("Классифицировать", variant="primary")
|
327 |
clear_button = gr.Button("Очистить")
|
328 |
|
@@ -336,15 +355,18 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
|
|
336 |
|
337 |
with gr.Accordion("О модели", open=False):
|
338 |
gr.Markdown("""
|
339 |
-
### О
|
|
|
|
|
340 |
|
341 |
-
|
|
|
|
|
342 |
|
343 |
-
####
|
344 |
-
-
|
345 |
-
-
|
346 |
-
-
|
347 |
-
- Dropout: 0.3
|
348 |
|
349 |
#### Особенности:
|
350 |
- Используется анализ текста и оценки качества текста с помощью Binoculars
|
@@ -353,13 +375,13 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
|
|
353 |
|
354 |
#### Рекомендации:
|
355 |
- Для более точной классификации рекомендуется использовать тексты длиннее 200 слов
|
356 |
-
-
|
357 |
""")
|
358 |
|
359 |
# Set up event handlers
|
360 |
submit_button.click(
|
361 |
-
fn=run_binary_classifier,
|
362 |
-
inputs=[input_text, analysis_checkbox],
|
363 |
outputs=[result_output, analysis_output, input_text]
|
364 |
)
|
365 |
|
|
|
6 |
import spaces
|
7 |
import gc
|
8 |
|
9 |
+
from model_utils import load_model, load_ternary_model, classify_text
|
10 |
from binoculars_utils import compute_scores, cleanup_model, cleanup_models
|
11 |
|
12 |
MINIMUM_TOKENS = 200
|
|
|
30 |
border-radius: 0.5rem;
|
31 |
font-weight: bold;
|
32 |
}
|
33 |
+
.rephrased-text {
|
34 |
+
color: black !important;
|
35 |
+
line-height: 1.9em;
|
36 |
+
padding: 0.5em;
|
37 |
+
background: #ffcc99;
|
38 |
+
border-radius: 0.5rem;
|
39 |
+
font-weight: bold;
|
40 |
+
}
|
41 |
.analysis-block {
|
42 |
background: #f5f5f5;
|
43 |
padding: 15px;
|
|
|
54 |
"""
|
55 |
|
56 |
@spaces.GPU
|
57 |
+
def run_classifier(text, mode="binary", show_analysis=False):
|
58 |
# Check GPU status at the beginning
|
59 |
if torch.cuda.is_available():
|
60 |
print(f"Starting classification with GPU: {torch.cuda.get_device_name(0)}")
|
|
|
67 |
return gr.Markdown(f"Текст слишком короткий. Требуется минимум {MINIMUM_TOKENS} символов."), None, None
|
68 |
|
69 |
try:
|
70 |
+
# Load appropriate classifier model based on mode
|
71 |
+
if mode == "binary":
|
72 |
+
model, scaler, label_encoder, imputer = load_model()
|
73 |
+
else: # ternary
|
74 |
+
model, scaler, label_encoder, imputer = load_ternary_model()
|
75 |
|
76 |
+
# Compute scores
|
77 |
scores = compute_scores(text, use_chat=True, use_coder=True)
|
78 |
|
79 |
# Run classification
|
|
|
98 |
scores_str += f"- Score Coder: {scores['score_coder']:.4f}\n"
|
99 |
|
100 |
# Result markdown
|
101 |
+
class_style = "human-text" if predicted_class == "Human" else "ai-text" if predicted_class in ["AI", "Raw AI"] else "rephrased-text"
|
102 |
result_md = f"""
|
103 |
## Результат классификации
|
104 |
|
|
|
325 |
with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
|
326 |
with gr.Row():
|
327 |
with gr.Column(scale=3):
|
328 |
+
gr.HTML("<h1>Классификатор AI-текста</h1>")
|
329 |
|
330 |
with gr.Row():
|
331 |
with gr.Column():
|
|
|
333 |
lines=10, label="Текст для анализа")
|
334 |
|
335 |
with gr.Row():
|
336 |
+
model_mode = gr.Radio(
|
337 |
+
["binary", "ternary"],
|
338 |
+
label="Режим классификации",
|
339 |
+
value="binary",
|
340 |
+
info="Выберите тип классификации: бинарная (человек/ИИ) или тернарная (человек/ИИ/перефразированный ИИ)"
|
341 |
+
)
|
342 |
analysis_checkbox = gr.Checkbox(label="Показать детальный анализ текста", value=False)
|
343 |
+
|
344 |
+
with gr.Row():
|
345 |
submit_button = gr.Button("Классифицировать", variant="primary")
|
346 |
clear_button = gr.Button("Очистить")
|
347 |
|
|
|
355 |
|
356 |
with gr.Accordion("О модели", open=False):
|
357 |
gr.Markdown("""
|
358 |
+
### О классификаторе AI-текста
|
359 |
+
|
360 |
+
Эта демонстрация использует нейронные сети для классификации текста в двух режимах:
|
361 |
|
362 |
+
#### Бинарная классификация:
|
363 |
+
- Human (Человек) - текст написан человеком
|
364 |
+
- AI (ИИ) - текст сгенерирован искусственным интеллектом
|
365 |
|
366 |
+
#### Тернарная классификация:
|
367 |
+
- Human (Человек) - текст написан человеком
|
368 |
+
- Raw AI (Чистый ИИ) - текст сгенерирован искусственным интеллектом без редактирования
|
369 |
+
- Rephrased AI (Перефразированный ИИ) - текст сгенерирован ИИ и затем отредактирован
|
|
|
370 |
|
371 |
#### Особенности:
|
372 |
- Используется анализ текста и оценки качества текста с помощью Binoculars
|
|
|
375 |
|
376 |
#### Рекомендации:
|
377 |
- Для более точной классификации рекомендуется использовать тексты длиннее 200 слов
|
378 |
+
- Модели обучены на русскоязычных текстах
|
379 |
""")
|
380 |
|
381 |
# Set up event handlers
|
382 |
submit_button.click(
|
383 |
+
fn=run_classifier,
|
384 |
+
inputs=[input_text, model_mode, analysis_checkbox],
|
385 |
outputs=[result_output, analysis_output, input_text]
|
386 |
)
|
387 |
|
model_utils.py
CHANGED
@@ -4,6 +4,7 @@ import joblib
|
|
4 |
import numpy as np
|
5 |
from sklearn.impute import SimpleImputer
|
6 |
from NN_classifier.simple_binary_classifier import Medium_Binary_Network
|
|
|
7 |
from feature_extraction import extract_features
|
8 |
import pandas as pd
|
9 |
|
@@ -45,6 +46,45 @@ def load_model(model_dir='models/medium_binary_classifier'):
|
|
45 |
|
46 |
return model, scaler, label_encoder, imputer
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
def classify_text(text, model, scaler, label_encoder, imputer=None, scores=None):
|
49 |
features_df, text_analysis = extract_features(text, scores=scores)
|
50 |
|
|
|
4 |
import numpy as np
|
5 |
from sklearn.impute import SimpleImputer
|
6 |
from NN_classifier.simple_binary_classifier import Medium_Binary_Network
|
7 |
+
from NN_classifier.neural_net_t import Neural_Network
|
8 |
from feature_extraction import extract_features
|
9 |
import pandas as pd
|
10 |
|
|
|
46 |
|
47 |
return model, scaler, label_encoder, imputer
|
48 |
|
49 |
+
def load_ternary_model(model_dir='models/neural_network'):
|
50 |
+
model_path = os.path.join(model_dir, 'nn_model.pt')
|
51 |
+
scaler_path = os.path.join(model_dir, 'scaler.joblib')
|
52 |
+
encoder_path = os.path.join(model_dir, 'label_encoder.joblib')
|
53 |
+
imputer_path = os.path.join(model_dir, 'imputer.joblib')
|
54 |
+
|
55 |
+
if not os.path.exists(model_path):
|
56 |
+
raise FileNotFoundError(f"Model not found at: {model_path}")
|
57 |
+
|
58 |
+
label_encoder = joblib.load(encoder_path)
|
59 |
+
scaler = joblib.load(scaler_path)
|
60 |
+
|
61 |
+
imputer = None
|
62 |
+
if os.path.exists(imputer_path):
|
63 |
+
imputer = joblib.load(imputer_path)
|
64 |
+
else:
|
65 |
+
print("Warning: Imputer not found, will create a new one during classification")
|
66 |
+
|
67 |
+
input_size = scaler.n_features_in_
|
68 |
+
num_classes = len(label_encoder.classes_)
|
69 |
+
|
70 |
+
model = Neural_Network(input_size, hidden_layers=[256, 192, 128, 64], num_classes=num_classes, dropout_rate=0.3).to(DEVICE)
|
71 |
+
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
|
72 |
+
model.eval()
|
73 |
+
|
74 |
+
print(f"Loaded ternary classifier model with {num_classes} classes: {label_encoder.classes_}")
|
75 |
+
|
76 |
+
if imputer is not None:
|
77 |
+
try:
|
78 |
+
if hasattr(imputer, 'feature_names_in_'):
|
79 |
+
print(f"Imputer has {len(imputer.feature_names_in_)} features")
|
80 |
+
print(f"First few feature names: {imputer.feature_names_in_[:5]}")
|
81 |
+
else:
|
82 |
+
print("Warning: Imputer does not have feature_names_in_ attribute")
|
83 |
+
except Exception as e:
|
84 |
+
print(f"Error checking imputer: {str(e)}")
|
85 |
+
|
86 |
+
return model, scaler, label_encoder, imputer
|
87 |
+
|
88 |
def classify_text(text, model, scaler, label_encoder, imputer=None, scores=None):
|
89 |
features_df, text_analysis = extract_features(text, scores=scores)
|
90 |
|