Spaces:

CoffeBank
/

RU_AI_Detector

Running on Zero

App Files Files Community

CoffeBank committed 29 days ago

Commit

c5cea9c

1 Parent(s): ce79581

update

Browse files

Files changed (4) hide show

app.py +4 -3
demo/__init__.py +1 -0
demo/binary_classifier_demo.py +39 -17
model_utils.py +40 -0

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
-from demo.binary_classifier_demo import binary_app
 if __name__ == "__main__":
-    # Launch only the binary classifier demo
-    print("Starting Binary Classifier demo...")
     binary_app.launch(show_api=False, debug=True, share=True)

+import gradio as gr
+from demo import binary_app
 if __name__ == "__main__":
+    # Launch the classifier demo
+    print("Starting AI Text Classifier demo...")
     binary_app.launch(show_api=False, debug=True, share=True)

demo/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from .binary_classifier_demo import binary_app

demo/binary_classifier_demo.py CHANGED Viewed

@@ -6,7 +6,7 @@ import os
 import spaces
 import gc
-from model_utils import load_model, classify_text
 from binoculars_utils import compute_scores, cleanup_model, cleanup_models
 MINIMUM_TOKENS = 200
@@ -30,6 +30,14 @@ css = """
     border-radius: 0.5rem;
     font-weight: bold;
 }
 .analysis-block {
     background: #f5f5f5;
     padding: 15px;
@@ -46,7 +54,7 @@ css = """
 """
 @spaces.GPU
-def run_binary_classifier(text, show_analysis=False):
     # Check GPU status at the beginning
     if torch.cuda.is_available():
         print(f"Starting classification with GPU: {torch.cuda.get_device_name(0)}")
@@ -59,10 +67,13 @@ def run_binary_classifier(text, show_analysis=False):
         return gr.Markdown(f"Текст слишком короткий. Требуется минимум {MINIMUM_TOKENS} символов."), None, None
     try:
-        # Load binary classifier model
-        model, scaler, label_encoder, imputer = load_model()
-        # Compute scores последовательно
         scores = compute_scores(text, use_chat=True, use_coder=True)
         # Run classification
@@ -87,7 +98,7 @@ def run_binary_classifier(text, show_analysis=False):
                 scores_str += f"- Score Coder: {scores['score_coder']:.4f}\n"
         # Result markdown
-        class_style = "human-text" if predicted_class == "Human" else "ai-text"
         result_md = f"""
 ## Результат классификации
@@ -314,7 +325,7 @@ def reset_outputs():
 with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
     with gr.Row():
         with gr.Column(scale=3):
-            gr.HTML("<h1>Бинарный классификатор: Human vs AI Detection</h1>")
     with gr.Row():
         with gr.Column():
@@ -322,7 +333,15 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
                                    lines=10, label="Текст для анализа")
             with gr.Row():
                 analysis_checkbox = gr.Checkbox(label="Показать детальный анализ текста", value=False)
                 submit_button = gr.Button("Классифицировать", variant="primary")
                 clear_button = gr.Button("Очистить")
@@ -336,15 +355,18 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
     with gr.Accordion("О модели", open=False):
         gr.Markdown("""
-        ### О бинарном классификаторе
-        Эта демонстрация использует нейронную сеть для классификации текста как написанного человеком или сгенерированного ИИ.
-        #### Архитектура модели:
-        - Входной слой: Количество признаков (зависит от анализа текста)
-        - Скрытые слои: [256, 192, 128, 64]
-        - Выходной слой: 2 класса (Human, AI)
-        - Dropout: 0.3
         #### Особенности:
         - Используется анализ текста и оценки качества текста с помощью Binoculars
@@ -353,13 +375,13 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
         #### Рекомендации:
         - Для более точной классификации рекомендуется использовать тексты длиннее 200 слов
-        - Модель обучена на русскоязычных текстах
         """)
     # Set up event handlers
     submit_button.click(
-        fn=run_binary_classifier,
-        inputs=[input_text, analysis_checkbox],
         outputs=[result_output, analysis_output, input_text]
     )

 import spaces
 import gc
+from model_utils import load_model, load_ternary_model, classify_text
 from binoculars_utils import compute_scores, cleanup_model, cleanup_models
 MINIMUM_TOKENS = 200
     border-radius: 0.5rem;
     font-weight: bold;
 }
+.rephrased-text {
+    color: black !important;
+    line-height: 1.9em;
+    padding: 0.5em;
+    background: #ffcc99;
+    border-radius: 0.5rem;
+    font-weight: bold;
+}
 .analysis-block {
     background: #f5f5f5;
     padding: 15px;
 """
 @spaces.GPU
+def run_classifier(text, mode="binary", show_analysis=False):
     # Check GPU status at the beginning
     if torch.cuda.is_available():
         print(f"Starting classification with GPU: {torch.cuda.get_device_name(0)}")
         return gr.Markdown(f"Текст слишком короткий. Требуется минимум {MINIMUM_TOKENS} символов."), None, None
     try:
+        # Load appropriate classifier model based on mode
+        if mode == "binary":
+            model, scaler, label_encoder, imputer = load_model()
+        else:  # ternary
+            model, scaler, label_encoder, imputer = load_ternary_model()
+        # Compute scores
         scores = compute_scores(text, use_chat=True, use_coder=True)
         # Run classification
                 scores_str += f"- Score Coder: {scores['score_coder']:.4f}\n"
         # Result markdown
+        class_style = "human-text" if predicted_class == "Human" else "ai-text" if predicted_class in ["AI", "Raw AI"] else "rephrased-text"
         result_md = f"""
 ## Результат классификации
 with gr.Blocks(css=css, theme=gr.themes.Base()) as binary_app:
     with gr.Row():
         with gr.Column(scale=3):
+            gr.HTML("<h1>Классификатор AI-текста</h1>")
     with gr.Row():
         with gr.Column():
                                    lines=10, label="Текст для анализа")
             with gr.Row():
+                model_mode = gr.Radio(
+                    ["binary", "ternary"],
+                    label="Режим классификации",
+                    value="binary",
+                    info="Выберите тип классификации: бинарная (человек/ИИ) или тернарная (человек/ИИ/перефразированный ИИ)"
+                )
                 analysis_checkbox = gr.Checkbox(label="Показать детальный анализ текста", value=False)
+            with gr.Row():
                 submit_button = gr.Button("Классифицировать", variant="primary")
                 clear_button = gr.Button("Очистить")
     with gr.Accordion("О модели", open=False):
         gr.Markdown("""
+        ### О классификаторе AI-текста
+        Эта демонстрация использует нейронные сети для классификации текста в двух режимах:
+        #### Бинарная классификация:
+        - Human (Человек) - текст написан человеком
+        - AI (ИИ) - текст сгенерирован искусственным интеллектом
+        #### Тернарная классификация:
+        - Human (Человек) - текст написан человеком
+        - Raw AI (Чистый ИИ) - текст сгенерирован искусственным интеллектом без редактирования
+        - Rephrased AI (Перефразированный ИИ) - текст сгенерирован ИИ и затем отредактирован
         #### Особенности:
         - Используется анализ текста и оценки качества текста с помощью Binoculars
         #### Рекомендации:
         - Для более точной классификации рекомендуется использовать тексты длиннее 200 слов
+        - Модели обучены на русскоязычных текстах
         """)
     # Set up event handlers
     submit_button.click(
+        fn=run_classifier,
+        inputs=[input_text, model_mode, analysis_checkbox],
         outputs=[result_output, analysis_output, input_text]
     )

model_utils.py CHANGED Viewed

@@ -4,6 +4,7 @@ import joblib
 import numpy as np
 from sklearn.impute import SimpleImputer
 from NN_classifier.simple_binary_classifier import Medium_Binary_Network
 from feature_extraction import extract_features
 import pandas as pd
@@ -45,6 +46,45 @@ def load_model(model_dir='models/medium_binary_classifier'):
     return model, scaler, label_encoder, imputer
 def classify_text(text, model, scaler, label_encoder, imputer=None, scores=None):
     features_df, text_analysis = extract_features(text, scores=scores)

 import numpy as np
 from sklearn.impute import SimpleImputer
 from NN_classifier.simple_binary_classifier import Medium_Binary_Network
+from NN_classifier.neural_net_t import Neural_Network
 from feature_extraction import extract_features
 import pandas as pd
     return model, scaler, label_encoder, imputer
+def load_ternary_model(model_dir='models/neural_network'):
+    model_path = os.path.join(model_dir, 'nn_model.pt')
+    scaler_path = os.path.join(model_dir, 'scaler.joblib')
+    encoder_path = os.path.join(model_dir, 'label_encoder.joblib')
+    imputer_path = os.path.join(model_dir, 'imputer.joblib')
+    if not os.path.exists(model_path):
+        raise FileNotFoundError(f"Model not found at: {model_path}")
+    label_encoder = joblib.load(encoder_path)
+    scaler = joblib.load(scaler_path)
+    imputer = None
+    if os.path.exists(imputer_path):
+        imputer = joblib.load(imputer_path)
+    else:
+        print("Warning: Imputer not found, will create a new one during classification")
+    input_size = scaler.n_features_in_
+    num_classes = len(label_encoder.classes_)
+    model = Neural_Network(input_size, hidden_layers=[256, 192, 128, 64], num_classes=num_classes, dropout_rate=0.3).to(DEVICE)
+    model.load_state_dict(torch.load(model_path, map_location=DEVICE))
+    model.eval()
+    print(f"Loaded ternary classifier model with {num_classes} classes: {label_encoder.classes_}")
+    if imputer is not None:
+        try:
+            if hasattr(imputer, 'feature_names_in_'):
+                print(f"Imputer has {len(imputer.feature_names_in_)} features")
+                print(f"First few feature names: {imputer.feature_names_in_[:5]}")
+            else:
+                print("Warning: Imputer does not have feature_names_in_ attribute")
+        except Exception as e:
+            print(f"Error checking imputer: {str(e)}")
+    return model, scaler, label_encoder, imputer
 def classify_text(text, model, scaler, label_encoder, imputer=None, scores=None):
     features_df, text_analysis = extract_features(text, scores=scores)