Spaces:

ewan-rvl
/

toxicity-detector

Running

App Files Files Community

toxicity-detector / app.py

ewan-rvl

First true version

c3fd231 24 days ago

raw

history blame contribute delete

2.5 kB

	import os
	import re
	import logging
	import nltk
	import torch
	import gradio as gr
	from transformers import pipeline, AutoConfig
	from nltk.tokenize import word_tokenize
	from nltk.stem import WordNetLemmatizer
	from textblob import TextBlob

	# Route DEBUG-level (and higher) records through the root logger.
	logging.basicConfig(level=logging.DEBUG)

	# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1
	# (the transformers pipeline convention for "first GPU" / "CPU").
	if torch.cuda.is_available():
	    device = 0
	else:
	    device = -1

	# Hugging Face model identifier shared by the config and the pipeline.
	model_name = "AgentPublic/camembert-base-toxic-fr-user-prompts"

	# The config is loaded on its own because analyze_text reads its label2id map.
	config = AutoConfig.from_pretrained(model_name)
	classifier = pipeline(
	    'text-classification',
	    model=model_name,
	    device=device,
	)

	# NLTK data required at runtime: the Punkt tokenizer data used by
	# word_tokenize and the WordNet corpus used by WordNetLemmatizer.
	# Each entry is (path looked up by nltk.data.find, package name for nltk.download).
	_NLTK_RESOURCES = (
	    ('tokenizers/punkt', 'punkt'),
	    ('tokenizers/punkt_tab', 'punkt_tab'),
	    ('corpora/wordnet', 'wordnet'),
	)

	for _resource_path, _package in _NLTK_RESOURCES:
	    try:
	        nltk.data.find(_resource_path)
	    except LookupError:
	        # Only go to the network when the resource is missing locally, so
	        # every resource is handled the same way on startup.
	        nltk.download(_package)

	lemmatizer = WordNetLemmatizer()
	# French insult terms; matching is case-insensitive and whole-word only.
	insult_words = [
	    "con", "cons", "connard", "connards", "enculé", "enculés",
	    "pute", "putes", "putain", "merde", "idiot",
	]

	# Join with a bare "|" so the terms are regex alternatives. An escaped "\|"
	# is a literal pipe character, which would make the pattern match only the
	# text "con|cons|..." and never fire on real input. re.escape keeps any
	# future term containing regex metacharacters literal.
	insult_pattern = re.compile(
	    r'\b(?:' + '|'.join(map(re.escape, insult_words)) + r')\b',
	    re.IGNORECASE,
	)

	def analyze_text(text, threshold=0.5):
	    """Report whether the classifier flags *text* as toxic with enough confidence.

	    Args:
	        text: Text passed to the module-level ``classifier`` pipeline
	            (with truncation enabled).
	        threshold: Minimum score the top prediction must reach.

	    Returns:
	        True when the top prediction carries the label that
	        ``config.label2id`` maps to id 1 (falling back to "toxic" when no
	        label has id 1) and its score is at least *threshold*; False
	        otherwise.
	    """
	    prediction = classifier(text, truncation=True)[0]
	    score = prediction['score']

	    # Invert label -> id so the label name for id 1 can be looked up.
	    id_to_label = dict((idx, name) for name, idx in config.label2id.items())
	    toxic_label = id_to_label.get(1, "toxic")

	    logging.debug(f"Texte: {text} -> Score: {score}")

	    if prediction['label'] != toxic_label:
	        return False
	    return score >= threshold

	def detect_toxicity(message):
	    """Decide whether *message* is toxic using the model plus two heuristics.

	    A message counts as toxic when any of the following holds:
	    ``analyze_text`` flags it, ``insult_pattern`` matches somewhere in it,
	    or the TextBlob polarity of its lowercased, lemmatized tokens is
	    below -0.5.

	    Args:
	        message: The text to evaluate.

	    Returns:
	        A boolean verdict.
	    """
	    # NOTE(review): WordNetLemmatizer and TextBlob's default analyzer look
	    # English-oriented while the UI asks for French text - confirm this
	    # sentiment heuristic is meaningful for the intended input.
	    lemmas = []
	    for token in word_tokenize(message.lower()):
	        lemmas.append(lemmatizer.lemmatize(token))
	    polarity = TextBlob(" ".join(lemmas)).sentiment.polarity

	    if analyze_text(message):
	        return True
	    # search() yields a Match object or None; only its truthiness matters here.
	    if insult_pattern.search(message):
	        return True
	    return polarity < -0.5

	def predict(text):
	    """Return the toxicity verdict for *text* formatted as a display string."""
	    return "Is toxic: {}".format(detect_toxicity(text))

	# Gradio UI: a five-line text input feeding predict, and a text box for the result.
	text_input = gr.Textbox(label="Texte en français", lines=5)
	result_output = gr.Textbox(label="Résultat")

	iface = gr.Interface(
	    title="Détecteur de Toxicité",
	    description="Entrez un texte en français pour vérifier s'il est toxique.",
	    fn=predict,
	    inputs=text_input,
	    outputs=result_output,
	)

	# Start the web app only when the file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()