Update app.py
app.py
CHANGED
@@ -1,270 +1,74 @@
 # -*- coding: utf-8 -*-
-"""Emotion Detection NLP Mental Health
+"""Emotion Detection NLP Mental Health"""
 
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/#fileId=https%3A//storage.googleapis.com/kaggle-colab-exported-notebooks/emotion-detection-nlp-mental-health-07377912-eef1-476c-bca0-e3f3abe2bc31.ipynb%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com/20250205/auto/storage/goog4_request%26X-Goog-Date%3D20250205T063040Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D3379ac810304cc40b0fa5fa915ff09212c0da161bbdae3190bbb13f09d158e28ddbebaecc6f31f960598bf39852f632c8d65288530a38effc9d316c50e6ab1a71aedc9066b12ef4487648ede7d5646dbef0283c9eb7a5539c47ac342e640964e13ff9ea00f5ca777b4adc007f3a830e7d9cfccc590924dc8a5057440bfd82b0e97c9739112dba40371f7321d5231ddd5b476890fb7d4fced9ed0ba155fde73046cb775adeadd827f01dcc90a583f7dab149ca3a5c35f2b29df5106ca356258ee13267ac10671a604057af3e053d45fdabb4d1758c1b3f3da38ddbab02762b81b7f717321a649a1b63f8bc5773a8a27377de6214668dd1b1253012ff8017e2850
-"""
-
-'''# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
-# THEN FEEL FREE TO DELETE THIS CELL.
-# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
-# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
-# NOTEBOOK.
-import kagglehub
-thedevastator_nlp_mental_health_conversations_path = kagglehub.dataset_download('thedevastator/nlp-mental-health-conversations')
-
-print('Data source import complete.')'''
-
-"""# Introduction
-
-In recent years, mental health awareness has grown, leading to a greater emphasis on making support more accessible to everyone. Artificial Intelligence (AI) is playing a pivotal role in bridging the gap between those in need of mental health advice and the limited number of qualified professionals available. The dataset provided in this project is a valuable resource for developing Natural Language Processing (NLP) models that can assist with mental health support.
-
-The dataset used in this project consists of anonymized conversations between patients and experienced psychologists, where we will concentrate on detecting the emotional context of the dialogue. By understanding the emotions present in these exchanges, the NLP model will be able to respond more appropriately and offer tailored advice based on the patient's emotional state.
-
-## Purpose
-
-The notebook will explore, preprocess, and model the data with the goal of improving emotion detection in patient conversations. This will allow us to understand the emotional landscape of mental health discussions and create AI systems capable of providing emotionally aware responses.
-
-# Libraries
-"""
-
-'''#Download and Extracting Data from Kaggle
-import os
-import zipfile'''
-
-
-# Data Preprcessing
 import string
 import re
-from warnings import filterwarnings
-import matplotlib.pyplot as plt
-import numpy as np
-import pandas as pd
-from PIL import Image
-
-
 import nltk
-
+import pandas as pd
 from nltk.tokenize import word_tokenize
-from nltk.
+from nltk.corpus import stopwords
 from nltk.stem import WordNetLemmatizer
-
-
-
-
-
-
-
-from sklearn.
+from transformers import pipeline
+import gradio as gr
+import matplotlib.pyplot as plt
+import seaborn as sns
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
 from sklearn.feature_extraction.text import TfidfVectorizer
+from warnings import filterwarnings
 
-
-filterwarnings('ignore')
-pd.set_option('display.max_columns', None)
-pd.set_option('display.max_colwidth', None)
-pd.set_option('display.width', 200)
-pd.set_option('display.float_format', lambda x: '%.2f' % x)
-
-# Download necessary NLTK resources
-import nltk
+# NLTK Downloads
 nltk.download('punkt_tab')
 nltk.download('stopwords')
 nltk.download('punkt')
 nltk.download('wordnet')
-
-"""# Data
-
-## Download and Extracting
-"""
-
-'''# Downlaod the dataset using kaggle API
-os.system("kaggle datasets download -d thedevastator/nlp-mental-health-conversations")
-
-#Extract the download zip files
-dataset_zip='nlp-mental-health-conversations.zip'
-extracted_folder='nlp_mental_health_conversations'
-
-#Extract the dataset
-with zipfile.ZipFile(dataset_zip,'r') as zip_ref:
-    zip_ref.extractall(extracted_folder)
-
-print("Dataset downloaded and extracted successfully.")'''
-
-"""## Explore Data"""
-
-data = pd.read_csv("train.csv")
-
-
-data.head()
-
-reponse=data.loc[0,"Response"]
-print("Length Before text preprocessing : ",len(reponse))
-
-"""## Text Preprocessing
-- Normlaize
-- Punctuation
-- Numbers
-- StopWords
-- Lemmezation
-- Removing Words
-"""
 
-#
+# Text Preprocessing
 lemmatizer = WordNetLemmatizer()
 
 def clean_text(text):
-
-    text = str(text)
-    #
-    text =
-    # Remove punctuation
-    text = text.translate(str.maketrans('', '', string.punctuation))
-    # Remove numbers
-    text = re.sub(r'\d+', '', text)
-    # Tokenize text
+    """Cleans and preprocesses the input text."""
+    text = str(text).lower()
+    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
+    text = re.sub(r'\d+', '', text)  # Remove numbers
     tokens = word_tokenize(text)
-    # Remove stop words
     stop_words = set(stopwords.words('english'))
-    tokens = [word for word in tokens if word not in stop_words]
-    # Lemmatize
-    tokens = [lemmatizer.lemmatize(word) for word in tokens]
-
-    # Join tokens back into a string
+    tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatize words
     return ' '.join(tokens)
 
-#
+# Load data
+data = pd.read_csv("train.csv")
 data['Context'] = data['Context'].apply(clean_text)
 
-#
-
-# Let's remove words used less than 1
-temp_Context = pd.Series(' '.join(data['Context']).split()).value_counts()
-drops = temp_Context[temp_Context <= 1]
-data['Context'] = data['Context'].apply(lambda x: " ".join(x for x in x.split() if x not in drops))
-
-"""## Text visualization"""
-
-tf_Context = data["Context"].apply(lambda x: pd.value_counts(x.split(" "))).sum(axis=0).reset_index()
-tf_Context.columns = ["words", "tf"]
-tf_Context.sort_values("tf", ascending=False)
-
-# Barplot for Context
-
-tf_Context[tf_Context["tf"] > 300].plot.bar(x="words", y="tf")
-plt.show()
-
-"""# Emotions Anaylsis"""
-
-from transformers import pipeline
-
-# Extract and clean 'Context' column
-contexts = data['Context']
-
-# Load pre-trained emotion detection model
+# Emotion Detection Model
 emotion_model = pipeline('sentiment-analysis', model='j-hartmann/emotion-english-distilroberta-base')
-
-# Analyze emotions in 'Context'
+contexts = data['Context']
 emotions = contexts.apply(lambda x: emotion_model(x)[0]['label'])
-
-# Add detected emotions as a new column
 data['Detected_Emotion'] = emotions
 
-
-
-data['Detected_Emotion'].value_counts()
-
-"""# Feature Extraction"""
-
-# Initialize TF-IDF Vectorizer
+# Feature Extraction
 vectorizer = TfidfVectorizer()
+tfidf_matrix = vectorizer.fit_transform(data['Context'])
+X_train, X_test, y_train, y_test = train_test_split(tfidf_matrix.toarray(), data['Detected_Emotion'], test_size=0.3, random_state=42)
 
-
-#
-tfidf_matrix = vectorizer.fit_transform(contexts)
-
-# Convert to array (if needed)
-tfidf_array = tfidf_matrix.toarray()
-
-"""# Model
-
-## Data Spilting
-"""
-
-from sklearn.model_selection import train_test_split
-
-# Split the data
-X_train, X_test, y_train, y_test = train_test_split(tfidf_array, data['Detected_Emotion'], test_size=0.3, random_state=42)
-
-from sklearn.ensemble import RandomForestClassifier
-
-# Initialize the model
+# Train a Random Forest Classifier
 model = RandomForestClassifier()
-
-"""## Fine Tuning"""
-
-from sklearn.model_selection import GridSearchCV
-
-# Define the parameter grid
-param_grid = {
-    'n_estimators': [100, 200, 300],
-    'max_depth': [None, 10, 20, 30]
-}
-
-# Perform grid search
-grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
-grid_search.fit(X_train, y_train)
-
-# Best parameters
-print(f'Best parameters: {grid_search.best_params_}')
-
-"""# Train and Evaluation
-
-## Train
-"""
-
-model = RandomForestClassifier()
-# Train the model
 model.fit(X_train, y_train)
 
-
-
-
-
-
-
-
-
-# Calculate accuracy
-accuracy = accuracy_score(y_test, y_pred)
-print(f'Accuracy: {accuracy}')
-
-# Print classification report
-print("Classification Report:")
-print(classification_report(y_test, y_pred))
-
-# Generate confusion matrix
-conf_matrix = confusion_matrix(y_test, y_pred)
-print("Confusion Matrix:")
-print(conf_matrix)
-
-# Plot confusion matrix
-plt.figure(figsize=(10, 7))
-sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=np.unique(y_pred), yticklabels=np.unique(y_test))
-plt.xlabel('Predicted')
-plt.ylabel('Actual')
-plt.title('Confusion Matrix')
-plt.show()
-
-"""## Test Unseen Data"""
-
-# Example new text
-new_text = ["let's leave i am scared"]
+# Function to predict emotion of new text
+def predict_emotion(text):
+    """Predicts the emotion for the given text."""
+    cleaned_text = clean_text(text)
+    tfidf_text = vectorizer.transform([cleaned_text])
+    predicted_emotion = model.predict(tfidf_text)
+    return predicted_emotion[0]
 
-#
-
-new_text_tfidf = vectorizer.transform(new_text_cleaned)
+# Gradio Interface
+iface = gr.Interface(fn=predict_emotion, inputs="text", outputs="text", live=True)
 
-#
-
-
+# Launch the Gradio Interface
+if __name__ == "__main__":
+    iface.launch()