krushna123 committed
Commit ba1e688 · verified · 1 Parent(s): f43447e

Update app.py

Files changed (1)
  1. app.py +41 -237
app.py CHANGED
@@ -1,270 +1,74 @@
  # -*- coding: utf-8 -*-
- """Emotion Detection NLP Mental Health

- Automatically generated by Colab.
-
- Original file is located at
-     https://colab.research.google.com/#fileId=https%3A//storage.googleapis.com/kaggle-colab-exported-notebooks/emotion-detection-nlp-mental-health-07377912-eef1-476c-bca0-e3f3abe2bc31.ipynb%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com/20250205/auto/storage/goog4_request%26X-Goog-Date%3D20250205T063040Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D3379ac810304cc40b0fa5fa915ff09212c0da161bbdae3190bbb13f09d158e28ddbebaecc6f31f960598bf39852f632c8d65288530a38effc9d316c50e6ab1a71aedc9066b12ef4487648ede7d5646dbef0283c9eb7a5539c47ac342e640964e13ff9ea00f5ca777b4adc007f3a830e7d9cfccc590924dc8a5057440bfd82b0e97c9739112dba40371f7321d5231ddd5b476890fb7d4fced9ed0ba155fde73046cb775adeadd827f01dcc90a583f7dab149ca3a5c35f2b29df5106ca356258ee13267ac10671a604057af3e053d45fdabb4d1758c1b3f3da38ddbab02762b81b7f717321a649a1b63f8bc5773a8a27377de6214668dd1b1253012ff8017e2850
- """
-
- '''# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
- # THEN FEEL FREE TO DELETE THIS CELL.
- # NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
- # ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
- # NOTEBOOK.
- import kagglehub
- thedevastator_nlp_mental_health_conversations_path = kagglehub.dataset_download('thedevastator/nlp-mental-health-conversations')
-
- print('Data source import complete.')'''
-
- """# Introduction
-
- In recent years, mental health awareness has grown, leading to a greater emphasis on making support more accessible to everyone. Artificial Intelligence (AI) is playing a pivotal role in bridging the gap between those in need of mental health advice and the limited number of qualified professionals available. The dataset provided in this project is a valuable resource for developing Natural Language Processing (NLP) models that can assist with mental health support.
-
- The dataset used in this project consists of anonymized conversations between patients and experienced psychologists, where we will concentrate on detecting the emotional context of the dialogue. By understanding the emotions present in these exchanges, the NLP model will be able to respond more appropriately and offer tailored advice based on the patient's emotional state.
-
- ## Purpose
-
- The notebook will explore, preprocess, and model the data with the goal of improving emotion detection in patient conversations. This will allow us to understand the emotional landscape of mental health discussions and create AI systems capable of providing emotionally aware responses.
-
- # Libraries
- """
-
- '''#Download and Extracting Data from Kaggle
- import os
- import zipfile'''
-
-
- # Data Preprcessing
  import string
  import re
- from warnings import filterwarnings
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- from PIL import Image
-
-
  import nltk
- from nltk.corpus import stopwords
  from nltk.tokenize import word_tokenize
- from nltk.sentiment import SentimentIntensityAnalyzer
  from nltk.stem import WordNetLemmatizer
-
- #Label Encouding
- from sklearn.preprocessing import LabelEncoder
- from textblob import Word, TextBlob
- from wordcloud import WordCloud
-
- #Feature Extracting
- from sklearn.feature_extraction.text import CountVectorizer
  from sklearn.feature_extraction.text import TfidfVectorizer

-
- filterwarnings('ignore')
- pd.set_option('display.max_columns', None)
- pd.set_option('display.max_colwidth', None)
- pd.set_option('display.width', 200)
- pd.set_option('display.float_format', lambda x: '%.2f' % x)
-
- # Download necessary NLTK resources
- import nltk
  nltk.download('punkt_tab')
  nltk.download('stopwords')
  nltk.download('punkt')
- nltk.download('wordnet') # Download the wordnet corpus for lemmatization
-
- """# Data
-
- ## Download and Extracting
- """
-
- '''# Downlaod the dataset using kaggle API
- os.system("kaggle datasets download -d thedevastator/nlp-mental-health-conversations")
-
- #Extract the download zip files
- dataset_zip='nlp-mental-health-conversations.zip'
- extracted_folder='nlp_mental_health_conversations'
-
- #Extract the dataset
- with zipfile.ZipFile(dataset_zip,'r') as zip_ref:
-     zip_ref.extractall(extracted_folder)
-
- print("Dataset downloaded and extracted successfully.")'''
-
- """## Explore Data"""
-
- data = pd.read_csv("train.csv")
-
-
- data.head()
-
- reponse=data.loc[0,"Response"]
- print("Length Before text preprocessing : ",len(reponse))
-
- """## Text Preprocessing
- - Normlaize
- - Punctuation
- - Numbers
- - StopWords
- - Lemmezation
- - Removing Words
- """

- # Initialize the lemmatizer
  lemmatizer = WordNetLemmatizer()

  def clean_text(text):
-     # Convert to string
-     text = str(text)
-     # Convert to lowercase
-     text = text.lower()
-     # Remove punctuation
-     text = text.translate(str.maketrans('', '', string.punctuation))
-     # Remove numbers
-     text = re.sub(r'\d+', '', text)
-     # Tokenize text
      tokens = word_tokenize(text)
-     # Remove stop words
      stop_words = set(stopwords.words('english'))
-     tokens = [word for word in tokens if word not in stop_words]
-     # Lemmatize tokens
-     tokens = [lemmatizer.lemmatize(word) for word in tokens]
-
-     # Join tokens back into a string
      return ' '.join(tokens)

- # Apply the clean_text function to your 'Context' column
  data['Context'] = data['Context'].apply(clean_text)

- # Remove Rarewords:
-
- # Let's remove words used less than 1
- temp_Context = pd.Series(' '.join(data['Context']).split()).value_counts()
- drops = temp_Context[temp_Context <= 1]
- data['Context'] = data['Context'].apply(lambda x: " ".join(x for x in x.split() if x not in drops))
-
- """## Text visualization"""
-
- tf_Context = data["Context"].apply(lambda x: pd.value_counts(x.split(" "))).sum(axis=0).reset_index()
- tf_Context.columns = ["words", "tf"]
- tf_Context.sort_values("tf", ascending=False)
-
- # Barplot for Context
-
- tf_Context[tf_Context["tf"] > 300].plot.bar(x="words", y="tf")
- plt.show()
-
- """# Emotions Anaylsis"""
-
- from transformers import pipeline
-
- # Extract and clean 'Context' column
- contexts = data['Context']
-
- # Load pre-trained emotion detection model
  emotion_model = pipeline('sentiment-analysis', model='j-hartmann/emotion-english-distilroberta-base')
-
- # Analyze emotions in 'Context'
  emotions = contexts.apply(lambda x: emotion_model(x)[0]['label'])
-
- # Add detected emotions as a new column
  data['Detected_Emotion'] = emotions

- data.head()
-
- data['Detected_Emotion'].value_counts()
-
- """# Feature Extraction"""
-
- # Initialize TF-IDF Vectorizer
  vectorizer = TfidfVectorizer()

- # Fit and transform the data
- tfidf_matrix = vectorizer.fit_transform(contexts)
-
- # Convert to array (if needed)
- tfidf_array = tfidf_matrix.toarray()
-
- """# Model
-
- ## Data Spilting
- """
-
- from sklearn.model_selection import train_test_split
-
- # Split the data
- X_train, X_test, y_train, y_test = train_test_split(tfidf_array, data['Detected_Emotion'], test_size=0.3, random_state=42)
-
- from sklearn.ensemble import RandomForestClassifier
-
- # Initialize the model
  model = RandomForestClassifier()
-
- """## Fine Tuning"""
-
- from sklearn.model_selection import GridSearchCV
-
- # Define the parameter grid
- param_grid = {
-     'n_estimators': [100, 200, 300],
-     'max_depth': [None, 10, 20, 30]
- }
-
- # Perform grid search
- grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
- grid_search.fit(X_train, y_train)
-
- # Best parameters
- print(f'Best parameters: {grid_search.best_params_}')
-
- """# Train and Evaluation
-
- ## Train
- """
-
- model = RandomForestClassifier()
- # Train the model
  model.fit(X_train, y_train)

- """## Evaluation"""
-
- import seaborn as sns
- from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
-
- # Make predictions
- y_pred = model.predict(X_test)
-
- # Calculate accuracy
- accuracy = accuracy_score(y_test, y_pred)
- print(f'Accuracy: {accuracy}')
-
- # Print classification report
- print("Classification Report:")
- print(classification_report(y_test, y_pred))
-
- # Generate confusion matrix
- conf_matrix = confusion_matrix(y_test, y_pred)
- print("Confusion Matrix:")
- print(conf_matrix)
-
- # Plot confusion matrix
- plt.figure(figsize=(10, 7))
- sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=np.unique(y_pred), yticklabels=np.unique(y_test))
- plt.xlabel('Predicted')
- plt.ylabel('Actual')
- plt.title('Confusion Matrix')
- plt.show()
-
- """## Test Unseen Data"""
-
- # Example new text
- new_text = ["let's leave i am scared"]

- # Clean and transform the new text
- new_text_cleaned = [clean_text(text) for text in new_text]
- new_text_tfidf = vectorizer.transform(new_text_cleaned)

- # Predict emotion
- predicted_emotion = model.predict(new_text_tfidf)
- print(predicted_emotion)

  # -*- coding: utf-8 -*-
+ """Emotion Detection NLP Mental Health"""

  import string
  import re
  import nltk
+ import pandas as pd
  from nltk.tokenize import word_tokenize
+ from nltk.corpus import stopwords
  from nltk.stem import WordNetLemmatizer
+ from transformers import pipeline
+ import gradio as gr
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import numpy as np
+ from sklearn.model_selection import train_test_split
+ from sklearn.ensemble import RandomForestClassifier
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
  from sklearn.feature_extraction.text import TfidfVectorizer
+ from warnings import filterwarnings

+ # NLTK Downloads
  nltk.download('punkt_tab')
  nltk.download('stopwords')
  nltk.download('punkt')
+ nltk.download('wordnet')

+ # Text Preprocessing
  lemmatizer = WordNetLemmatizer()

  def clean_text(text):
+     """Cleans and preprocesses the input text."""
+     text = str(text).lower()
+     text = text.translate(str.maketrans('', '', string.punctuation)) # Remove punctuation
+     text = re.sub(r'\d+', '', text) # Remove numbers
      tokens = word_tokenize(text)
      stop_words = set(stopwords.words('english'))
+     tokens = [word for word in tokens if word not in stop_words] # Remove stopwords
+     tokens = [lemmatizer.lemmatize(word) for word in tokens] # Lemmatize words
      return ' '.join(tokens)

+ # Load data
+ data = pd.read_csv("train.csv")
  data['Context'] = data['Context'].apply(clean_text)

+ # Emotion Detection Model
  emotion_model = pipeline('sentiment-analysis', model='j-hartmann/emotion-english-distilroberta-base')
+ contexts = data['Context']
  emotions = contexts.apply(lambda x: emotion_model(x)[0]['label'])
  data['Detected_Emotion'] = emotions

+ # Feature Extraction
  vectorizer = TfidfVectorizer()
+ tfidf_matrix = vectorizer.fit_transform(data['Context'])
+ X_train, X_test, y_train, y_test = train_test_split(tfidf_matrix.toarray(), data['Detected_Emotion'], test_size=0.3, random_state=42)

+ # Train a Random Forest Classifier
  model = RandomForestClassifier()
  model.fit(X_train, y_train)

+ # Function to predict emotion of new text
+ def predict_emotion(text):
+     """Predicts the emotion for the given text."""
+     cleaned_text = clean_text(text)
+     tfidf_text = vectorizer.transform([cleaned_text])
+     predicted_emotion = model.predict(tfidf_text)
+     return predicted_emotion[0]

+ # Gradio Interface
+ iface = gr.Interface(fn=predict_emotion, inputs="text", outputs="text", live=True)

+ # Launch the Gradio Interface
+ if __name__ == "__main__":
+     iface.launch()
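
With the notebook scaffolding removed, app.py is now a self-contained Gradio app: it runs the whole labeling-and-training pipeline at import time and exposes predict_emotion() for inference. A minimal local smoke test might look like the sketch below (hypothetical usage, not part of the commit; it assumes train.csv sits next to app.py):

    # Hypothetical smoke test for the refactored app.py.
    # Importing the module is slow: the transformer pipeline labels every row
    # of train.csv and the random forest is fit before predict_emotion is usable.
    from app import predict_emotion

    # Reuse the example the old notebook tested with.
    print(predict_emotion("let's leave i am scared"))
    # Prints one of the emotion labels learned from the j-hartmann annotations,
    # e.g. 'fear' (the exact output depends on the trained forest).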
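
Once iface.launch() is serving, the same endpoint can also be exercised over HTTP. A sketch using gradio_client, assuming the default local address and the auto-generated /predict route of a single-function gr.Interface:

    from gradio_client import Client

    client = Client("http://127.0.0.1:7860")  # assumed default local Gradio address
    result = client.predict("let's leave i am scared", api_name="/predict")
    print(result)

Note that live=True re-runs predict_emotion as the user types; that stays cheap here because inference is only a TF-IDF transform plus a forest prediction, while the transformer pipeline is used solely to label the training data.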