bangaboy committed on
Commit
3af41db
·
verified ·
1 Parent(s): 9b3e507

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +435 -0
app.py CHANGED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ from sklearn.linear_model import LinearRegression
7
+ from sklearn.ensemble import RandomForestRegressor
8
+ from sklearn.preprocessing import StandardScaler
9
+ from sklearn.model_selection import train_test_split
10
+
11
# Page-level settings: browser-tab title/icon, wide layout, sidebar open.
_PAGE_SETTINGS = {
    "page_title": "Data Analytics Hub",
    "page_icon": "📊",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Stylesheet overrides for buttons, the sidebar navigation, alerts and metric
# values. It is injected as raw HTML, hence unsafe_allow_html below.
_CUSTOM_CSS = """
<style>
.main {
padding-top: 2rem;
}
.stButton>button {
width: 100%;
border-radius: 5px;
height: 3em;
background-color: #ff4b4b;
color: white;
border: none;
}
.stButton>button:hover {
background-color: #ff6b6b;
color: white;
}
div[data-testid="stSidebarNav"] {
background-image: linear-gradient(#f0f2f6, #e0e2e6);
padding: 2rem 0;
border-radius: 10px;
}
.css-1d391kg {
padding: 2rem 1rem;
}
.stAlert {
padding: 1rem;
border-radius: 5px;
}
div[data-testid="stMetricValue"] {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 5px;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
56
+
57
# Seed the session with a synthetic 100-row dataset the first time the script
# runs for a session, so the pages have data to work with.
if 'data' not in st.session_state:
    np.random.seed(42)  # fixed seed: the same sample is generated every time

    # The columns are drawn one after another; the order of the random calls
    # determines the generated values, so it must not be rearranged.
    sample_columns = {}
    sample_columns['date'] = pd.date_range('2023-01-01', periods=100, freq='D')
    sample_columns['sales'] = np.random.normal(1000, 200, 100)
    sample_columns['visitors'] = np.random.normal(500, 100, 100)
    sample_columns['conversion_rate'] = np.random.uniform(0.01, 0.05, 100)
    sample_columns['customer_satisfaction'] = np.random.normal(4.2, 0.5, 100)
    sample_columns['region'] = np.random.choice(['North', 'South', 'East', 'West'], 100)

    st.session_state.data = pd.DataFrame(sample_columns)
70
+
71
# Sidebar: logo, app title and the page selector. `selected_page` holds the
# chosen label and decides which page section below is rendered.
_PAGE_LABELS = ["🏠 Dashboard", "🔍 Data Explorer", "📊 Visualization", "🤖 ML Predictions"]

st.sidebar.image("https://via.placeholder.com/150?text=Analytics+Hub", width=150)
st.sidebar.title("Analytics Hub")
selected_page = st.sidebar.radio("📑 Navigation", _PAGE_LABELS, key="navigation")
80
+
81
# Dashboard page: headline metrics, CSV upload and a three-row preview.
if selected_page == "🏠 Dashboard":
    st.title("📊 Data Analytics Dashboard")

    # Reserve the metrics row now so it stays at the top of the page, but fill
    # it only after the upload has been handled. Otherwise the metrics would
    # describe the previous dataset on the run in which a new file arrives.
    metrics_area = st.container()

    # Data upload section
    st.markdown("### 📁 Upload Your Dataset")
    upload_col1, upload_col2 = st.columns([2, 3])

    with upload_col1:
        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type="csv",
            help="Upload your CSV file to begin analysis"
        )
        if uploaded_file is not None:
            try:
                st.session_state.data = pd.read_csv(uploaded_file)
                st.success("✅ Data uploaded successfully!")
            except Exception as e:
                # UI boundary: report any parsing problem to the user instead
                # of crashing the page; the previous dataset stays in place.
                st.error(f"❌ Error uploading file: {e}")

    with upload_col2:
        st.markdown("#### Dataset Preview")
        st.dataframe(
            st.session_state.data.head(3),
            use_container_width=True
        )

    dashboard_data = st.session_state.data

    def _numeric_or_none(name):
        """Return column *name* when it exists and is numeric, else None.

        The metric names come from the built-in sample dataset; an uploaded
        CSV is not guaranteed to contain them.
        """
        if name not in dashboard_data.columns:
            return None
        series = dashboard_data[name]
        return series if pd.api.types.is_numeric_dtype(series) else None

    sales = _numeric_or_none('sales')
    visitors = _numeric_or_none('visitors')
    satisfaction = _numeric_or_none('customer_satisfaction')

    # Quick stats in a grid
    with metrics_area:
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric(
                "Total Records",
                f"{len(dashboard_data):,}",
                "Current dataset size"
            )

        with col2:
            if sales is None:
                st.metric("Avg Sales", "N/A")
            else:
                st.metric(
                    "Avg Sales",
                    f"${sales.mean():,.2f}",
                    f"{sales.pct_change().mean()*100:.1f}%"
                )

        with col3:
            if visitors is None:
                st.metric("Avg Visitors", "N/A")
            else:
                st.metric(
                    "Avg Visitors",
                    f"{visitors.mean():,.0f}",
                    f"{visitors.pct_change().mean()*100:.1f}%"
                )

        with col4:
            if satisfaction is None:
                st.metric("Satisfaction", "N/A")
            else:
                st.metric(
                    "Satisfaction",
                    f"{satisfaction.mean():.2f}",
                    "Average rating"
                )
139
# Data Explorer page: dataset summary, per-column statistics and one plot.
# Written as a standalone `if`: the navigation radio yields exactly one page
# label per run, so this behaves the same as a branch of an if/elif chain.
if selected_page == "🔍 Data Explorer":
    st.title("🔍 Data Explorer")
    explorer_data = st.session_state.data

    # Enhanced data summary
    col1, col2 = st.columns([1, 2])

    with col1:
        st.markdown("### 📊 Dataset Overview")
        st.info(f"""
- **Rows:** {explorer_data.shape[0]:,}
- **Columns:** {explorer_data.shape[1]}
- **Memory Usage:** {explorer_data.memory_usage().sum() / 1024**2:.2f} MB
""")

    with col2:
        st.markdown("### 📈 Quick Stats")
        st.dataframe(
            explorer_data.describe(),
            use_container_width=True
        )

    # Column analysis
    st.markdown("### 🔬 Column Analysis")

    col1, col2, col3 = st.columns([1, 1, 2])

    with col1:
        column = st.selectbox(
            "Select column:",
            explorer_data.columns,
            help="Choose a column to analyze"
        )

    series = explorer_data[column]
    is_numeric = pd.api.types.is_numeric_dtype(series)

    with col2:
        if is_numeric:
            # str() keeps this working for non-string column labels.
            # NOTE(review): the selected analysis type is not consumed by the
            # plotting code below; only the distribution plot is drawn.
            analysis_type = st.selectbox(
                "Analysis type:",
                ["Distribution", "Time Series"] if "date" in str(column).lower() else ["Distribution"],
                help="Choose type of analysis"
            )
        else:
            analysis_type = "Value Counts"

    with col3:
        if is_numeric:
            stats_col1, stats_col2 = st.columns(2)
            with stats_col1:
                st.metric("Mean", f"{series.mean():.2f}")
                st.metric("Std Dev", f"{series.std():.2f}")
            with stats_col2:
                st.metric("Median", f"{series.median():.2f}")
                st.metric("IQR", f"{series.quantile(0.75) - series.quantile(0.25):.2f}")

    # The style must be set before the axes are created to affect this figure.
    sns.set_style("whitegrid")
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        if is_numeric:
            sns.histplot(data=explorer_data, x=column, kde=True, ax=ax)
            ax.set_title(f"Distribution of {column}", pad=20)
        else:
            value_counts = series.value_counts()
            sns.barplot(x=value_counts.index, y=value_counts.values, ax=ax)
            ax.set_title(f"Value Counts for {column}", pad=20)
            # Rotate via the axes rather than pyplot's implicit current axes.
            ax.tick_params(axis='x', labelrotation=45)

        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates until it is closed; the script
        # reruns on each interaction, so close it to avoid piling figures up.
        plt.close(fig)
206
# Visualization page: bar/line chart, scatter plot or correlation heatmap.
# Written as a standalone `if`: the navigation radio yields exactly one page
# label per run, so this behaves the same as a branch of an if/elif chain.
if selected_page == "📊 Visualization":
    st.title("📊 Advanced Visualizations")
    viz_data = st.session_state.data
    numeric_columns = [
        col for col in viz_data.columns
        if pd.api.types.is_numeric_dtype(viz_data[col])
    ]

    chart_type = st.selectbox(
        "Select visualization type:",
        ["📊 Bar Chart", "📈 Line Chart", "🔵 Scatter Plot", "🌡️ Heatmap"],
        help="Choose the type of visualization you want to create"
    )

    if chart_type in ["📊 Bar Chart", "📈 Line Chart"]:
        col1, col2, col3 = st.columns([1, 1, 1])

        with col1:
            x_column = st.selectbox("X-axis:", viz_data.columns)

        with col2:
            y_column = st.selectbox("Y-axis:", numeric_columns)

        with col3:
            color_theme = st.selectbox(
                "Color theme:",
                ["viridis", "magma", "plasma", "inferno"]
            )

        if y_column is None:
            # The selectbox has no options when the dataset has no numeric column.
            st.warning("⚠️ This chart needs at least one numeric column for the Y-axis.")
        else:
            # Style and palette are applied before the axes are created so
            # they affect this figure. The palette is scoped with a context
            # manager so the choice does not leak into plots drawn elsewhere.
            sns.set_style("whitegrid")
            with sns.color_palette(color_theme):
                fig, ax = plt.subplots(figsize=(12, 6))
                try:
                    if not pd.api.types.is_numeric_dtype(viz_data[x_column]):
                        # Categorical X: plot the mean of Y per category.
                        agg_data = viz_data.groupby(x_column)[y_column].mean().reset_index()

                        if "Bar" in chart_type:
                            sns.barplot(x=x_column, y=y_column, data=agg_data, ax=ax)
                        else:
                            sns.lineplot(x=x_column, y=y_column, data=agg_data, ax=ax, marker='o')
                    else:
                        if "Bar" in chart_type:
                            sns.barplot(x=x_column, y=y_column, data=viz_data, ax=ax)
                        else:
                            sns.lineplot(x=x_column, y=y_column, data=viz_data, ax=ax)

                    ax.tick_params(axis='x', labelrotation=45)
                    ax.set_title(f"{y_column} by {x_column}", pad=20)
                    st.pyplot(fig)
                finally:
                    # Figures stay registered with pyplot until closed.
                    plt.close(fig)

    elif "Scatter" in chart_type:
        col1, col2, col3 = st.columns([1, 1, 1])

        with col1:
            x_column = st.selectbox("X-axis:", numeric_columns)

        with col2:
            y_column = st.selectbox(
                "Y-axis:",
                [col for col in numeric_columns if col != x_column]
            )

        with col3:
            hue_column = st.selectbox(
                "Color by:",
                ["None"] + list(viz_data.columns)
            )

        if x_column is None or y_column is None:
            st.warning("⚠️ A scatter plot needs at least two numeric columns.")
        else:
            sns.set_style("whitegrid")
            fig, ax = plt.subplots(figsize=(12, 6))
            try:
                # "None" is the sentinel option meaning "no colour grouping".
                hue = None if hue_column == "None" else hue_column
                sns.scatterplot(x=x_column, y=y_column, data=viz_data, hue=hue, ax=ax)

                ax.set_title(f"{y_column} vs {x_column}", pad=20)
                st.pyplot(fig)
            finally:
                plt.close(fig)

    elif "Heatmap" in chart_type:
        st.markdown("### 🌡️ Correlation Heatmap")

        numeric_cols = viz_data.select_dtypes(include=['number']).columns.tolist()

        if len(numeric_cols) < 2:
            # With fewer than two columns every cell would be masked out.
            st.warning("⚠️ A correlation heatmap needs at least two numeric columns.")
        else:
            correlation = viz_data[numeric_cols].corr()

            fig, ax = plt.subplots(figsize=(12, 8))
            try:
                # Hide the upper triangle (diagonal included): the matrix is
                # symmetric, so only the lower half carries information.
                mask = np.triu(np.ones_like(correlation, dtype=bool))
                sns.heatmap(
                    correlation,
                    mask=mask,
                    annot=True,
                    cmap='coolwarm',
                    ax=ax,
                    center=0,
                    square=True,
                    fmt='.2f',
                    linewidths=1
                )
                ax.set_title("Correlation Heatmap", pad=20)
                st.pyplot(fig)
            finally:
                plt.close(fig)
313
# ML Predictions page: configure and train a regressor, then predict from
# slider inputs. The trained model, its scaler, feature list and target name
# are kept in st.session_state so they survive script reruns.
# Written as a standalone `if`: the navigation radio yields exactly one page
# label per run, so this behaves the same as a branch of an if/elif chain.
if selected_page == "🤖 ML Predictions":
    st.title("🤖 Machine Learning Predictions")
    ml_data = st.session_state.data

    # Model configuration
    st.markdown("### ⚙️ Model Configuration")

    config_col1, config_col2 = st.columns(2)

    with config_col1:
        numeric_cols = ml_data.select_dtypes(include=['number']).columns.tolist()
        target_column = st.selectbox(
            "Target variable:",
            numeric_cols,
            help="Select the variable you want to predict"
        )

    with config_col2:
        model_type = st.selectbox(
            "Model type:",
            ["📊 Linear Regression", "🌲 Random Forest"],
            help="Choose the type of model to train"
        )

    # Feature selection
    st.markdown("### 🎯 Feature Selection")
    feature_cols = [col for col in numeric_cols if col != target_column]
    selected_features = st.multiselect(
        "Select features for the model:",
        feature_cols,
        default=feature_cols,
        help="Choose the variables to use as predictors"
    )

    # Model training section
    train_col1, train_col2 = st.columns([2, 1])

    with train_col1:
        if st.button("🚀 Train Model", use_container_width=True):
            if target_column is None:
                # The target selectbox is empty when no numeric column exists.
                st.error("⚠️ The dataset has no numeric column to predict")
            elif not selected_features:
                st.error("⚠️ Please select at least one feature")
            else:
                with st.spinner("Training model..."):
                    # Rows with missing values would make fitting fail, so
                    # train only on complete rows of the chosen columns.
                    training_rows = ml_data[selected_features + [target_column]].dropna()

                    if len(training_rows) < 2:
                        # The split needs one row each for train and test.
                        st.error("⚠️ Not enough complete rows to train a model")
                    else:
                        X = training_rows[selected_features]
                        y = training_rows[target_column]

                        X_train, X_test, y_train, y_test = train_test_split(
                            X, y, test_size=0.2, random_state=42
                        )

                        # Fit the scaler on the training split only, then
                        # reuse it for the test split and for predictions.
                        scaler = StandardScaler()
                        X_train_scaled = scaler.fit_transform(X_train)
                        X_test_scaled = scaler.transform(X_test)

                        if "Linear" in model_type:
                            model = LinearRegression()
                        else:
                            model = RandomForestRegressor(n_estimators=100, random_state=42)

                        model.fit(X_train_scaled, y_train)

                        # Store everything the prediction section needs. The
                        # target name is stored too, so a prediction is still
                        # labelled correctly if the selectbox changes later.
                        st.session_state.model = model
                        st.session_state.scaler = scaler
                        st.session_state.features = selected_features
                        st.session_state.target = target_column

                        # Model evaluation
                        train_score = model.score(X_train_scaled, y_train)
                        test_score = model.score(X_test_scaled, y_test)

                        st.success("✨ Model trained successfully!")

                        # Display metrics
                        metric_col1, metric_col2 = st.columns(2)
                        with metric_col1:
                            st.metric("Training R² Score", f"{train_score:.4f}")
                        with metric_col2:
                            st.metric("Testing R² Score", f"{test_score:.4f}")

                        # Feature importance for Random Forest
                        if "Random" in model_type:
                            st.markdown("### 📊 Feature Importance")
                            importance = pd.DataFrame({
                                'Feature': selected_features,
                                'Importance': model.feature_importances_
                            }).sort_values('Importance', ascending=False)

                            fig, ax = plt.subplots(figsize=(10, 6))
                            try:
                                sns.barplot(x='Importance', y='Feature', data=importance, ax=ax)
                                ax.set_title("Feature Importance")
                                st.pyplot(fig)
                            finally:
                                # Figures stay registered with pyplot until closed.
                                plt.close(fig)

    # Prediction section
    st.markdown("### 🎯 Make Predictions")
    if 'model' not in st.session_state:
        st.info("ℹ️ Train a model first to make predictions")
    else:
        trained_features = st.session_state.features
        # Fall back to the current selection for models stored without a target.
        trained_target = st.session_state.get('target', target_column)

        # The dataset can be replaced on the Dashboard after training; the
        # stored feature names may then be absent from the current data.
        missing_features = [f for f in trained_features if f not in ml_data.columns]

        if missing_features:
            st.warning(
                "⚠️ The trained model uses columns that are not in the current "
                f"dataset ({', '.join(map(str, missing_features))}). Train a new model."
            )
        else:
            pred_col1, pred_col2 = st.columns([2, 1])

            with pred_col1:
                st.markdown("#### Input Features")
                input_data = {}

                # One input per trained feature, bounded by the observed range.
                for feature in trained_features:
                    min_val = float(ml_data[feature].min())
                    max_val = float(ml_data[feature].max())
                    mean_val = float(ml_data[feature].mean())

                    if min_val < max_val:
                        input_data[feature] = st.slider(
                            f"{feature}:",
                            min_value=min_val,
                            max_value=max_val,
                            value=mean_val,
                            help=f"Range: {min_val:.2f} to {max_val:.2f}"
                        )
                    else:
                        # A column with a single distinct value leaves a slider
                        # no range to select from; pin the input to that value.
                        input_data[feature] = mean_val
                        st.caption(f"{feature}: fixed at {mean_val:.2f} (no range in the current data)")

            with pred_col2:
                if st.button("🎯 Predict", use_container_width=True):
                    input_df = pd.DataFrame([input_data])
                    input_scaled = st.session_state.scaler.transform(input_df)
                    prediction = st.session_state.model.predict(input_scaled)[0]

                    st.success(f"Predicted {trained_target}: {prediction:.2f}")