Spaces:

bangaboy
/

pythonnew

Sleeping

App Files Files Community

bangaboy committed on Feb 19

Commit

3af41db

verified ·

1 Parent(s): 9b3e507

Update app.py

Browse files

Files changed (1) hide show

app.py +435 -0

app.py CHANGED Viewed

	@@ -0,0 +1,435 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.linear_model import LinearRegression
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+# Set page configuration with custom theme
+st.set_page_config(
+    page_title="Data Analytics Hub",
+    page_icon="📊",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS for better styling
+st.markdown("""
+    <style>
+    .main {
+        padding-top: 2rem;
+    }
+    .stButton>button {
+        width: 100%;
+        border-radius: 5px;
+        height: 3em;
+        background-color: #ff4b4b;
+        color: white;
+        border: none;
+    }
+    .stButton>button:hover {
+        background-color: #ff6b6b;
+        color: white;
+    }
+    div[data-testid="stSidebarNav"] {
+        background-image: linear-gradient(#f0f2f6, #e0e2e6);
+        padding: 2rem 0;
+        border-radius: 10px;
+    }
+    .css-1d391kg {
+        padding: 2rem 1rem;
+    }
+    .stAlert {
+        padding: 1rem;
+        border-radius: 5px;
+    }
+    div[data-testid="stMetricValue"] {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 5px;
+    }
+    </style>
+""", unsafe_allow_html=True)
+# Initialize session state
+if 'data' not in st.session_state:
+    # Create sample data
+    np.random.seed(42)
+    dates = pd.date_range('2023-01-01', periods=100, freq='D')
+    st.session_state.data = pd.DataFrame({
+        'date': dates,
+        'sales': np.random.normal(1000, 200, 100),
+        'visitors': np.random.normal(500, 100, 100),
+        'conversion_rate': np.random.uniform(0.01, 0.05, 100),
+        'customer_satisfaction': np.random.normal(4.2, 0.5, 100),
+        'region': np.random.choice(['North', 'South', 'East', 'West'], 100)
+    })
+# Sidebar with enhanced styling
+with st.sidebar:
+    st.image("https://via.placeholder.com/150?text=Analytics+Hub", width=150)
+    st.title("Analytics Hub")
+    selected_page = st.radio(
+        "📑 Navigation",
+        ["🏠 Dashboard", "🔍 Data Explorer", "📊 Visualization", "🤖 ML Predictions"],
+        key="navigation"
+    )
+# Dashboard page
+if selected_page == "🏠 Dashboard":
+    st.title("📊 Data Analytics Dashboard")
+    # Quick stats in a grid
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric(
+            "Total Records",
+            f"{len(st.session_state.data):,}",
+            "Current dataset size"
+        )
+    with col2:
+        st.metric(
+            "Avg Sales",
+            f"${st.session_state.data['sales'].mean():,.2f}",
+            f"{st.session_state.data['sales'].pct_change().mean()*100:.1f}%"
+        )
+    with col3:
+        st.metric(
+            "Avg Visitors",
+            f"{st.session_state.data['visitors'].mean():,.0f}",
+            f"{st.session_state.data['visitors'].pct_change().mean()*100:.1f}%"
+        )
+    with col4:
+        st.metric(
+            "Satisfaction",
+            f"{st.session_state.data['customer_satisfaction'].mean():.2f}",
+            "Average rating"
+        )
+    # Data upload section with better styling
+    st.markdown("### 📁 Upload Your Dataset")
+    upload_col1, upload_col2 = st.columns([2, 3])
+    with upload_col1:
+        uploaded_file = st.file_uploader(
+            "Choose a CSV file",
+            type="csv",
+            help="Upload your CSV file to begin analysis"
+        )
+        if uploaded_file is not None:
+            try:
+                st.session_state.data = pd.read_csv(uploaded_file)
+                st.success("✅ Data uploaded successfully!")
+            except Exception as e:
+                st.error(f"❌ Error uploading file: {e}")
+    with upload_col2:
+        st.markdown("#### Dataset Preview")
+        st.dataframe(
+            st.session_state.data.head(3),
+            use_container_width=True
+        )
+# Data Explorer page
+elif selected_page == "🔍 Data Explorer":
+    st.title("🔍 Data Explorer")
+    # Enhanced data summary
+    col1, col2 = st.columns([1, 2])
+    with col1:
+        st.markdown("### 📊 Dataset Overview")
+        st.info(f"""
+        - **Rows:** {st.session_state.data.shape[0]:,}
+        - **Columns:** {st.session_state.data.shape[1]}
+        - **Memory Usage:** {st.session_state.data.memory_usage().sum() / 1024**2:.2f} MB
+        """)
+    with col2:
+        st.markdown("### 📈 Quick Stats")
+        st.dataframe(
+            st.session_state.data.describe(),
+            use_container_width=True
+        )
+    # Column analysis with better visualization
+    st.markdown("### 🔬 Column Analysis")
+    col1, col2, col3 = st.columns([1, 1, 2])
+    with col1:
+        column = st.selectbox(
+            "Select column:",
+            st.session_state.data.columns,
+            help="Choose a column to analyze"
+        )
+    with col2:
+        if pd.api.types.is_numeric_dtype(st.session_state.data[column]):
+            analysis_type = st.selectbox(
+                "Analysis type:",
+                ["Distribution", "Time Series"] if "date" in column.lower() else ["Distribution"],
+                help="Choose type of analysis"
+            )
+        else:
+            analysis_type = "Value Counts"
+    with col3:
+        if pd.api.types.is_numeric_dtype(st.session_state.data[column]):
+            stats_col1, stats_col2 = st.columns(2)
+            with stats_col1:
+                st.metric("Mean", f"{st.session_state.data[column].mean():.2f}")
+                st.metric("Std Dev", f"{st.session_state.data[column].std():.2f}")
+            with stats_col2:
+                st.metric("Median", f"{st.session_state.data[column].median():.2f}")
+                st.metric("IQR", f"{st.session_state.data[column].quantile(0.75) - st.session_state.data[column].quantile(0.25):.2f}")
+    # Enhanced visualization
+    fig, ax = plt.subplots(figsize=(12, 6))
+    if pd.api.types.is_numeric_dtype(st.session_state.data[column]):
+        sns.set_style("whitegrid")
+        sns.histplot(data=st.session_state.data, x=column, kde=True, ax=ax)
+        ax.set_title(f"Distribution of {column}", pad=20)
+    else:
+        value_counts = st.session_state.data[column].value_counts()
+        sns.barplot(x=value_counts.index, y=value_counts.values, ax=ax)
+        ax.set_title(f"Value Counts for {column}", pad=20)
+        plt.xticks(rotation=45)
+    st.pyplot(fig)
+# Visualization page
+elif selected_page == "📊 Visualization":
+    st.title("📊 Advanced Visualizations")
+    # Enhanced chart selection
+    chart_type = st.selectbox(
+        "Select visualization type:",
+        ["📊 Bar Chart", "📈 Line Chart", "🔵 Scatter Plot", "🌡️ Heatmap"],
+        help="Choose the type of visualization you want to create"
+    )
+    if chart_type in ["📊 Bar Chart", "📈 Line Chart"]:
+        col1, col2, col3 = st.columns([1, 1, 1])
+        with col1:
+            x_column = st.selectbox("X-axis:", st.session_state.data.columns)
+        with col2:
+            y_column = st.selectbox(
+                "Y-axis:",
+                [col for col in st.session_state.data.columns
+                 if pd.api.types.is_numeric_dtype(st.session_state.data[col])]
+            )
+        with col3:
+            color_theme = st.selectbox(
+                "Color theme:",
+                ["viridis", "magma", "plasma", "inferno"]
+            )
+        # Create enhanced visualization
+        fig, ax = plt.subplots(figsize=(12, 6))
+        sns.set_style("whitegrid")
+        sns.set_palette(color_theme)
+        if not pd.api.types.is_numeric_dtype(st.session_state.data[x_column]):
+            agg_data = st.session_state.data.groupby(x_column)[y_column].mean().reset_index()
+            if "Bar" in chart_type:
+                sns.barplot(x=x_column, y=y_column, data=agg_data, ax=ax)
+            else:
+                sns.lineplot(x=x_column, y=y_column, data=agg_data, ax=ax, marker='o')
+        else:
+            if "Bar" in chart_type:
+                sns.barplot(x=x_column, y=y_column, data=st.session_state.data, ax=ax)
+            else:
+                sns.lineplot(x=x_column, y=y_column, data=st.session_state.data, ax=ax)
+        plt.xticks(rotation=45)
+        ax.set_title(f"{y_column} by {x_column}", pad=20)
+        st.pyplot(fig)
+    elif "Scatter" in chart_type:
+        col1, col2, col3 = st.columns([1, 1, 1])
+        with col1:
+            x_column = st.selectbox(
+                "X-axis:",
+                [col for col in st.session_state.data.columns
+                 if pd.api.types.is_numeric_dtype(st.session_state.data[col])]
+            )
+        with col2:
+            y_column = st.selectbox(
+                "Y-axis:",
+                [col for col in st.session_state.data.columns
+                 if pd.api.types.is_numeric_dtype(st.session_state.data[col]) and col != x_column]
+            )
+        with col3:
+            hue_column = st.selectbox(
+                "Color by:",
+                ["None"] + list(st.session_state.data.columns)
+            )
+        fig, ax = plt.subplots(figsize=(12, 6))
+        sns.set_style("whitegrid")
+        if hue_column != "None":
+            sns.scatterplot(x=x_column, y=y_column, data=st.session_state.data, hue=hue_column, ax=ax)
+        else:
+            sns.scatterplot(x=x_column, y=y_column, data=st.session_state.data, ax=ax)
+        ax.set_title(f"{y_column} vs {x_column}", pad=20)
+        st.pyplot(fig)
+    elif "Heatmap" in chart_type:
+        st.markdown("### 🌡️ Correlation Heatmap")
+        numeric_cols = st.session_state.data.select_dtypes(include=['number']).columns.tolist()
+        correlation = st.session_state.data[numeric_cols].corr()
+        fig, ax = plt.subplots(figsize=(12, 8))
+        mask = np.triu(np.ones_like(correlation))
+        sns.heatmap(
+            correlation,
+            mask=mask,
+            annot=True,
+            cmap='coolwarm',
+            ax=ax,
+            center=0,
+            square=True,
+            fmt='.2f',
+            linewidths=1
+        )
+        ax.set_title("Correlation Heatmap", pad=20)
+        st.pyplot(fig)
+# ML Predictions page
+elif selected_page == "🤖 ML Predictions":
+    st.title("🤖 Machine Learning Predictions")
+    # Model configuration
+    st.markdown("### ⚙️ Model Configuration")
+    config_col1, config_col2 = st.columns(2)
+    with config_col1:
+        numeric_cols = st.session_state.data.select_dtypes(include=['number']).columns.tolist()
+        target_column = st.selectbox(
+            "Target variable:",
+            numeric_cols,
+            help="Select the variable you want to predict"
+        )
+    with config_col2:
+        model_type = st.selectbox(
+            "Model type:",
+            ["📊 Linear Regression", "🌲 Random Forest"],
+            help="Choose the type of model to train"
+        )
+    # Feature selection with better UI
+    st.markdown("### 🎯 Feature Selection")
+    feature_cols = [col for col in numeric_cols if col != target_column]
+    selected_features = st.multiselect(
+        "Select features for the model:",
+        feature_cols,
+        default=feature_cols,
+        help="Choose the variables to use as predictors"
+    )
+    # Model training section
+    train_col1, train_col2 = st.columns([2, 1])
+    with train_col1:
+        if st.button("🚀 Train Model", use_container_width=True):
+            if len(selected_features) > 0:
+                with st.spinner("Training model..."):
+                    # Prepare data
+                    X = st.session_state.data[selected_features]
+                    y = st.session_state.data[target_column]
+                    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+                    scaler = StandardScaler()
+                    X_train_scaled = scaler.fit_transform(X_train)
+                    X_test_scaled = scaler.transform(X_test)
+                    if "Linear" in model_type:
+                        model = LinearRegression()
+                    else:
+                        model = RandomForestRegressor(n_estimators=100, random_state=42)
+                    model.fit(X_train_scaled, y_train)
+                    # Store model and scaler in session state
+                    st.session_state.model = model
+                    st.session_state.scaler = scaler
+                    st.session_state.features = selected_features
+                    # Model evaluation
+                    train_score = model.score(X_train_scaled, y_train)
+                    test_score = model.score(X_test_scaled, y_test)
+                    st.success("✨ Model trained successfully!")
+                    # Display metrics
+                    metric_col1, metric_col2 = st.columns(2)
+                    with metric_col1:
+                        st.metric("Training R² Score", f"{train_score:.4f}")
+                    with metric_col2:
+                        st.metric("Testing R² Score", f"{test_score:.4f}")
+                    # Feature importance for Random Forest
+                    if "Random" in model_type:
+                        st.markdown("### 📊 Feature Importance")
+                        importance = pd.DataFrame({
+                            'Feature': selected_features,
+                            'Importance': model.feature_importances_
+                        }).sort_values('Importance', ascending=False)
+                        fig, ax = plt.subplots(figsize=(10, 6))
+                        sns.barplot(x='Importance', y='Feature', data=importance, ax=ax)
+                        ax.set_title("Feature Importance")
+                        st.pyplot(fig)
+            else:
+                st.error("⚠️ Please select at least one feature")
+    # Prediction section
+    st.markdown("### 🎯 Make Predictions")
+    if 'model' in st.session_state:
+        pred_col1, pred_col2 = st.columns([2, 1])
+        with pred_col1:
+            st.markdown("#### Input Features")
+            input_data = {}
+            # Create input fields for each feature
+            for feature in st.session_state.features:
+                min_val = float(st.session_state.data[feature].min())
+                max_val = float(st.session_state.data[feature].max())
+                mean_val = float(st.session_state.data[feature].mean())
+                input_data[feature] = st.slider(
+                    f"{feature}:",
+                    min_value=min_val,
+                    max_value=max_val,
+                    value=mean_val,
+                    help=f"Range: {min_val:.2f} to {max_val:.2f}"
+                )
+        with pred_col2:
+            if st.button("🎯 Predict", use_container_width=True):
+                input_df = pd.DataFrame([input_data])
+                input_scaled = st.session_state.scaler.transform(input_df)
+                prediction = st.session_state.model.predict(input_scaled)[0]
+                st.success(f"Predicted {target_column}: {prediction:.2f}")
+    else:
+        st.info("ℹ️ Train a model first to make predictions")