File size: 4,638 Bytes
4e27a52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc4c155
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import gradio as gr

# Read the life-expectancy dataset from the current working directory.
data = pd.read_csv('Life-Expectancy-Data-Updated.csv')

# Model target and inputs. 'Country' and 'Region' are not used, and only six
# of the numeric columns are kept; an earlier variant also used
# Alcohol_consumption, Hepatitis_B, Measles, Diphtheria, Incidents_HIV,
# GDP_per_capita and the two Economy_status_* indicator columns.
target_col = 'Life_expectancy'
feature_cols = [
    'Infant_deaths',
    'Under_five_deaths',
    'Adult_mortality',
    'BMI',
    'Polio',
    'Schooling',
]
X = data[feature_cols]
y = data[target_col]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Random forest with hyperparameters chosen by CV tuning (per the original
# note); the untuned baseline was n_estimators=100 with the other defaults.
rf_model = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    max_features='sqrt',
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)
rf_model.fit(X_train, y_train)

#-----------------------------------------------------------------------------------

# Column names, in order, for the one-row DataFrame built at prediction time.
# They must match the columns the model was trained on. Only six features are
# used; an earlier variant listed fourteen (adding Alcohol_consumption,
# Hepatitis_B, Measles, Diphtheria, Incidents_HIV, GDP_per_capita and the two
# Economy_status_* indicators).
feature_cols = [
    'Infant_deaths',
    'Under_five_deaths',
    'Adult_mortality',
    'BMI',
    'Polio',
    'Schooling',
]

# NOTE: the model is fitted in-process earlier in this file; loading a
# pickled model via joblib ('rf_model.pkl') is left disabled.

# Prediction callback used by the Gradio interface.
# (An earlier variant took all fourteen features plus an Economy_status
# dropdown that was one-hot encoded into two indicator columns.)
def predict_life_expectancy(
    Infant_deaths, Under_five_deaths, Adult_mortality,
    BMI, Polio, Schooling
):
    """Predict life expectancy from the six model features.

    The arguments are placed, in the order given, into a single-row
    DataFrame whose columns are the module-level ``feature_cols``; that
    frame is passed to ``rf_model.predict``.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    row = [
        Infant_deaths, Under_five_deaths, Adult_mortality,
        BMI, Polio, Schooling,
    ]
    # Naming the columns keeps the frame aligned with the training features.
    frame = pd.DataFrame([row], columns=feature_cols)
    return rf_model.predict(frame)[0]

# One slider per model feature, in the same order as the parameters of
# predict_life_expectancy. Every slider starts at 0 and moves in steps of 0.1.
# Sliders for the features no longer used (Alcohol_consumption, Hepatitis_B,
# Measles, Diphtheria, Incidents_HIV, GDP_per_capita) and the Economy Status
# dropdown were dropped together with those features.
_slider_specs = [
    # (label, maximum, initial value)
    ("Infant_deaths", 1000, 0),
    ("Under_five_deaths", 1000, 0),
    ("Adult_mortality", 1000, 0),
    ("BMI", 50, 25),
    ("Polio", 100, 99),
    ("Schooling", 100, 0),
]
inputs = [
    gr.Slider(0, upper, value=initial, label=name, step=0.1)
    for name, upper, initial in _slider_specs
]

# Assemble the Gradio interface: the slider inputs feed
# predict_life_expectancy and the result is shown in a labelled text box.
_prediction_box = gr.Textbox(label="Life Expectancy Prediction:")
_base_theme = gr.themes.Base()

app = gr.Interface(
    fn=predict_life_expectancy,
    inputs=inputs,
    outputs=_prediction_box,
    title="Life Expectancy Prediction",
    description="Enter values for the features to predict life expectancy.",
    theme=_base_theme,
)

# Start serving the app as soon as this script is executed.
app.launch()