Spaces:
Build error
Build error
import gradio as gr | |
import numpy as np | |
from sklearn.datasets import load_diabetes | |
from sklearn.linear_model import RidgeCV | |
from sklearn.feature_selection import SelectFromModel | |
from time import time | |
from sklearn.feature_selection import SequentialFeatureSelector | |
import matplotlib | |
matplotlib.use("Agg") | |
import matplotlib.pyplot as plt | |
def select_features(method, num_features):
    """Select diabetes-dataset features with the chosen strategy and time it.

    Args:
        method: ``"model"`` for importance-based selection with
            ``SelectFromModel``, or ``"sfs-forward"`` / ``"sfs-backward"``
            for greedy ``SequentialFeatureSelector`` in that direction.
        num_features: How many features to keep. Converted with ``int()``
            and capped at the number of features in the dataset.

    Returns:
        A message naming the selected features and the seconds spent fitting
        the selector, or a short explanatory message when ``method`` or
        ``num_features`` cannot be used.
    """
    sfs_directions = {"sfs-forward": "forward", "sfs-backward": "backward"}
    valid_methods = ("model", *sfs_directions)

    # Validate before loading any data: an unselected radio button arrives as
    # None, which previously fell through every branch and crashed on an
    # unbound local variable.
    if method not in valid_methods:
        return f"Please choose a method: {', '.join(valid_methods)}"
    n_select = int(num_features)
    if n_select < 1:
        return "Number of features must be at least 1"

    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    feature_names = np.array(diabetes.feature_names)
    n_total = X.shape[1]
    n_select = min(n_select, n_total)

    # Left unfitted on purpose: both selectors fit their own clone of the
    # estimator, so fitting it here would only be wasted work.
    ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5))

    if method == "model":
        # threshold=-inf disables the importance cut-off so that exactly the
        # `n_select` most important features are kept, honouring the request
        # instead of a fixed "top two" threshold.
        selector = SelectFromModel(
            ridge, threshold=-np.inf, max_features=n_select
        )
    elif n_select < n_total:
        selector = SequentialFeatureSelector(
            ridge,
            n_features_to_select=n_select,
            direction=sfs_directions[method],
        )
    else:
        # SequentialFeatureSelector rejects n_features_to_select equal to the
        # feature count; keeping everything needs no search at all.
        selector = None

    tic = time()
    if selector is None:
        support = np.ones(n_total, dtype=bool)
    else:
        support = selector.fit(X, y).get_support()
    execution_time = time() - tic

    selected_features = feature_names[support]
    return f"Selected the following features: {', '.join(selected_features)} in {execution_time:.3f} seconds"
title = "Selecting features with Sequential Feature Selection"

# Gradio UI: an explanatory header, the two inputs (selection method and
# number of features), an output textbox, and a button that runs
# `select_features` on the current inputs.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown("""
This app demonstrates feature selection techniques using model based selection and sequential feature selection.\n\n
Model based selection is based on feature importance. Each feature is assigned a score on how much influence they have on the model output.
The feature with highest score is considered the most important feature.\n\n
Sequential feature selection is based on greedy approach. In greedy approach, the feature is added or removed to the selected features at each iteration
based on the model performance score.\n\n
This app uses Ridge estimator and the diabetes dataset from sklearn. Diabetes dataset consist of quantitative measure of diabetes progression and
10 following variables obtained from 442 diabetes patients:
1. Age (age)
2. Sex (sex)
3. Body mass index (bmi)
4. Average blood pressure (bp)
5. Total serum cholesterol (s1)
6. Low-density lipoproteins (s2)
7. High-density lipoproteins (s3)
8. Total cholesterol / HDL (s4)
9. Possibly log of serum triglycerides level (s5)
10. Blood sugar level (s6)\n\n
This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_select_from_model_diabetes.html#sphx-glr-auto-examples-feature-selection-plot-select-from-model-diabetes-py)
""")
    # Pre-select a method so that pressing "Select" straight away never hands
    # the callback None.
    method = gr.Radio(
        ["model", "sfs-forward", "sfs-backward"], value="model", label="Method"
    )
    # Capped at 9: the dataset has 10 features and SequentialFeatureSelector
    # requires n_features_to_select to be strictly smaller than that.
    num_features = gr.Slider(
        minimum=2, maximum=9, step=1, label="Number of features"
    )
    output = gr.Textbox(label="Output Box")
    select_btn = gr.Button("Select")
    select_btn.click(
        fn=select_features, inputs=[method, num_features], outputs=output
    )

# Kept at module level so the app starts exactly as before when the file runs.
demo.launch()