# ts-lags / app.py
# Uploaded by freddyaboulton -- commit e751b33 ("Add app")
import gradio as gr
import pandas as pd
import numpy as np
from scipy.signal import find_peaks
from statsmodels.tsa import stattools
import plotly.graph_objects as go
import matplotlib.pyplot as plt
# Bounds and granularity of the significance-level (alpha) slider shown in the
# UI. The slider value is passed to ``plot_significant_lags`` as ``conf_level``
# and forwarded to ``stattools.acf`` as its ``alpha`` argument.
MIN_ALPHA: float = 0.01  # smallest selectable alpha
MAX_ALPHA: float = 0.2   # largest selectable alpha
STEP: float = 0.01       # slider increment
# Maps a dataset name (the CSV file stem, i.e. "<name>.csv") to the column of
# that CSV holding the series whose autocorrelation is analysed.
target_names = {
    "SeoulBikeData": "Rented Bike Count",
    "daily-total-female-births": "Births",
    "southern_oscillation_evenly_spaced": "oscillation",
    "monthly_co2": "CO2",
    "monthly-sunspots": "Sunspots",
    "daily_bike_shares": "cnt",
    "AirPassengers": "#Passengers",
    "Alcohol_Sales": "S4248SM144NCEN",
    "yahoo_stock": "Adj Close",
    "Miles_Traveled": "TRFVOLUSM227NFWA",
    "DailyDelhiClimateTrain": "meantemp",
    "peyton_manning": "y",
}
def _select_significant_lags(acf_values, ci_intervals):
    """Return the sorted lags to highlight as statistically significant.

    A lag is *significant* when its confidence interval excludes zero. The
    selection keeps every significant lag that is also a local maximum of the
    autocorrelation function, plus any significant lag among the first ten.

    Args:
        acf_values: 1-D array of autocorrelation values, indexed by lag.
        ci_intervals: 2-D array with one row per lag; column 0 is the lower
            and column 1 the upper bound of that lag's confidence interval.

    Returns:
        A sorted 1-D integer array of the selected lags, without duplicates.
    """
    lags = np.arange(len(acf_values))
    significant = (ci_intervals[:, 0] > 0) | (ci_intervals[:, 1] < 0)
    peaks, _ = find_peaks(acf_values)
    significant_peaks = np.intersect1d(lags[significant], peaks)
    leading_significant = lags[:10][significant[:10]]
    # union1d returns the sorted, de-duplicated union of both selections.
    return np.union1d(significant_peaks, leading_significant)


def plot_significant_lags(conf_level: float, dataset: str):
    """Plot the autocorrelation of a dataset and highlight its significant lags.

    Reads ``<dataset>.csv`` from the current working directory, computes the
    autocorrelation function of the target column (up to 400 lags) and draws
    it with the selected significant lags in red, all other lags in faded
    black, and the zero-centred confidence band in blue.

    Args:
        conf_level: Significance level (alpha) handed to ``stattools.acf``;
            the title reports the confidence level as ``(1 - conf_level)``.
        dataset: Key of ``target_names``; also the stem of the CSV to load.

    Returns:
        The plotly ``Figure`` containing the plot.

    Raises:
        KeyError: If ``dataset`` is not a key of ``target_names``.
        FileNotFoundError: If ``<dataset>.csv`` does not exist.
    """
    df = pd.read_csv(dataset + ".csv")
    y = df[target_names[dataset]]
    acf_values, ci_intervals = stattools.acf(
        y, nlags=min(len(y) - 1, 400), fft=True, alpha=conf_level
    )

    lags = np.arange(len(acf_values))
    selected_lags = _select_significant_lags(acf_values, ci_intervals)
    # A boolean mask partitions the lags into two disjoint groups by
    # construction and also copes with an empty selection.
    is_selected = np.isin(lags, selected_lags)

    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(x=lags[is_selected],
                              y=acf_values[is_selected],
                              mode='markers',
                              marker_color='red',
                              marker_size=10))
    fig1.add_trace(go.Scatter(x=lags[~is_selected],
                              y=acf_values[~is_selected],
                              mode='markers',
                              marker_color='black',
                              opacity=0.2,
                              marker_size=10))
    # Subtracting the ACF re-centres the interval around zero, so a point
    # lying outside the band is one whose interval excludes zero.
    fig1.add_trace(go.Scatter(x=lags, y=ci_intervals[:, 0] - acf_values,
                              line_color='blue', opacity=0.1))
    fig1.add_trace(go.Scatter(x=lags, y=ci_intervals[:, 1] - acf_values,
                              line_color='blue', opacity=0.1, fill='tonexty'))

    y_floor = min(-0.2, float(acf_values.min()), float(ci_intervals[:, 0].min()))
    fig1.update_yaxes(range=[y_floor - 0.1, 1.1])

    # round() rather than int(): (1 - 0.07) * 100 evaluates to 92.999..., which
    # int() would truncate to 92.
    confidence_pct = round((1 - conf_level) * 100)
    # tolist() yields plain Python ints so the title reads "[0, 1, ...]" even
    # on NumPy >= 2, where NumPy scalars inside a list print as "np.int64(0)".
    significant_lags = selected_lags.tolist()
    fig1.update_layout(showlegend=False,
                       title_text=f"{dataset}<br>Statistically significant lags for a {confidence_pct}% confidence interval:<br> {significant_lags}")
    return fig1
# Gradio UI: a significance-level slider and a dataset picker feed
# ``plot_significant_lags``; the resulting figure is rendered in a Plot
# component when the "Visualize" button is clicked.
with gr.Blocks() as demo:
    with gr.Column():
        conf_level_slider = gr.Slider(minimum=MIN_ALPHA, maximum=MAX_ALPHA, step=STEP)
        # Every choice must be a key of ``target_names``; the previous
        # "montly_co2" spelling was not, so selecting it raised KeyError.
        # NOTE(review): assumes the data file is named "monthly_co2.csv" -- confirm.
        dataset = gr.Dropdown(
            choices=["SeoulBikeData", "monthly_co2", "daily_bike_shares", "peyton_manning"],
            value="peyton_manning",
        )
        inputs = [conf_level_slider, dataset]
    with gr.Column():
        plot = gr.Plot()
    button = gr.Button("Visualize")
    button.click(plot_significant_lags, inputs=inputs, outputs=plot)

demo.launch()