personalized-forecasting

Sleeping

File size: 6,129 Bytes

b2108ae
e5e340b
 
700a890
e5e340b
 
 
c0809e5
700a890
7cecffc
4d6d97a
340176f
700a890
8d3eb2c
700a890
 
e5e340b
2b898e9
 
700a890
34960fe
 
700a890
34960fe
e5e340b
 
91a8ee3
 
 
 
 
 
700a890
91a8ee3
 
700a890
91a8ee3
 
 
 
700a890
 
 
91a8ee3
700a890
 
e5e340b
7cecffc
e5e340b
700a890
e5e340b
700a890
e5e340b
700a890
e5e340b
700a890
e5e340b
 
 
700a890
a4e353d
2b898e9
a4e353d
8d3eb2c
c0f7916
700a890
 
2b898e9
0a98de7
08750ad
1d5eb19
700a890
08750ad
700a890
8c26465
700a890
e5e340b
3127dc9
700a890
 
3127dc9
700a890
8c26465
700a890
 
 
8c26465
 
 
700a890
8c26465
f0efcd9
 
8c26465
e5e340b
d511c44
 
700a890
1258ec5
700a890
c65f546
700a890
 
1258ec5
700a890
 
ef2ab0c
 
1258ec5
700a890
ef2ab0c
 
700a890
1258ec5
c0f7916
700a890
65275be
 
c2b17c7
78d4c43
700a890
 
7cecffc
d511c44
932e273
b1decbe
7cecffc
d511c44
 
d066a0e

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import time
from neuralforecast.core import NeuralForecast
from neuralforecast.models import NHITS, TimesNet, LSTM, TFT
from neuralforecast.losses.pytorch import HuberMQLoss
from neuralforecast.utils import AirPassengersDF
import plotly.graph_objects as go

# Configure the Streamlit page to use the 'wide' layout.
st.set_page_config(layout='wide')

def generate_forecast(model, df, tag=False):
    """Return predictions from ``model``.

    Args:
        model: Object exposing a ``predict`` method.
        df: Frame forwarded as ``predict(df=df)`` unless ``tag`` is
            ``'retrain'``.
        tag: When equal to ``'retrain'``, ``predict`` is called without any
            arguments; any other value forwards ``df``.

    Returns:
        Whatever ``model.predict`` returns.
    """
    predict_kwargs = {} if tag == 'retrain' else {'df': df}
    return model.predict(**predict_kwargs)

def determine_frequency(df):
    """Infer the pandas frequency alias of the ``ds`` column of ``df``.

    The ``ds`` column of the passed frame is converted to datetimes in place.
    Rows with duplicate timestamps are dropped (on a derived frame, not on
    ``df`` itself) before the frequency is inferred.

    Args:
        df: DataFrame with a ``ds`` column parseable by ``pd.to_datetime``.

    Returns:
        The alias reported by ``pd.infer_freq``, or ``'D'`` (after showing a
        Streamlit warning) when no frequency could be inferred.
    """
    df['ds'] = pd.to_datetime(df['ds'])
    timestamps = df.drop_duplicates(subset='ds').set_index('ds').index
    try:
        freq = pd.infer_freq(timestamps)
    except ValueError:
        # pd.infer_freq raises instead of returning None when it is given
        # fewer than three timestamps; route that case into the same
        # daily-frequency fallback used for irregular dates.
        freq = None
    if not freq:
        st.warning('Defaulting to Daily frequency due to date inconsistencies. Please check your data.', icon="⚠️")
        freq = 'D'
    return freq

def plot_forecasts(forecast_df, train_df, title):
    """Draw the history and forecast as a Plotly line chart in Streamlit.

    ``train_df`` and ``forecast_df`` are concatenated and indexed by ``ds``.
    The ``y`` column is plotted as the historical series, and the first column
    whose name contains ``'median'`` as the forecast. When columns containing
    ``'lo-90'`` and ``'hi-90'`` are both present, the area between them is
    shaded and labelled as the 90% interval.

    Args:
        forecast_df: Frame holding ``ds`` plus the forecast columns.
        train_df: Frame holding ``ds`` and ``y``.
        title: Chart title.

    Raises:
        KeyError: If no column name contains ``'median'``.
    """
    combined = pd.concat([train_df, forecast_df]).set_index('ds')

    def first_column_with(token):
        # First column whose name contains the token, or None when absent.
        for name in combined.columns:
            if token in name:
                return name
        return None

    median_col = first_column_with('median')
    lower_col = first_column_with('lo-90')
    upper_col = first_column_with('hi-90')

    if median_col is None:
        raise KeyError("No forecast column found in the data.")

    timeline = combined.index
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=timeline, y=combined['y'], mode='lines', name='Historical'))
    fig.add_trace(go.Scatter(x=timeline, y=combined[median_col], mode='lines', name='Forecast'))

    if lower_col and upper_col:
        band_color = 'rgba(0,100,80,0.2)'
        # The upper bound goes first so that 'tonexty' on the lower bound
        # fills the region between the two traces.
        fig.add_trace(go.Scatter(
            x=timeline,
            y=combined[upper_col],
            mode='lines',
            line=dict(color=band_color),
            showlegend=False,
        ))
        fig.add_trace(go.Scatter(
            x=timeline,
            y=combined[lower_col],
            mode='lines',
            line=dict(color=band_color),
            fill='tonexty',
            fillcolor=band_color,
            name='90% Confidence Interval',
        ))

    fig.update_layout(
        title=title,
        xaxis_title='Timestamp [t]',
        yaxis_title='Value',
        template='plotly_white',
    )
    st.plotly_chart(fig)

def select_model(horizon, model_type, max_steps=50):
    """Instantiate the forecasting model named by ``model_type``.

    Every supported model is configured with an input window of five times
    the horizon, ``scaler_type='standard'`` and a ``HuberMQLoss(level=[90])``
    loss. Only the requested model is constructed.

    Args:
        horizon: Forecast horizon passed as ``h``.
        model_type: One of ``'NHITS'``, ``'TimesNet'``, ``'LSTM'``, ``'TFT'``.
        max_steps: Training step limit forwarded to the model.

    Returns:
        The configured model instance.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    def quantile_loss():
        # A fresh loss object per model, as each constructor call received.
        return HuberMQLoss(level=[90])

    def build_nhits():
        return NHITS(
            input_size=5 * horizon,
            h=horizon,
            max_steps=max_steps,
            stack_types=['identity'] * 3,
            n_blocks=[1] * 3,
            mlp_units=[[256, 256] for _ in range(3)],
            batch_size=32,
            scaler_type='standard',
            loss=quantile_loss(),
        )

    def build_timesnet():
        return TimesNet(
            h=horizon,
            input_size=horizon * 5,
            hidden_size=32,
            conv_hidden_size=64,
            loss=quantile_loss(),
            scaler_type='standard',
            learning_rate=1e-3,
            max_steps=max_steps,
        )

    def build_lstm():
        return LSTM(
            h=horizon,
            input_size=horizon * 5,
            loss=quantile_loss(),
            scaler_type='standard',
            encoder_n_layers=3,
            encoder_hidden_size=256,
            context_size=10,
            decoder_hidden_size=256,
            decoder_layers=3,
            max_steps=max_steps,
        )

    def build_tft():
        return TFT(
            h=horizon,
            input_size=horizon * 5,
            hidden_size=96,
            loss=quantile_loss(),
            learning_rate=0.005,
            scaler_type='standard',
            windows_batch_size=128,
            max_steps=max_steps,
        )

    builders = (
        ('NHITS', build_nhits),
        ('TimesNet', build_timesnet),
        ('LSTM', build_lstm),
        ('TFT', build_tft),
    )
    for name, build in builders:
        if model_type == name:
            return build()
    raise ValueError(f"Unsupported model type: {model_type}")

def model_train(df, model, freq):
    """Fit a single-model ``NeuralForecast`` wrapper on ``df``.

    The ``ds`` column of ``df`` is converted to datetimes in place before
    fitting.

    Args:
        df: Training frame containing a ``ds`` column.
        model: Model instance to wrap.
        freq: Frequency passed to ``NeuralForecast``.

    Returns:
        The fitted ``NeuralForecast`` object.
    """
    forecaster = NeuralForecast(models=[model], freq=freq)
    df['ds'] = pd.to_datetime(df['ds'])
    forecaster.fit(df=df)
    return forecaster

def forecast_time_series(df, model_type, horizon, max_steps, y_col):
    """Train the chosen model on ``df`` and render its forecast.

    Steps, in order: determine the data frequency and show it in the sidebar,
    build and fit the model, predict, store the result in
    ``st.session_state.forecast_results``, plot it, report the elapsed time,
    and show the input and forecast tables in two tabs.

    Args:
        df: Frame with ``unique_id``, ``ds`` and ``y`` columns.
        model_type: Model name understood by ``select_model``.
        horizon: Forecast horizon.
        max_steps: Training step limit.
        y_col: Name of the target column, used in the chart title.
    """
    started_at = time.time()

    freq = determine_frequency(df)
    st.sidebar.write(f"Data frequency: {freq}")

    fitted = model_train(df, select_model(horizon, model_type, max_steps), freq)

    forecast_results = {model_type: generate_forecast(fitted, df, tag='retrain')}
    st.session_state.forecast_results = forecast_results

    for model_name in forecast_results:
        plot_forecasts(forecast_results[model_name], df, f'{model_name} Forecast for {y_col}')

    elapsed = time.time() - started_at
    st.success(f"Time taken for {model_type} forecast: {elapsed:.2f} seconds")

    if 'forecast_results' not in st.session_state:
        return

    st.markdown('Download Input and Forecast Data below')
    tab_insample, tab_forecast = st.tabs(["Input data", "Forecast"])

    with tab_insample:
        # The input table is shown without the unique_id column.
        st.write(df.drop(columns="unique_id"))

    with tab_forecast:
        if model_type in forecast_results:
            st.write(forecast_results[model_type])

@st.cache_data
def load_default():
    """Return a copy of the bundled ``AirPassengersDF`` sample frame.

    The function is wrapped with ``st.cache_data``.
    """
    default_frame = AirPassengersDF.copy()
    return default_frame

def personalized_forecasting():
    """Render the "Personalized Neural Forecasting" page.

    The sidebar lets the user upload a CSV (the default dataset from
    ``load_default`` is used otherwise), choose the date/time and target
    columns, pick a model, a forecast horizon and a training step limit.
    Pressing "Submit" trains the model and renders the forecast via
    ``forecast_time_series``.

    The selected columns are reshaped into a frame with columns
    ``unique_id`` (constant ``1``), ``ds`` and ``y``.
    """
    st.title("Personalized Neural Forecasting")
    st.markdown("Train a time series forecasting model from scratch using various deep neural network models.")

    with st.sidebar.expander("Upload and Configure Dataset", expanded=True):
        uploaded_file = st.file_uploader("Upload your time series data (CSV)", type=["csv"])
        df = pd.read_csv(uploaded_file) if uploaded_file else load_default()

        columns = df.columns.tolist()
        ds_col = st.selectbox("Select Date/Time column", options=columns, index=columns.index('ds') if 'ds' in columns else 0)
        target_columns = [col for col in columns if col != ds_col]
        if not target_columns:
            # With fewer than two columns there is nothing to forecast; stop
            # here instead of failing later on a missing target column.
            st.error("The dataset needs a date/time column and at least one target column.")
            return

        # Preselect 'y' when it exists, mirroring the 'ds' default above.
        # Falling back to index 0 unconditionally would pick 'unique_id' for
        # the default dataset.
        y_col = st.selectbox("Select Target column", options=target_columns, index=target_columns.index('y') if 'y' in target_columns else 0)

        # Keep only the two chosen columns before renaming, so that an
        # unselected column already called 'ds' or 'y' cannot end up as a
        # duplicate column name after the rename.
        df = df[[ds_col, y_col]].rename(columns={ds_col: 'ds', y_col: 'y'}).assign(unique_id=1)[['unique_id', 'ds', 'y']]

    st.sidebar.subheader("Dynamic Model Selection and Forecasting")
    dynamic_model_choice = st.sidebar.selectbox("Select model", ["NHITS", "TimesNet", "LSTM", "TFT"], key="dynamic_model_choice")
    # A horizon or step count below 1 cannot produce a forecast, so the
    # widgets reject such values up front.
    dynamic_horizon = st.sidebar.number_input("Forecast horizon", min_value=1, value=12)
    dynamic_max_steps = st.sidebar.number_input('Max steps', min_value=1, value=20)

    if st.sidebar.button("Submit"):
        with st.spinner('Training model...'):
            forecast_time_series(df, dynamic_model_choice, dynamic_horizon, dynamic_max_steps, y_col)

# Register the forecasting page under the "Neuralforecast" section of the
# navigation and run the resulting page object.
_forecasting_page = st.Page(
    personalized_forecasting,
    title="Personalized Forecasting",
    icon=":material/query_stats:",
)
pg = st.navigation({"Neuralforecast": [_forecasting_page]})

pg.run()