"""Streamlit dashboard exploring and forecasting the U.S. Federal Funds Rate.

The app loads ``econdataset.csv`` and offers five pages selected from the
sidebar: a business case, data exploration, regression / Prophet predictions
with Shapash feature importance, random-forest hyperparameter tuning tracked
with MLflow (via DagsHub), and written conclusions.

Streamlit re-executes this script from the top on every interaction, so the
dataset is reloaded on each run and the in-place cleaning helpers below never
see an already-cleaned frame.
"""
import codecs
from datetime import datetime

import dagshub
import joblib
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
import streamlit.components.v1 as components
from mlflow import log_artifact, log_metric, log_param
from PIL import Image
from prophet import Prophet
from prophet.plot import plot_components_plotly
from shapash.explainer.smart_explainer import SmartExplainer
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, train_test_split

TARGET_COLUMN = "Fed Effective Funds Rate"
BOND_YIELD_COLUMN = "Bond Yield (US 10Y TN)"
NOMINAL_GDP_COLUMN = 'Nominal GDP Index (in billion USD)'
REAL_GDP_COLUMN = 'Real GDP Index'

# Columns stored as text with "$" and thousands separators in the CSV.
CURRENCY_COLUMNS = ['S&P 500 Price', NOMINAL_GDP_COLUMN, REAL_GDP_COLUMN, 'DOW Jones Price']

MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]


# ---------------------------------------------------------------------------
# Data preparation helpers (shared by several pages)
# ---------------------------------------------------------------------------

def parse_currency_columns(frame):
    """Convert the currency-formatted columns of *frame* to floats, in place.

    "$" signs and commas are stripped before the cast. Returns *frame* so the
    call can be chained.
    """
    for column in CURRENCY_COLUMNS:
        # Raw string: "\$" is not a valid escape in a normal string literal.
        frame[column] = frame[column].replace(r'[\$,]', '', regex=True).astype(float)
    return frame


def backfill_annual_gdp(frame):
    """Spread each January GDP value over its year (up to 1991), then drop NaN rows.

    For every year up to and including 1991, the January values of both GDP
    columns are copied to the other months of that year. Rows that still have
    missing values afterwards are removed. Works in place and returns *frame*.
    """
    is_january = frame['Month'] == 'January'
    for year in frame[frame['Year'] <= 1991]['Year'].unique():
        in_year = frame['Year'] == year
        for gdp_column in (NOMINAL_GDP_COLUMN, REAL_GDP_COLUMN):
            january_value = frame.loc[in_year & is_january, gdp_column].values[0]
            frame.loc[in_year & ~is_january, gdp_column] = january_value
    frame.dropna(inplace=True)  # This drops a single row in 2024 on which we don't have data
    return frame


def encode_month_as_number(frame):
    """Replace month names in ``frame['Month']`` with 1-12, in place."""
    months = pd.Categorical(frame['Month'], categories=MONTH_NAMES, ordered=True)
    frame['Month'] = months
    frame['Month'] = frame['Month'].cat.codes + 1  # Adding 1 to start from 1 instead of 0
    return frame


def read_html_report(file_path):
    """Return the UTF-8 text content of the HTML report at *file_path*."""
    with codecs.open(file_path, 'r', encoding="utf-8") as f:
        return f.read()


# ---------------------------------------------------------------------------
# Model helpers used by the hyperparameter-tuning page
# ---------------------------------------------------------------------------

def process_data(X, y, test_size=0.3, random_state=42):
    """Split features and target into train and test partitions."""
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test


def train_and_optimize_model(X_train, y_train, model, param_grid, cv=5):
    """Run a cross-validated grid search and return the fitted ``GridSearchCV``."""
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=cv)
    grid_search.fit(X_train, y_train)
    return grid_search


def evaluate_model(model, X_test, y_test):
    """Log the test RMSE of *model* to the active MLflow run and return it."""
    y_pred = model.predict(X_test)
    rmse = metrics.root_mean_squared_error(y_test, y_pred)
    mlflow.log_metric("RMSE", rmse)
    return rmse


# ---------------------------------------------------------------------------
# Pages
# ---------------------------------------------------------------------------

def render_business_case():
    """Render the static 'Business Case' page."""
    st.title("1. Business Case")
    st.subheader("Objective:")
    st.write("The purpose of this dashboard is to analyze the Federal Reserve’s interest rate decisions from 1955 to 2024 and explore potential relationships between various economic indicators and the Federal Funds Rate. The dashboard applies time series analysis to forecast future Federal Funds Rate changes and utilizes linear regression to identify and quantify relationships between key economic variables and monetary policy decisions. These tools provide a comprehensive view of historical trends and predictive insights, enabling users to better understand the dynamics of Federal Reserve interest rate adjustments.")
    st.subheader("Key Questions:")
    st.write("1. How have key economic indicators (such as unemployment rate, inflation rate, CPI, and GDP) evolved over the past decades, and how do these trends correlate with the Federal Reserve’s interest rate decisions?")
    st.write("2. What patterns emerge in the Federal Funds Rate over time, and how do other variables such as stock market indices (S&P 500, DOW Jones), bond yields, and GDP relate to these changes?")
    st.write("3. Can we establish reliable relationships between economic indicators (e.g., unemployment rate, inflation, CPI, S&P 500, Bond Yield, Real GDP) to predict future Federal Reserve interest rate adjustments?")
    st.subheader("Use of Analytical Models")
    st.write("This dashboard showcases two key analytical approaches to provide insights:")
    st.subheader("1.Time Series Analysis:")
    st.write("Time series models are employed to forecast future Federal Funds Rate changes by analyzing historical trends in the data. These forecasts help identify patterns that may indicate upcoming rate hikes or cuts.")
    st.subheader("2.Linear Regression:")
    st.write("Linear regression models are used to uncover relationships between variables such as unemployment rate, inflation, bond yields, and the Federal Funds Rate. This analysis highlights the economic factors most closely associated with interest rate changes, providing a framework for understanding how these decisions are influenced by broader economic conditions.")
    st.subheader("Relevance and Value")
    st.write("By integrating predictive analytics, the dashboard provides valuable insights for businesses and decision-makers:")
    st.write("For Businesses:")
    st.write("Understanding and predicting interest rates is crucial for managing borrowing costs, planning capital investments, and making strategic financial decisions.")
    st.write("For Financial Institutions:")
    st.write("Insights into how economic conditions drive rate changes can improve decision-making around interest-sensitive products, such as loans and mortgages.")
    st.write("For Investors:")
    st.write("Predicting rate changes provides an advantage in portfolio management, particularly in optimizing the allocation of equities, bonds, and other interest rate-sensitive assets. Ultimately, understanding the dynamics of Federal Reserve decisions can inform strategies across multiple sectors.")


def render_data_exploration(df):
    """Render the exploration page: cleaning summary, statistics, report, charts.

    *df* is cleaned in place (currency parsing and GDP backfill).
    """
    st.title("2. Data Exploration & Visualization")
    st.write("Sample dataset loaded:")
    st.dataframe(df.head(5))

    # Cleaning the data
    st.write("Cleaning the data:")
    parse_currency_columns(df)
    backfill_annual_gdp(df)

    # Check the updated data types and missing values after conversion
    cleaning_summary = {
        "dtypes_after_conversion": df.dtypes,
        "missing_values_after_conversion": df.isnull().sum()
    }
    st.write(cleaning_summary)

    st.subheader("01 Description of the dataset")
    st.dataframe(df.describe())
    st.write("This dataset contains key statistics from various observations.")

    st.subheader("02 Missing values")
    dfnull = df.isnull().sum() / len(df) * 100
    st.write(dfnull)
    if dfnull.sum() == 0:
        st.success("No missing values found!")

    st.write("Generate an automated report:")
    if st.button("Generate Report"):
        st.balloons()
        try:
            html_report = read_html_report("report.html")
        except FileNotFoundError:
            # Show a readable message instead of a stack trace when the
            # pre-generated report is not shipped with the app.
            st.error("The report file 'report.html' could not be found.")
        else:
            components.html(html_report, height=1000, scrolling=True)

    # Placeholder visualization options
    st.write("Visualize key relationships and importance of variables.")
    list_columns = df.columns
    values = st.multiselect(
        "Select two variables to compare:",
        list_columns,
        [TARGET_COLUMN, BOND_YIELD_COLUMN],
    )
    if len(values) < 2:
        # The charts index values[0] and values[1]; the widget allows the
        # user to deselect entries, so guard against a short selection.
        st.warning("Please select two variables to display the charts.")
    else:
        st.line_chart(df, x=values[0], y=values[1])
        st.bar_chart(df, x=values[0], y=values[1])


def _render_regression(df, features):
    """Fit and compare linear-regression and random-forest models.

    Returns ``(rf_model, rf_pred, X_test)`` for the feature-importance section.
    """
    # Step 1 splitting the dataset into X and y
    X = df[features]
    y = df[TARGET_COLUMN]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train)
    rf_model = RandomForestRegressor(random_state=42)
    rf_model.fit(X_train, y_train)

    lr_pred = lr_model.predict(X_test)
    rf_pred = rf_model.predict(X_test)

    # Align predictions with the test index so they line up with y_test.
    predictions_df_lr = pd.Series(lr_pred, index=y_test.index, name="LR Predictions")
    predictions_df_rf = pd.Series(rf_pred, index=y_test.index, name="RF Predictions")
    result = pd.concat([predictions_df_lr, y_test, predictions_df_rf], axis=1)
    st.dataframe(result)

    # Compare model performance
    st.subheader("Model Performance on Test Data")
    st.write(f"Linear Regression RMSE: **{metrics.root_mean_squared_error(y_test, lr_pred):.2f}**")
    st.write(f"Random Forest RMSE: **{metrics.root_mean_squared_error(y_test, rf_pred):.2f}**")
    st.bar_chart(result)
    return rf_model, rf_pred, X_test


def _render_prophet_forecasts(monthly):
    """Fit Prophet on the funds rate and show a future forecast and a back-test.

    *monthly* must still contain month names in ``Month``; each observation is
    dated to the 15th of its month.
    """
    st.subheader("Time Series Modeling using FB Prophet")
    st.write("Dropping all values before January 1992")
    history = monthly.assign(Day=15)
    history['ds'] = pd.to_datetime(
        history[['Year', 'Month', 'Day']].astype(str).agg('-'.join, axis=1)
    )
    history = history[history['ds'] >= "1992-01-15 00:00:00"]
    df_fp = history[['ds', TARGET_COLUMN]].rename(columns={TARGET_COLUMN: 'y'})
    st.dataframe(df_fp)

    st.write("Future predictions after September 2024")
    # NOTE(review): weekly seasonality on monthly observations and reporting
    # 'trend' rather than 'yhat' look unintentional - confirm with the authors.
    fp_model = Prophet(weekly_seasonality=True)
    fp_model.fit(df_fp)
    future = fp_model.make_future_dataframe(periods=48, freq='M')
    fp_pred = fp_model.predict(future)[["ds", "trend"]]
    fp_pred = fp_pred[fp_pred['ds'] > "2024-09-15 00:00:00"]
    st.dataframe(fp_pred)
    st.line_chart(fp_pred, x='ds', y='trend')

    st.write("Time Series Predictions between 2020 and 2024")
    backtest = history[
        (history['ds'] >= "1992-01-15 00:00:00") & (history['ds'] <= "2020-01-15 00:00:00")
    ]
    df_fp = backtest[['ds', TARGET_COLUMN]].rename(columns={TARGET_COLUMN: 'y'})
    st.dataframe(df_fp)
    fp_model = Prophet()
    fp_model.fit(df_fp)
    future = fp_model.make_future_dataframe(periods=57, freq='M')
    chart_fp_pred = fp_model.predict(future)
    fp_pred = chart_fp_pred[["ds", "trend"]]
    fp_pred = fp_pred[fp_pred['ds'] > "2020-01-15 00:00:00"]
    st.dataframe(fp_pred)

    # Extract seasonality components
    fig = fp_model.plot_components(chart_fp_pred)
    st.pyplot(fig)


def _render_feature_importance(rf_model, rf_pred, X_test):
    """Show Shapash feature-importance plots for the fitted random forest."""
    xpl = SmartExplainer(rf_model)
    y_pred = pd.Series(rf_pred)
    # Reset the index so X_test rows line up with the positional y_pred index.
    X_test = X_test.reset_index(drop=True)
    xpl.compile(x=X_test, y_pred=y_pred)

    st.subheader("Overall Feature Importance")
    fig = xpl.plot.features_importance()
    st.plotly_chart(fig)

    st.subheader("Feature Importance for a Subset")
    # Cap the sample size: sampling more rows than exist raises ValueError.
    subset = X_test.sample(n=min(50, len(X_test)), random_state=42).index
    fig_subset = xpl.plot.features_importance(selection=subset)
    st.plotly_chart(fig_subset)

    st.subheader("Contribution Plot: Bond Yield (US 10Y TN)")
    if BOND_YIELD_COLUMN in X_test.columns:
        fig_contribution = xpl.plot.contribution_plot(BOND_YIELD_COLUMN)
        st.plotly_chart(fig_contribution)
    else:
        # The plot needs the feature to be part of the fitted model.
        st.info(f"Select '{BOND_YIELD_COLUMN}' as a variable to see its contribution plot.")


def render_prediction(df):
    """Render the prediction page: regression, Prophet and feature importance.

    *df* is cleaned and month-encoded in place.
    """
    st.title("3. Prediction")
    parse_currency_columns(df)
    # Snapshot for Prophet taken before month names are replaced by numbers.
    prophet_frame = df.dropna()
    backfill_annual_gdp(df)
    encode_month_as_number(df)

    st.write("Now, we have all our numerical features ready to be used in our model")
    st.dataframe(df.head())

    list_columns = df.columns.drop(TARGET_COLUMN)
    input_lr = st.multiselect(
        "Select variables:", list_columns, [BOND_YIELD_COLUMN, "Unemployment Rate"]
    )

    regression = None
    if input_lr:
        regression = _render_regression(df, input_lr)
    else:
        # Fitting on zero features raises inside scikit-learn.
        st.warning("Select at least one variable to train the regression models.")

    _render_prophet_forecasts(prophet_frame)

    st.title("4. Feature Importance")
    if regression is None:
        st.info("Feature importance is available once at least one variable is selected.")
    else:
        rf_model, rf_pred, X_test = regression
        _render_feature_importance(rf_model, rf_pred, X_test)


def render_hyperparameter_tuning(df):
    """Grid-search a random forest, log the run to MLflow and show the result.

    *df* is cleaned and month-encoded in place.
    """
    st.title("5. Hyperparameter Tuning")
    dagshub.init(repo_owner='shreykharbanda31', repo_name='fed-interest-rate-prediction', mlflow=True)

    # Usage of the functions within an MLflow run
    with mlflow.start_run():
        parse_currency_columns(df)
        backfill_annual_gdp(df)
        encode_month_as_number(df)

        list_columns = df.columns.drop(TARGET_COLUMN)
        X, y = df[list_columns], df[TARGET_COLUMN]
        X_train, X_test, y_train, y_test = process_data(X, y)

        rfg_param_grid = {
            'n_estimators': [50, 100, 200],
            'max_depth': [10, 20, None],
            'min_samples_split': [2, 5, 10]
        }
        rf_model = RandomForestRegressor(random_state=42)
        rf_grid_search = train_and_optimize_model(X_train, y_train, rf_model, rfg_param_grid)
        best_rf = rf_grid_search.best_estimator_
        mlflow.log_params(rf_grid_search.best_params_)

        # One timestamp for both artifacts, without spaces or colons so the
        # resulting names are valid file/directory names on every platform.
        run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        mlflow.sklearn.log_model(best_rf, f"{run_stamp}_best_rf")
        mlflow.sklearn.save_model(best_rf, f"{run_stamp}_best_rf_model")
        rmse = evaluate_model(best_rf, X_test, y_test)

    # Surface the outcome in the UI; previously it was only sent to MLflow.
    st.subheader("Best Random Forest parameters")
    st.write(rf_grid_search.best_params_)
    st.write(f"Test RMSE: **{rmse:.2f}**")


def render_conclusions():
    """Render the static 'Conclusions and Data Insights' page."""
    st.title("6. Conclusions and Data Insights")
    st.subheader("Linear Regression & Random Forest Regressor")
    st.write("While these two go about creating the models in different ways their use cases are similarly two fold. The first use case for these models would be as a sort of extension of a correlation matrix. And the second use case would be as a hypothetical predictive model.")
    st.write("The models and the correlation matrix are similar in that they are both very effective at giving the correlation between variables, so you could decipher from either which economic factor creates the largest impact on the interest rate. The value in this instance specifically for the models is that they can give correlation for a set of variables, while a correlation matrix can only compare one value to another at a given time.")
    st.write("For the second use case this would come into play if you wanted to construct a hypothetical scenario, and see what the resulting change in the interest rate would be. For example if you wanted to predict the interest rate after the economic changes of covid had reverted back to the “base” form that we expect from the US economy, you could input those values into the model and spit out a prediction for what the interest rate would be.")
    st.subheader("Prophet Time Series Model")
    st.write("The time series model is primarily useful compared to the other models for its ability to predict the future. But this comes at the cost of accuracy.")
    st.write("Based on the limited number of variables used it is particularly blind to socio-political factors that influence economic outcomes. For example the model’s prediction is entirely decimated covid, and resulting policy changes. The model is going to be behind humans in the ability to predict the depressionary effect that a pandemic like covid would have on an economy before government intervention, and because it cannot look at other economic factors like inflation rate is unable to see the precise point at which interest rate would shift.")
    st.write("A time series is meant to look at slightly longer term trends, because typically small changes aren’t too telling, but the interest rate is a number set intentionally by the Fed, meaning that small changes are quite telling.")


# ---------------------------------------------------------------------------
# Script body (executed on every Streamlit rerun)
# ---------------------------------------------------------------------------

# Add custom CSS for U.S.-themed colors
st.markdown(
    """ """,
    unsafe_allow_html=True
)

# Main title with an image
st.title("U.S. Fed's Next Move Prediction")
banner = Image.open("fed.png")
st.image(banner, width=400)

# Sidebar with U.S.-themed style
app_page = st.sidebar.selectbox(
    "Select Page",
    ['Business Case', 'Data Exploration & Visualization', 'Prediction & Feature Importance', 'Hyperparameter Tuning', 'Conclusions and Data Insights']
)

df = pd.read_csv('econdataset.csv')

# Exactly one page is rendered per run.
if app_page == 'Business Case':
    render_business_case()
elif app_page == 'Data Exploration & Visualization':
    render_data_exploration(df)
elif app_page == 'Prediction & Feature Importance':
    render_prediction(df)
elif app_page == 'Hyperparameter Tuning':
    render_hyperparameter_tuning(df)
elif app_page == 'Conclusions and Data Insights':
    render_conclusions()