Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
from sklearn.preprocessing import MinMaxScaler | |
import pickle | |
import Streamlit_functions as sf | |
from utilities import (load_authenticator) | |
from utilities_with_panel import (set_header, | |
overview_test_data_prep_panel, | |
overview_test_data_prep_nonpanel, | |
initialize_data, | |
load_local_css, | |
create_channel_summary, | |
create_contribution_pie, | |
create_contribuion_stacked_plot, | |
create_channel_spends_sales_plot, | |
format_numbers, | |
channel_name_formating) | |
import plotly.graph_objects as go | |
import streamlit_authenticator as stauth | |
import yaml | |
from yaml import SafeLoader | |
import time | |
from datetime import datetime,timedelta | |
from pptx import Presentation | |
from pptx.util import Inches | |
from io import BytesIO | |
import plotly.io as pio | |
import response_curves_model_quality as rc1 | |
st.set_page_config(layout='wide') | |
load_local_css('styles.css') | |
set_header() | |
st.title("Model Result Overview") | |
def add_plotly_chart_to_slide(slide, fig, left, top, width, height): | |
img_stream = BytesIO() | |
pio.write_image(fig, img_stream, format='png',engine="orca") | |
slide.shapes.add_picture(img_stream, left, top, width, height) | |
def save_table(df,prs): | |
# Add a blank slide | |
slide = prs.slides.add_slide(prs.slide_layouts[6]) | |
rows, cols = df.shape[0] + 1, df.shape[1] # +1 for the header row | |
table = slide.shapes.add_table(rows, cols, Inches(1), Inches(1), Inches(10), Inches(7)).table | |
# Set the header row | |
for col_idx, col_name in enumerate(df.columns): | |
table.cell(0, col_idx).text = col_name | |
# Add the DataFrame rows to the table | |
for row_idx, row in df.iterrows(): | |
for col_idx, value in enumerate(row): | |
# # print(value) | |
if isinstance(value, int): | |
table.cell(row_idx + 1, col_idx).text = str(value) | |
def save_ppt_file(fig1,fig2,fig3,fig4,fig6,fig7,figw,start_date,end_date,shares_df1,shares_df2): | |
# Initialize PowerPoint presentation | |
prs = Presentation() | |
# save_table(shares_df1,prs) | |
# save_table(shares_df2,prs) | |
# Slide 1: Model Quality with Chart | |
slide_1 = prs.slides.add_slide(prs.slide_layouts[6]) | |
# title_1 = slide_1.shapes.title | |
# title_1.text = "Distribution Of Spends And Prospects" | |
# Add the Plotly chart to the slide | |
add_plotly_chart_to_slide(slide_1, sf.pie_contributions(start_date,end_date), Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
add_plotly_chart_to_slide(prs.slides.add_slide(prs.slide_layouts[6]), sf.pie_spend(start_date,end_date), Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
# Slide 2: Media Data Elasticity | |
slide_2 = prs.slides.add_slide(prs.slide_layouts[6]) | |
# title_2 = slide_2.shapes.title | |
# title_2.text = "Media Contribution" | |
add_plotly_chart_to_slide(slide_2, fig2, Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
slide_3 = prs.slides.add_slide(prs.slide_layouts[6]) | |
# title_3 = slide_3.shapes.title | |
# title_3.text = "Media Spends" | |
add_plotly_chart_to_slide(slide_3, fig3, Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
slide_4 = prs.slides.add_slide(prs.slide_layouts[6]) | |
# title_4 = slide_4.shapes.title | |
# title_4.text = "CPP Distribution" | |
add_plotly_chart_to_slide(slide_4, fig4, Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
if figw != None: | |
slide_5 = prs.slides.add_slide(prs.slide_layouts[6]) | |
# title_5 = slide_5.shapes.title | |
# title_5.text = "Change in MMM Estimated Prospect Contributions" | |
figw.update_layout( | |
# title="Distribution Of Spends" | |
title={ | |
'text': "Change In MMM Estimated Prospect Contribution", | |
'font': { | |
'size': 24, | |
'family': 'Arial', | |
'color': 'black', | |
# 'bold': True | |
} | |
} | |
) | |
add_plotly_chart_to_slide(slide_5, figw, Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
else : | |
slide_5 = prs.slides.add_slide(prs.slide_layouts[5]) | |
title_5 = slide_5.shapes.title | |
title_5.text = "Change in MMM Estimated Prospect Contributions" | |
slide_6 = prs.slides.add_slide(prs.slide_layouts[6]) | |
# title_6 = slide_6.shapes.title | |
# title_6.text = "Base Decomposition" | |
add_plotly_chart_to_slide(slide_6, fig6, Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
slide_7 = prs.slides.add_slide(prs.slide_layouts[6]) | |
# title_7 = slide_7.shapes.title | |
# title_7.text = "Media Decomposition" | |
add_plotly_chart_to_slide(slide_7, fig7, Inches(0.25), Inches(0.25), width=Inches(9.25), height=Inches(6.75)) | |
# prs.save('MMM_Model_Result Overview.pptx') | |
# print("PowerPoint slides created successfully.") | |
# Save to a BytesIO object | |
ppt_stream = BytesIO() | |
prs.save(ppt_stream) | |
ppt_stream.seek(0) | |
return ppt_stream.getvalue() | |
def get_random_effects(media_data, panel_col, mdf): | |
random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"]) | |
for i, market in enumerate(media_data[panel_col].unique()): | |
# # print(i, end='\r') | |
intercept = mdf.random_effects[market].values[0] | |
random_eff_df.loc[i, 'random_effect'] = intercept | |
random_eff_df.loc[i, panel_col] = market | |
return random_eff_df | |
def process_train_and_test(train, test, features, panel_col, target_col): | |
X1 = train[features] | |
ss = MinMaxScaler() | |
X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns) | |
X1[panel_col] = train[panel_col] | |
X1[target_col] = train[target_col] | |
if test is not None: | |
X2 = test[features] | |
X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns) | |
X2[panel_col] = test[panel_col] | |
X2[target_col] = test[target_col] | |
return X1, X2 | |
return X1 | |
def mdf_predict(X_df, mdf, random_eff_df) : | |
X=X_df.copy() | |
X=pd.merge(X, random_eff_df[[panel_col,'random_effect']], on=panel_col, how='left') | |
X['pred_fixed_effect'] = mdf.predict(X) | |
X['pred'] = X['pred_fixed_effect'] + X['random_effect'] | |
X.to_csv('Test/merged_df_contri.csv',index=False) | |
X.drop(columns=['pred_fixed_effect', 'random_effect'], inplace=True) | |
return X | |
target_col='Prospects' | |
target='Prospects' | |
# is_panel=False | |
# is_panel = st.session_state['is_panel'] | |
#panel_col = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['bin_dict']['Panel Level 1'] ] [0]# set the panel column | |
panel_col='Panel' | |
date_col = 'date' | |
#st.write(media_data) | |
is_panel = True | |
# panel_col='markets' | |
date_col = 'date' | |
for k, v in st.session_state.items(): | |
if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'): | |
st.session_state[k] = v | |
authenticator = st.session_state.get('authenticator') | |
if authenticator is None: | |
authenticator = load_authenticator() | |
name, authentication_status, username = authenticator.login('Login', 'main') | |
auth_status = st.session_state['authentication_status'] | |
if auth_status: | |
authenticator.logout('Logout', 'main') | |
is_state_initiaized = st.session_state.get('initialized',False) | |
if not is_state_initiaized: | |
a=1 | |
with st.expander("View Channel Wise Spend And Prospect Analysis "): | |
# Create two columns for start date and end date input | |
col1, col2 = st.columns(2) | |
min_date,max_date = sf.get_date_range() | |
# st.write(min_date,max_date) | |
# min_date = datetime(2023, 1, 1) | |
# max_date = datetime(2024, 12, 31) | |
default_date1,default_date2 = sf.get_default_dates() | |
# st.write(default_date1,default_date2) | |
with col1: | |
start_date = st.date_input("Start Date: ",value=default_date1,min_value=min_date, | |
max_value=max_date) | |
with col2: | |
end_date = st.date_input("End Date: ",value = default_date2,min_value=min_date, | |
max_value=max_date) | |
# col1, col2 = st.columns(2) | |
# with col1: | |
# fig = sf.pie_spend(start_date,end_date) | |
# st.plotly_chart(fig,use_container_width=True) | |
# with col2: | |
# fig = sf.pie_contributions(start_date,end_date) | |
# st.plotly_chart(fig,use_container_width=True) | |
# st.header("Distribution of Spends and Contributions") | |
fig1 = sf.pie_charts(start_date,end_date) | |
st.plotly_chart(fig1,use_container_width=True) | |
## Channel Contribution Bar Chart | |
fig2 =sf.channel_contribution(start_date,end_date) | |
st.plotly_chart(fig2,use_container_width=True) | |
fig3 = sf.chanel_spends(start_date,end_date) | |
st.plotly_chart(fig3,use_container_width=True) | |
# Format first three rows in percentage format | |
# styled_df = sf.shares_table_func(shares_df) | |
# # styled_df = styled_df.round(0).astype(int) | |
# styled_df.iloc[:3] = (styled_df.iloc[:3]).astype(int) | |
# # Round next two rows to two decimal places | |
# styled_df.iloc[3:5] = styled_df.iloc[3:5].round(0).astype(str) | |
# st.table(styled_df) | |
shares_df = sf.shares_df_func(start_date,end_date) | |
shares_df1 = sf.shares_table_func(shares_df) | |
st.dataframe(sf.shares_table_func(shares_df),use_container_width=True) | |
shares_df2 = sf.eff_table_func(shares_df) | |
st.dataframe(sf.eff_table_func(shares_df).style.format({"TOTAL SPEND": "{:,.0f}", "TOTAL SUPPORT": "{:,.0f}", "TOTAL CONTRIBUTION": "{:,.0f}"}),use_container_width=True) | |
### CPP CHART | |
fig4 = sf.cpp(start_date,end_date) | |
st.plotly_chart(fig4,use_container_width=True) | |
with st.expander("View Change in MMM Estimated Prospect Contributions Analysis"): | |
data_selection_type = st.radio("Select Input Type",["Compare Monthly Change", "Compare Custom Range"]) | |
waterfall_start_date,waterfall_end_date = start_date,end_date | |
# Dropdown menu options | |
st.markdown("<h1 style='font-size:28px;'>Change in MMM Estimated Prospect Contributions</h1>", unsafe_allow_html=True) | |
if data_selection_type == "Compare Monthly Change": | |
options = [ | |
"Month on Month", | |
"Year on Year"] | |
col1, col2 = st.columns(2) | |
# Create a dropdown menu | |
with col1: | |
selected_option = st.selectbox('Select a comparison', options) | |
with col2: | |
st.markdown("""</br>""",unsafe_allow_html=True) | |
if selected_option == "Month on Month" : | |
st.markdown( | |
f""" | |
<div style="padding: 5px; border-radius: 5px; background-color: #FFFFE0; width: fit-content; display: inline-block;"> | |
<strong> Comparision of current month spends to previous month spends</strong> | |
</div> | |
""", | |
unsafe_allow_html=True | |
) | |
else : | |
st.markdown( | |
f""" | |
<div style="padding: 5px; border-radius: 5px; background-color: #FFFFE0; width: fit-content; display: inline-block;"> | |
<strong> Comparision of current month spends to the same month in previous year</strong> | |
</div> | |
""", | |
unsafe_allow_html=True | |
) | |
# Waterfall chart | |
def get_month_year_list(start_date, end_date): | |
# Generate a range of dates from start_date to end_date with a monthly frequency | |
dates = pd.date_range(start=start_date, end=end_date, freq='MS') # 'MS' is month start frequency | |
# Extract month and year from each date and create a list of tuples | |
month_year_list = [(date.month, date.year) for date in dates] | |
return month_year_list | |
def get_start_end_dates(month, year): | |
start_date = datetime(year, month, 1).date() | |
if month == 12: | |
end_date = datetime(year + 1, 1, 1).date() - timedelta(days=1) | |
else: | |
end_date = datetime(year, month + 1, 1).date() - timedelta(days=1) | |
return start_date, end_date | |
month_year_list = get_month_year_list(start_date, end_date) | |
dropdown_options = [f"{date.strftime('%B %Y')}" for date in pd.date_range(start=start_date, end=end_date, freq='MS')] | |
waterfall_option = st.selectbox("Select a month:", dropdown_options) | |
waterfall_date = datetime.strptime(waterfall_option, "%B %Y") | |
waterfall_month = waterfall_date.month | |
waterfall_year = waterfall_date.year | |
waterfall_start_date, waterfall_end_date = get_start_end_dates(waterfall_month, waterfall_year) | |
# st.write("abc") | |
# figw = sf.waterfall(waterfall_start_date,waterfall_end_date) | |
figw= sf.waterfall(waterfall_start_date,waterfall_end_date,selected_option) | |
st.plotly_chart(figw,use_container_width=True) | |
elif data_selection_type == "Compare Custom Range": | |
col1, col2 = st.columns(2) | |
min_date,max_date = sf.get_date_range() | |
with col1: | |
st.write("Select Time Period 1") | |
sc1,sc2 = st.columns(2) | |
with sc1: | |
waterfall_start_date1 = st.date_input("Start Date 1: ",value=start_date,min_value=min_date, | |
max_value=max_date) | |
with sc2: | |
waterfall_end_date1 = st.date_input("End Date 1: ",value = end_date,min_value=min_date, | |
max_value=max_date) | |
with col2: | |
st.write("Select Time Period 2") | |
ec1,ec2 = st.columns(2) | |
with ec1: | |
waterfall_start_date2 = st.date_input("Start Date 2: ",value=end_date-timedelta(days = -1),min_value=min_date, | |
max_value=max_date) | |
with ec2: | |
diff = min((start_date-end_date).days,-30) | |
waterfall_end_date2 = st.date_input("End Date 2: ",value = start_date,min_value=min_date, | |
max_value=max_date) | |
try: | |
figw= sf.waterfall2(waterfall_start_date1,waterfall_end_date1,waterfall_start_date2,waterfall_end_date2) | |
st.plotly_chart(figw,use_container_width=True) | |
except: | |
st.warning("Previous data does not exist") | |
# Waterfall table | |
# shares_df = sf.shares_df_func(waterfall_start_date,waterfall_end_date) | |
st.table(sf.waterfall_table_func(shares_df).style.format("{:.0%}")) | |
with st.expander("View Decomposition Analysis"): | |
### Base decomp CHART | |
fig6 = sf.base_decomp() | |
st.plotly_chart(fig6,use_container_width=True) | |
### Media decomp CHART | |
fig7 = sf.media_decomp() | |
st.plotly_chart(fig7,use_container_width=True) | |
if st.button("Prepare Download Of Analysis"): | |
ppt_file = save_ppt_file(fig1,fig2,fig3,fig4,fig6,fig7,figw,start_date,end_date,shares_df1,shares_df2) | |
# Add a download button | |
st.download_button( | |
label="Download Analysis", | |
data=ppt_file, | |
file_name="MMM_Model_Result Overview.pptx", | |
mime="application/vnd.openxmlformats-officedocument.presentationml.presentation" | |
) | |