import pandas as pd import numpy as np import matplotlib.pyplot as plt from scipy.optimize import curve_fit from sklearn.preprocessing import MinMaxScaler import warnings import warnings warnings.filterwarnings("ignore") import os import plotly.graph_objects as go from datetime import datetime,timedelta from plotly.subplots import make_subplots import pandas as pd import json from numerize.numerize import numerize # working_directory = r"C:\Users\PragyaJatav\Downloads\Deliverables\Deliverables\Response Curves 09_07_24\Response Curves Resources" # os.chdir(working_directory) ## reading input data df= pd.read_csv('response_curves_input_file.csv') df.dropna(inplace=True) df['Date'] = pd.to_datetime(df['Date']) df.reset_index(inplace=True) # df spend_cols = ['tv_broadcast_spend', 'tv_cable_spend', 'stream_video_spend', 'olv_spend', 'disp_prospect_spend', 'disp_retarget_spend', 'social_prospect_spend', 'social_retarget_spend', 'search_brand_spend', 'search_nonbrand_spend', 'cm_spend', 'audio_spend', 'email_spend'] spend_cols2 = ['tv_broadcast_spend', 'tv_cable_spend', 'stream_video_spend', 'olv_spend', 'disp_prospect_spend', 'disp_retarget_spend', 'social_prospect_spend', 'social_retarget_spend', 'search_brand_spend', 'search_nonbrand_spend', 'cm_spend', 'audio_spend', 'email_spend', 'Date'] metric_cols = ['tv_broadcast_grp', 'tv_cable_grp', 'stream_video_imp', 'olv_imp', 'disp_prospect_imp', 'disp_retarget_imp', 'social_prospect_imp', 'social_retarget_imp', 'search_brand_imp', 'search_nonbrand_imp', 'cm_spend', 'audio_imp', 'email_imp'] channels = [ 'BROADCAST TV', 'CABLE TV', 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING', 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING', 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO', 'EMAIL'] channels2 = [ 'BROADCAST TV', 'CABLE TV', 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING', 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING', 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO', 'EMAIL','Date'] contribution_cols = [ 'Broadcast TV_Prospects', 'Cable TV_Prospects', 'Connected & OTT TV_Prospects', 'Video_Prospects', 'Display Prospecting_Prospects', 'Display Retargeting_Prospects', 'Social Prospecting_Prospects', 'Social Retargeting_Prospects', 'Search Brand_Prospects', 'Search Non-brand_Prospects', 'Digital Partners_Prospects', 'Audio_Prospects', 'Email_Prospects'] def pie_charts(start_date,end_date): start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) import plotly.graph_objects as go from plotly.subplots import make_subplots cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] data1 = pd.DataFrame(cur_data[spend_cols].sum().transpose()) data2 = pd.DataFrame(cur_data[contribution_cols].sum().transpose()) data1.index = channels data1.columns = ["p"] data2.index = channels data2.columns = ["p"] fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]]) fig.add_trace(go.Pie(labels=channels, values=data1["p"], name="t2", hoverinfo='label+percent', textinfo= 'label+percent', showlegend= False,textfont=dict(size =10), # title="Distribution of Spends" ), 1, 1) fig.add_trace(go.Pie(labels=channels, values=data2["p"], name="t2", hoverinfo='label+percent', textinfo= 'label+percent', showlegend= False, textfont=dict(size = 10), # title = "Distribution of Contributions" ), 1, 2) fig.update_layout( title="Distribution Of Spends And Contributions" ) return fig def pie_spend(start_date,end_date): start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] data = pd.DataFrame(cur_data[spend_cols].sum().transpose()) data.index = channels data.columns = ["p"] # Create a pie chart with custom options fig = go.Figure(data=[go.Pie( labels=channels, values=data["p"],#ype(str)+'
'+data.index, hoverinfo='label+percent', textinfo= 'label+percent', showlegend= False, textfont=dict(size = 10) )]) # Customize the layout fig.update_layout( title="Distribution Of Spends" ) # Show the figure return fig def pie_contributions(start_date,end_date): start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] data = pd.DataFrame(cur_data[contribution_cols].sum().transpose()) data.index = channels data.columns = ["p"] # Create a pie chart with custom options fig = go.Figure(data=[go.Pie( labels=channels, values=data["p"],#ype(str)+'
'+data.index, hoverinfo='label+percent', textinfo= 'label+percent', textposition='auto', showlegend= False, textfont=dict(size = 10) )]) # fig.add_annotation(showarrow=False) # Customize the layout fig.update_layout( title="Distribution Of Contributions", # margin=dict(t=0, b=0, l=0, r=0) ) # Show the figure return fig def waterfall(start_date,end_date,btn_chart): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) # start_date = datetime.strptime(start_date, "%Y-%m-%d") # end_date = datetime.strptime(end_date, "%Y-%m-%d") # start_date = start_date.datetime.data # end_date = end_date.datetime.data start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) if btn_chart == "Month on Month": start_date_prev = start_date +timedelta(weeks=-4) end_date_prev = start_date +timedelta(days=-1) else: start_date_prev = start_date +timedelta(weeks=-52) end_date_prev = start_date_prev +timedelta(weeks=4) +timedelta(days=-1) prev_data = df[(df['Date'] >= start_date_prev) & (df['Date'] <= end_date_prev)] cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] # Example data for the waterfall chart data = [ {'label': 'Previous Period', 'value': round(prev_data[contribution_cols].values.sum())}, {'label': 'Broadcast TV', 'value': round(cur_data['Broadcast TV_Prospects'].sum()-prev_data['Broadcast TV_Prospects'].sum())}, {'label': 'Cable TV', 'value': round(cur_data['Cable TV_Prospects'].sum()-prev_data['Cable TV_Prospects'].sum())}, {'label': 'Connected & OTT TV', 'value': round(cur_data['Connected & OTT TV_Prospects'].sum()-prev_data['Connected & OTT TV_Prospects'].sum())}, {'label': 'Video', 'value': round(cur_data['Video_Prospects'].sum()-prev_data['Video_Prospects'].sum())}, {'label': 'Display Prospecting', 'value': round(cur_data['Display Prospecting_Prospects'].sum()-prev_data['Display Prospecting_Prospects'].sum())}, {'label': 'Display Retargeting', 'value': round(cur_data['Display Retargeting_Prospects'].sum()-prev_data['Display Retargeting_Prospects'].sum())}, {'label': 'Social Prospecting', 'value': round(cur_data['Social Prospecting_Prospects'].sum()-prev_data['Social Prospecting_Prospects'].sum())}, {'label': 'Social Retargeting', 'value': round(cur_data['Social Retargeting_Prospects'].sum()-prev_data['Social Retargeting_Prospects'].sum())}, {'label': 'Search Brand', 'value': round(cur_data['Search Brand_Prospects'].sum()-prev_data['Search Brand_Prospects'].sum())}, {'label': 'Search Non-brand', 'value': round(cur_data['Search Non-brand_Prospects'].sum()-prev_data['Search Non-brand_Prospects'].sum())}, {'label': 'Digital Partners', 'value': round(cur_data['Digital Partners_Prospects'].sum()-prev_data['Digital Partners_Prospects'].sum())}, {'label': 'Audio', 'value': round(cur_data['Audio_Prospects'].sum()-prev_data['Audio_Prospects'].sum())}, {'label': 'Email', 'value': round(cur_data['Email_Prospects'].sum()-prev_data['Email_Prospects'].sum())}, {'label': 'Current Period', 'value': round(cur_data[contribution_cols].values.sum())} ] # Calculate cumulative values for the waterfall chart cumulative = [0] for i in range(len(data)): cumulative.append(cumulative[-1] + data[i]['value']) # Adjusting values to start from zero for both first and last columns cumulative[-1] = 0 # Set the last cumulative value to zero # Extracting labels and values labels = [item['label'] for item in data] values = [item['value'] for item in data] # Plotting the waterfall chart using go.Bar bars = [] for i in range(len(data)): color = '#4A88D9' if i == 0 or i == len(data) - 1 else '#DC5537' # Blue for first and last, gray for others hover_text = f"{labels[i]}
Value: {abs(values[i])}" bars.append(go.Bar( x=[labels[i]], y=[cumulative[i+1] - cumulative[i]], base=[cumulative[i]], text=[f"{abs(values[i]):,}"], textposition='auto', hovertemplate=hover_text, marker=dict(color=color), showlegend=False )) # Creating the figure fig = go.Figure(data=bars) # Updating layout for black background and gray gridlines if btn_chart == "Month on Month": fig.update_layout( title=f"Change In MMM Estimated Prospect Contribution" ,showlegend=False, # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( title="Prospects", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line # range=[18000, max(max(cumulative), max(values)) + 1000] # Setting the y-axis range from 19k to slightly above the maximum value ) ) fig.add_annotation( text=f"{start_date_prev.strftime('%m-%d-%Y')} to {end_date_prev.strftime('%m-%d-%Y')} vs. {start_date.strftime('%m-%d-%Y')} To {end_date.strftime('%m-%d-%Y')}", x=0, y=1.15, xref="x domain", yref="y domain", showarrow=False, font=dict(size=16), # align='left' ) # fig.update_xaxes( # tickmode="array", # # categoryorder="total ascending", # tickvals=[f"{abs(values[i])}"], # ticktext=[f"{abs(values[i])}"], # ticklabelposition="outside", # tickfont=dict(color="white"), # ) else : fig.update_layout( title="Change In MMM Estimated Prospect Contribution " ,showlegend=False, # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( title="Prospects", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line # range=[10000, max(cumulative)+1000] # Setting the y-axis range from 19k to slightly above the maximum value ) ) fig.add_annotation( text=f"{start_date_prev.strftime('%m-%d-%Y')} to {end_date_prev.strftime('%m-%d-%Y')} vs. {start_date.strftime('%m-%d-%Y')} To {end_date.strftime('%m-%d-%Y')}", x=0, y=1.15, xref="x domain", yref="y domain", showarrow=False, font=dict(size=16), # align='left' ) # # print(cur_data) # # print(prev_data) # fig.show() return fig def shares_df_func(start_date,end_date): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) start_date_prev = start_date +timedelta(weeks=-4) end_date_prev = start_date +timedelta(days=-1) prev_data = df[(df['Date'] >= start_date_prev) & (df['Date'] <= end_date_prev)] cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] cur_df1 = pd.DataFrame(cur_data[spend_cols].sum()).reset_index() cur_df2 = pd.DataFrame(cur_data[metric_cols].sum()).reset_index() cur_df3 = pd.DataFrame(cur_data[contribution_cols].sum()).reset_index() cur_df1.columns = ["channels","cur_total_spend"] cur_df2.columns = ["channels","cur_total_support"] cur_df3.columns = ["channels","cur_total_contributions"] cur_df1["channels"] = channels cur_df2["channels"] = channels cur_df3["channels"] = channels cur_df1["cur_spend_share"] = (cur_df1["cur_total_spend"]/cur_df1["cur_total_spend"].sum())*100 cur_df2["cur_support_share"] = (cur_df2["cur_total_support"]/cur_df2["cur_total_support"].sum())*100 cur_df3["cur_contributions_share"] = (cur_df3["cur_total_contributions"]/cur_df3["cur_total_contributions"].sum())*100 prev_df1 = pd.DataFrame(prev_data[spend_cols].sum()).reset_index() prev_df2 = pd.DataFrame(prev_data[metric_cols].sum()).reset_index() prev_df3 = pd.DataFrame(prev_data[contribution_cols].sum()).reset_index() prev_df1.columns = ["channels","prev_total_spend"] prev_df2.columns = ["channels","prev_total_support"] prev_df3.columns = ["channels","prev_total_contributions"] prev_df1["channels"] = channels prev_df2["channels"] = channels prev_df3["channels"] = channels prev_df1["prev_spend_share"] = (prev_df1["prev_total_spend"]/prev_df1["prev_total_spend"].sum())*100 prev_df2["prev_support_share"] = (prev_df2["prev_total_support"]/prev_df2["prev_total_support"].sum())*100 prev_df3["prev_contributions_share"] = (prev_df3["prev_total_contributions"]/prev_df3["prev_total_contributions"].sum())*100 cur_df = cur_df1.merge(cur_df2,on="channels",how = "inner") cur_df = cur_df.merge(cur_df3,on="channels",how = "inner") prev_df = prev_df1.merge(prev_df2,on="channels",how = "inner") prev_df = prev_df.merge(prev_df3,on="channels",how = "inner") shares_df = cur_df.merge(prev_df,on = "channels",how = "inner") shares_df["Contribution Change"] = (-shares_df["prev_contributions_share"]+shares_df["cur_contributions_share"])/shares_df["prev_contributions_share"] shares_df["Support Change"] = (-shares_df["prev_support_share"]+shares_df["cur_support_share"])/shares_df["prev_support_share"] shares_df["Spend Change"] = (-shares_df["prev_spend_share"]+shares_df["cur_spend_share"])/shares_df["prev_spend_share"] shares_df["Efficiency Index"] = shares_df["cur_contributions_share"]/shares_df["cur_spend_share"] shares_df["Effectiveness Index"] = shares_df["cur_support_share"]/shares_df["cur_spend_share"] return shares_df def waterfall_table_func(shares_df): ### waterfall delta table # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) waterfall_delta_df = shares_df[["channels","Contribution Change","Support Change","Spend Change"]] waterfall_delta_df = waterfall_delta_df.rename(columns = {"channels":"METRIC"}) waterfall_delta_df.index = waterfall_delta_df["METRIC"] waterfall_delta_df = waterfall_delta_df.round(2) return (waterfall_delta_df[["Contribution Change","Support Change","Spend Change"]].transpose()) def channel_contribution(start_date,end_date): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] channel_df = pd.DataFrame(cur_data[contribution_cols].sum()).reset_index() channel_df.columns = ["channels","contributions"] channel_df["channels"] = channels # Creating the bar chart fig = go.Figure(data=[go.Bar( x=channel_df['channels'], y=round(channel_df['contributions']), marker=dict(color='rgb(74, 136, 217)'), # Blue color for all bars text=(channel_df['contributions']).astype(int).apply(lambda x: f"{x:,}"), textposition='outside' )]) # Updating layout for better visualization fig.update_layout( title=f"Media Contribution", # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, gridcolor='gray', # Setting x-axis gridline color to gray zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( title="Prospect", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ) ) fig.add_annotation( text=f"{cur_data['Date'].min().strftime('%m-%d-%Y')} to {cur_data['Date'].max().strftime('%m-%d-%Y')}", x=0, y=1.15, xref="x domain", yref="y domain", showarrow=False, font=dict(size=16), # align='left' ) return fig def chanel_spends(start_date,end_date): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] channel_df = pd.DataFrame(cur_data[spend_cols].sum()).reset_index() channel_df.columns = ["channels","spends"] channel_df["channels"] = channels # Creating the bar chart fig = go.Figure(data=[go.Bar( x=channel_df['channels'], y=round(channel_df['spends']), marker=dict(color='rgb(74, 136, 217)'), # Blue color for all bars text=channel_df['spends'].apply(numerize), # text = (channel_df['spends']).astype(int).apply(lambda x: f"{x:,}"), textposition='outside' )]) # Updating layout for better visualization fig.update_layout( title=f"Media Spends", # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, gridcolor='gray', # Setting x-axis gridline color to gray zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( title="Spends ($)", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ) ) fig.add_annotation( text=f"{cur_data['Date'].min().strftime('%m-%d-%Y')} to {cur_data['Date'].max().strftime('%m-%d-%Y')}", x=0, y=1.15, xref="x domain", yref="y domain", showarrow=False, font=dict(size=16), # align='left' ) return fig def shares_table_func(shares_df): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) ### Shares tables shares_table_df = shares_df[["channels","cur_spend_share","cur_support_share","cur_contributions_share","Efficiency Index","Effectiveness Index"]] shares_table_df = shares_table_df.rename(columns = {"channels":"METRIC", "cur_spend_share":"Spend Share", "cur_support_share":"Support Share", "cur_contributions_share":"Contribution Share"}) shares_table_df.index = shares_table_df["METRIC"] for c in ["Spend Share","Support Share","Contribution Share"]: shares_table_df[c] = shares_table_df[c].astype(int) shares_table_df[c] = shares_table_df[c].astype(str)+'%' for c in ["Efficiency Index","Effectiveness Index"]: shares_table_df[c] = shares_table_df[c].round(2).astype(str) shares_table_df = shares_table_df[["Spend Share","Support Share","Contribution Share","Efficiency Index","Effectiveness Index"]].transpose() return (shares_table_df) def eff_table_func(shares_df): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) media_df = shares_df[['channels', 'cur_total_spend',"cur_total_support", "cur_total_contributions" ,'cur_spend_share', 'cur_support_share', 'cur_contributions_share', 'Efficiency Index', 'Effectiveness Index']] media_df = media_df.rename(columns = {"channels":"MEDIA", "cur_total_spend":"TOTAL SPEND", "cur_total_support":"TOTAL SUPPORT", "cur_total_contributions":"TOTAL CONTRIBUTION", "cur_spend_share":"SPEND SHARE", "cur_support_share":"SUPPORT SHARE", "cur_contributions_share":"CONTRIBUTION SHARE", 'Efficiency Index':'EFFICIENCY INDEX', 'Effectiveness Index' :'EFFECTIVENESS INDEX' }) media_df.index = media_df["MEDIA"] media_df.drop(columns = ["MEDIA"],inplace = True) for c in ["TOTAL SPEND","TOTAL SUPPORT","TOTAL CONTRIBUTION"]: media_df[c] = media_df[c].astype(int) for c in ["SPEND SHARE","SUPPORT SHARE","CONTRIBUTION SHARE"]: media_df[c] = media_df[c].astype(int) media_df[c] = media_df[c].astype(str)+'%' for c in ['EFFICIENCY INDEX','EFFECTIVENESS INDEX']: media_df[c] = media_df[c].round(2).astype(str) return (media_df) def cpp(start_date,end_date): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] fig = go.Figure() colors = [ 'rgba(74, 136, 217, 0.8)', # Blue 'rgba(220, 85, 55, 0.8)', # Red 'rgba(67, 150, 80, 0.8)', # Green 'rgba(237, 151, 35, 0.8)', # Orange 'rgba(145, 68, 255, 0.8)', # Purple 'rgba(128, 128, 128, 0.8)', # Gray 'rgba(255, 165, 0, 0.8)', # Amber 'rgba(255, 192, 203, 0.8)', # Pink 'rgba(0, 191, 255, 0.8)', # Deep Sky Blue 'rgba(127, 255, 0, 0.8)', # Chartreuse 'rgba(255, 69, 0, 0.8)', # Red-Orange 'rgba(75, 0, 130, 0.8)', # Indigo 'rgba(240, 230, 140, 0.8)', # Khaki 'rgba(218, 112, 214, 0.8)' ] for i in range(0,13): cpp_df = cur_data[['Date',spend_cols[i],contribution_cols[i]]] cpp_df[channels[i]+"_cpp"] = cpp_df[spend_cols[i]]/cpp_df[contribution_cols[i]] # Add each line trace fig.add_trace(go.Scatter(x=cpp_df['Date'], y=cpp_df[channels[i]+"_cpp"], mode='lines', name=channels[i])) # Update layout for better visualization fig.update_layout( title=f"CPP Distribution" , # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, gridcolor='lightgray', griddash='dot', # Setting x-axis gridline color to gray zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( title="CPP", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ), hovermode='x' # Show hover info for all lines at a single point ) fig.add_annotation( text=f"{cur_data['Date'].min().strftime('%m-%d-%Y')} to {cur_data['Date'].max().strftime('%m-%d-%Y')}", x=0, y=1.15, xref="x domain", yref="y domain", showarrow=False, font=dict(size=16), # align='left' ) return fig def base_decomp(): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) base_decomp_df = df[['Date','Unemployment', 'Competition','Trend','Seasonality','Base_0']] fig = go.Figure() # Add each line trace fig.add_trace(go.Scatter(x=base_decomp_df['Date'], y=base_decomp_df['Base_0'], mode='lines', name='Trend and Seasonality')) fig.add_trace(go.Scatter(x=base_decomp_df['Date'], y=base_decomp_df['Unemployment'], mode='lines', name='Unemployment')) fig.add_trace(go.Scatter(x=base_decomp_df['Date'], y=base_decomp_df['Competition'], mode='lines', name='Competition')) # Update layout for better visualization fig.update_layout( title=f"Base Decomposition" #
{cur_data['Date'].min().strftime('%m-%d-%Y')} to {cur_data['Date'].max().strftime('%m-%d-%Y')}" , # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, gridcolor='gray', # Setting x-axis gridline color to gray zeroline=True, # Hiding the x-axis zero line ), yaxis=dict( title="Prospect", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ), hovermode='x' # Show hover info for all lines at a single point ) return fig def media_decomp(): # if pd.isnull(start_date) == True : # start_date = datetime(2024, 1, 28) # if pd.isnull(end_date) == True : # end_date = datetime(2024, 2, 24) df['base'] = df[ 'Base_0']+df['Unemployment']+df['Competition'] cols = ['Date', 'base', 'Broadcast TV_Prospects', 'Cable TV_Prospects', 'Connected & OTT TV_Prospects', 'Video_Prospects', 'Display Prospecting_Prospects', 'Display Retargeting_Prospects', 'Social Prospecting_Prospects', 'Social Retargeting_Prospects', 'Search Brand_Prospects', 'Search Non-brand_Prospects', 'Digital Partners_Prospects', 'Audio_Prospects', 'Email_Prospects', ] media_decomp_df = df[cols] # Calculating the cumulative sum for stacking cumulative_df = media_decomp_df.copy() # for channel in media_decomp_df.columns[1:]: # cumulative_df[channel] = cumulative_df[channel] + cumulative_df[channel].shift(1, fill_value=0) media_cols = media_decomp_df.columns for i in range(2,len(media_cols)): # # print(media_cols[i]) cumulative_df[media_cols[i]] = cumulative_df[media_cols[i]] + cumulative_df[media_cols[i-1]] # cumulative_df # Creating the stacked area chart fig = go.Figure() colors =colors = [ 'rgba(74, 136, 217, 0.8)', # Blue 'rgba(220, 85, 55, 0.8)', # Red 'rgba(67, 150, 80, 0.8)', # Green 'rgba(237, 151, 35, 0.8)', # Orange 'rgba(145, 68, 255, 0.8)', # Purple 'rgba(128, 128, 128, 0.8)', # Gray 'rgba(255, 165, 0, 0.8)', # Amber 'rgba(255, 192, 203, 0.8)', # Pink 'rgba(0, 191, 255, 0.8)', # Deep Sky Blue 'rgba(127, 255, 0, 0.8)', # Chartreuse 'rgba(255, 69, 0, 0.8)', # Red-Orange 'rgba(75, 0, 130, 0.8)', # Indigo 'rgba(240, 230, 140, 0.8)', # Khaki 'rgba(218, 112, 214, 0.8)' ] for idx, channel in enumerate(media_decomp_df.columns[1:]): fig.add_trace(go.Scatter( x=media_decomp_df['Date'], y=cumulative_df[channel], fill='tonexty' if idx > 0 else 'tozeroy', # Fill to the previous curve mode='none', name=str.split(channel,'_')[0], text=media_decomp_df[channel], # Adding text for each point hoverinfo='x+y+text', fillcolor=colors[idx] # Different color for each channel )) # Updating layout for better visualization fig.update_layout( title=f"Media Decomposition",#
{cur_data['Date'].min().strftime('%m-%d-%Y')} to {cur_data['Date'].max().strftime('%m-%d-%Y')}", # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, gridcolor='gray', # Setting x-axis gridline color to gray zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( title="Prospect", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ) ) return fig def mmm_model_quality(): base_df = df[['Date',"Y_hat","Y"]] fig = go.Figure() # Add each line trace fig.add_trace(go.Scatter(x=base_df['Date'], y=base_df['Y_hat'], mode='lines', name='Predicted')) fig.add_trace(go.Scatter(x=base_df['Date'], y=base_df['Y'], mode='lines', name='Actual (Prospect)')) # Update layout for better visualization fig.update_layout( title=f"Model Predicted v/s Actual Prospects" , # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='white'), # Changing font color to white for better contrast xaxis=dict( showgrid=False, gridcolor='gray', # Setting x-axis gridline color to gray zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( title="Prospects", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ), hovermode='x' # Show hover info for all lines at a single point ) return(fig) def media_data(): # Path to your JSON file json_file_path = "all_solutions_2024-05-09.json" # Read the JSON file with open(json_file_path, 'r') as file: json_data = json.load(file) # Initialize a list to store the extracted data extracted_data = [] # Extract half_life and coeff from media_params for params_type in ["control_params","other_params","media_params"]: for media, params in json_data['solution_0']['solution'][params_type].items(): try: extracted_data.append({ 'category': media,# str.split(params_type,'_')[0], 'half_life': params['half_life'], 'coeff': params['coeff'] }) except: extracted_data.append({ 'category':media,# str.split(params_type,'_')[0], 'half_life': None, 'coeff': params['coeff'] }) media_df = pd.DataFrame(extracted_data) return media_df def elasticity(media_df): fig = go.Figure() # media_df = media_df[["category","coeff"]] fig.add_trace(go.Bar( x=media_df['coeff'], y=media_df['category'], orientation='h', # Setting the orientation to horizontal marker_color='rgba(75, 136, 257, 1)', text= media_df['coeff'].round(2), textposition="outside" )) # Updating layout for better visualization fig.update_layout( title="Media And Baseline Elasticity", xaxis=dict( title="Elasticity (coefficient)", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting x-axis gridline color to gray zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( showgrid=False, gridcolor='gray', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ), # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='lightgray') # Changing font color to white for better contrast ) return fig def half_life(media_df): fig = go.Figure() # media_df = media_df[["category","coeff"]] fig.add_trace(go.Bar( x=media_df[media_df['half_life'].isnull()==False]['half_life'], y=media_df[media_df['half_life'].isnull()==False]['category'], orientation='h', # Setting the orientation to horizontal marker_color='rgba(75, 136, 257, 1)', text= media_df['coeff'].round(2), textposition="outside" )) # Updating layout for better visualization fig.update_layout( title="Media Half-life", xaxis=dict( title="Weeks", showgrid=True, gridcolor='lightgray', griddash='dot', # Setting x-axis gridline color to gray zeroline=False, # Hiding the x-axis zero line ), yaxis=dict( showgrid=False, gridcolor='gray', # Setting y-axis gridline color to gray zeroline=False, # Hiding the y-axis zero line ), # plot_bgcolor='black', # paper_bgcolor='black', # font=dict(color='lightgray') # Changing font color to white for better contrast ) return fig # media metrics table n = 104 k = 18 def calculate_aic(y, y_hat): n = len(y) sse = np.sum((y - y_hat) ** 2) aic = n * np.log(sse / n) + 2 * k return aic def calculate_bic(y, y_hat): n = len(y) sse = np.sum((y - y_hat) ** 2) bic = n * np.log(sse / n) + k * np.log(n) return bic def calculate_r_squared(y, y_hat): ss_total = np.sum((y - np.mean(y)) ** 2) ss_residual = np.sum((y - y_hat) ** 2) r_squared = 1 - (ss_residual / ss_total) return r_squared # Function to calculate Adjusted R-squared def calculate_adjusted_r_squared(y, y_hat): n = len(y) r_squared = calculate_r_squared(y, y_hat) adjusted_r_squared = 1 - ((1 - r_squared) * (n - 1) / (n - k - 1)) return adjusted_r_squared # Function to calculate MAPE def calculate_mape(y, y_hat): mape = np.mean(np.abs((y - y_hat) / y)) * 100 return mape def model_metrics_table_func(): model_metrics_df = pd.DataFrame([calculate_r_squared(df["Y"], df["Y_hat"]), calculate_adjusted_r_squared(df["Y"], df["Y_hat"]), calculate_mape(df["Y"], df["Y_hat"]), calculate_aic(df["Y"], df["Y_hat"]), calculate_bic(df["Y"], df["Y_hat"])]) model_metrics_df.index = ["R-squared","Adjusted R-squared","MAPE","AIC","BIC"] model_metrics_df = model_metrics_df.transpose() # model_metrics_df.index = model_metrics_df["R-squared"] # model_metrics_df = model_metrics_df.drop(columns=["R-squared"]) model_metrics_df2 = pd.DataFrame(model_metrics_df.values,columns=["R-squared","Adjusted R-squared","MAPE","AIC","BIC"] ) model_metrics_df2 = model_metrics_df2.round(2) model_metrics_df2["AIC"] = model_metrics_df2["AIC"].round(0) model_metrics_df2["BIC"] = model_metrics_df2["BIC"].round(0) model_metrics_df2.index = [" "] # model_metrics_df2 = model_metrics_df2.reset_index(drop = True) return model_metrics_df2 def scenario_spend_forecasting(delta_df,start_date,end_date): key_df = pd.DataFrame() key_df["Channel_name"] = ["Email", "DisplayRetargeting", "\xa0Video", "BroadcastTV", "SocialRetargeting", "Connected&OTTTV", "SearchBrand", "Audio", "SocialProspecting", "CableTV", "DisplayProspecting", "SearchNon-brand", "DigitalPartners"] key_df["Channels"] = [ "EMAIL", "DISPLAY RETARGETING", "VIDEO", "BROADCAST TV", "SOCIAL RETARGETING", "CONNECTED & OTT TV", "SEARCH BRAND", "AUDIO", "SOCIAL PROSPECTING", "CABLE TV", "DISPLAY PROSPECTING", "SEARCH NON-BRAND", "DIGITAL PARTNERS" ] start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)] cur_data = cur_data[spend_cols] cur_data.columns = channels data1 = pd.DataFrame(cur_data[channels].sum().transpose()).reset_index() data1.columns = ["Channels","last_year_spends"] df_modified = delta_df.merge(key_df,on = "Channel_name",how = "inner") df_modified2 = df_modified.merge(data1,on = "Channels",how ="outer") # df_modified2["Forecasted Spends"] =( df_modified2["last_year_spends"]*(1+df_modified2["Delta_percent"]/100)).astype(int) df_modified2["Forecasted Spends"] =( df_modified2["last_year_spends"]*(1+df_modified2["Delta_percent"]/100)).astype(int) df_modified2.index = df_modified2["Channels"] df_modified2["Spend Change"] = df_modified2["Delta_percent"].astype(int) # df_modified2["Forecasted Spends"] = df_modified2["Forecasted Spends"].astype(int) df_modified2["Last Year Spends"] = df_modified2["last_year_spends"].astype(int) df_modified3 = df_modified2[["Last Year Spends","Forecasted Spends","Spend Change"]].transpose() # df_modified2["forecasted_spends"] = # # df_modified = delta_percent # # df_modified["Optimised Spends"] = df_modified["Current Spends"]* return df_modified3 def scenario_spend_forecasting2(delta_df,start_date,end_date): key_df = pd.DataFrame() key_df["Channel_name"] = ["Email", "DisplayRetargeting", "\xa0Video", "BroadcastTV", "SocialRetargeting", "Connected&OTTTV", "SearchBrand", "Audio", "SocialProspecting", "CableTV", "DisplayProspecting", "SearchNon-brand", "DigitalPartners"] key_df["Channels"] = [ "EMAIL", "DISPLAY RETARGETING", "VIDEO", "BROADCAST TV", "SOCIAL RETARGETING", "CONNECTED & OTT TV", "SEARCH BRAND", "AUDIO", "SOCIAL PROSPECTING", "CABLE TV", "DISPLAY PROSPECTING", "SEARCH NON-BRAND", "DIGITAL PARTNERS" ] import math start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) cur_data = df[(df['Date'] >= start_date) & (df['Date'] < end_date)] cur_data = cur_data[spend_cols2] cur_data.columns = channels2 cur_data["Date2"] = cur_data["Date"]+ pd.Timedelta(days=6) cur_data["Month"] = cur_data["Date"].dt.month # cur_data["Date"] = delta_df["Date"] # cur_data["Date_diff"] = (cur_data["Date"]-start_date).dt.days # cur_data["Date_diff_months"] =(np.ceil(cur_data["Date_diff"] / 30)) data2 = cur_data.groupby("Month").agg({ 'Date':"min", "Date2":"max", 'BROADCAST TV':"sum", 'CABLE TV':"sum", 'CONNECTED & OTT TV':"sum", 'VIDEO':"sum", 'DISPLAY PROSPECTING':"sum", 'DISPLAY RETARGETING':"sum", 'SOCIAL PROSPECTING':"sum", 'SOCIAL RETARGETING':"sum", 'SEARCH BRAND':"sum", 'SEARCH NON-BRAND':"sum", 'DIGITAL PARTNERS':"sum", 'AUDIO':"sum", 'EMAIL':"sum" }).reset_index() def get_month_name(month_number): months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"] if 1 <= month_number <= 12: return months[month_number - 1] else: return "Invalid month number" data2["Month year"] = data2["Month"].apply(get_month_name) + ' ' +(data2["Date"].dt.year+1).astype(str) print(data2.columns) data2 = data2[['Month year' ,'Date', 'Date2', 'BROADCAST TV', 'CABLE TV', 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING', 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING', 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO', 'EMAIL']] data2.columns = ['Month ','Base Data Start Date', 'Base Data End Date', 'BROADCAST TV', 'CABLE TV', 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING', 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING', 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO', 'EMAIL'] data2['Base Data Start Date'] = data2['Base Data Start Date'].dt.date data2['Base Data End Date'] = data2['Base Data End Date'].dt.date #.transpose() # st.dataframe(data2) # st.dataframe(data1) # months_list = cur_data["Month"].unique() # data1["Channels"]=data1.index # df_modified = delta_df.merge(key_df,on = "Channel_name",how = "inner") # df_modified2 = df_modified.merge(data1,on = "Channels",how ="outer") # df_modified2.index = df_modified2["Channels"] # data3 = pd.DataFrame(index = data1.index) # for c in months_list: # data3[c] = df_modified2[c]*(1+df_modified2["Delta_percent"]/100) # df1 = df_modified2[months_list].transpose() # df1["Metrics"] = "Last Year Spends" # data3 = data3.transpose() # data3 = data3.astype(int) # # data2.index = data2["Date_diff_months"] # data2.columns = ["start date","end date"] # data3["start date"] = data2["start date"].dt.date # data3["end date"] = data2["end date"].dt.date # data3["Month"] = data3.index # cols = ["Month","start date","end date",'BROADCAST TV', # 'CABLE TV', # 'CONNECTED & OTT TV', # 'VIDEO', # 'DISPLAY PROSPECTING', # 'DISPLAY RETARGETING', # 'SOCIAL PROSPECTING', # 'SOCIAL RETARGETING', # 'SEARCH BRAND', # 'SEARCH NON-BRAND', # 'DIGITAL PARTNERS', # 'AUDIO', # 'EMAIL'] # data3["Metrics"] = "Forecasted Year Spends" # df2 = df_modified2["Delta_percent"].transpose() # df2["Metrics"] = "Percent Change" # df_modified2["last_year_spends"] = # data3 = pd.DataFrame(index = data1.index) # for c in months_list: # for idx in data3.index: # data3[c][idx] = df_modified2[c][idx]*df_modified2["Delta_percent"] # data1 = data1[['Date',"Date2",'BROADCAST TV', # 'CABLE TV', # 'CONNECTED & OTT TV', # 'VIDEO','DISPLAY PROSPECTING', # 'DISPLAY RETARGETING', # 'SOCIAL PROSPECTING', # 'SOCIAL RETARGETING', # 'SEARCH BRAND', # 'SEARCH NON-BRAND', # 'DIGITAL PARTNERS', # 'AUDIO', # 'EMAIL', # ]] # data1[channels] = data1[channels].astype(int) # data1["Date"] = data1["Date"].dt.date # data1["Date2"] = data1["Date2"].dt.date # # pd.DataFrame(cur_data[channels].groupby("Date_diff_months").sum().transpose()).reset_index() # # # data1.columns = ["Channels","last_year_spends"] # # df_modified = delta_df.merge(key_df,on = "Channel_name",how = "inner") # # df_modified2 = df_modified.merge(data1,on = "Channels",how ="outer") # # df_modified2["Forecasted Spends"] =( df_modified2["last_year_spends"]*(1+df_modified2["Delta_percent"]/100)).apply(numerize) # # df_modified2.index = df_modified2["Channels"] # # df_modified2["Spend Change"] = df_modified2["Delta_percent"] # # df_modified2["Last Year Spends"] = df_modified2["last_year_spends"].apply(numerize) # # df_modified3 = df_modified2[["Last Year Spends","Forecasted Spends","Spend Change"]].transpose() # # # df_modified2["forecasted_spends"] = # # # # df_modified = delta_percent # # # # df_modified["Optimised Spends"] = df_modified["Current Spends"]* # # spend_cols1 = pd.DataFrame(spend_cols)[0].to_list() data2.set_index('Month ', inplace=True) return data2