import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from sklearn.preprocessing import MinMaxScaler
import warnings
import warnings
warnings.filterwarnings("ignore")
import os
import plotly.graph_objects as go
from datetime import datetime,timedelta
from plotly.subplots import make_subplots
import pandas as pd
import json
from numerize.numerize import numerize
# working_directory = r"C:\Users\PragyaJatav\Downloads\Deliverables\Deliverables\Response Curves 09_07_24\Response Curves Resources"
# os.chdir(working_directory)
## reading input data
df= pd.read_csv('response_curves_input_file.csv')
df.dropna(inplace=True)
df['Date'] = pd.to_datetime(df['Date'])
df.reset_index(inplace=True)
# df
spend_cols = ['tv_broadcast_spend',
'tv_cable_spend',
'stream_video_spend',
'olv_spend',
'disp_prospect_spend',
'disp_retarget_spend',
'social_prospect_spend',
'social_retarget_spend',
'search_brand_spend',
'search_nonbrand_spend',
'cm_spend',
'audio_spend',
'email_spend']
spend_cols2 = ['tv_broadcast_spend',
'tv_cable_spend',
'stream_video_spend',
'olv_spend',
'disp_prospect_spend',
'disp_retarget_spend',
'social_prospect_spend',
'social_retarget_spend',
'search_brand_spend',
'search_nonbrand_spend',
'cm_spend',
'audio_spend',
'email_spend', 'Date']
metric_cols = ['tv_broadcast_grp',
'tv_cable_grp',
'stream_video_imp',
'olv_imp',
'disp_prospect_imp',
'disp_retarget_imp',
'social_prospect_imp',
'social_retarget_imp',
'search_brand_imp',
'search_nonbrand_imp',
'cm_spend',
'audio_imp',
'email_imp']
channels = [
'BROADCAST TV',
'CABLE TV',
'CONNECTED & OTT TV',
'VIDEO',
'DISPLAY PROSPECTING',
'DISPLAY RETARGETING',
'SOCIAL PROSPECTING',
'SOCIAL RETARGETING',
'SEARCH BRAND',
'SEARCH NON-BRAND',
'DIGITAL PARTNERS',
'AUDIO',
'EMAIL']
channels2 = [
'BROADCAST TV',
'CABLE TV',
'CONNECTED & OTT TV',
'VIDEO',
'DISPLAY PROSPECTING',
'DISPLAY RETARGETING',
'SOCIAL PROSPECTING',
'SOCIAL RETARGETING',
'SEARCH BRAND',
'SEARCH NON-BRAND',
'DIGITAL PARTNERS',
'AUDIO',
'EMAIL','Date']
contribution_cols = [
'Broadcast TV_Prospects',
'Cable TV_Prospects',
'Connected & OTT TV_Prospects',
'Video_Prospects',
'Display Prospecting_Prospects',
'Display Retargeting_Prospects',
'Social Prospecting_Prospects',
'Social Retargeting_Prospects',
'Search Brand_Prospects',
'Search Non-brand_Prospects',
'Digital Partners_Prospects',
'Audio_Prospects',
'Email_Prospects']
def get_date_range():
return df['Date'].min(),df['Date'].max()+ timedelta(days=7)
def get_default_dates():
return df['Date'].max()- timedelta(days=21),df['Date'].max()+ timedelta(days=6)
def pie_charts(start_date,end_date):
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
import plotly.graph_objects as go
from plotly.subplots import make_subplots
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
data1 = pd.DataFrame(cur_data[spend_cols].sum().transpose())
data2 = pd.DataFrame(cur_data[contribution_cols].sum().transpose())
data1.index = channels
data1.columns = ["p"]
data2.index = channels
data2.columns = ["p"]
colors = ['#ff2b2b', # Pastel Peach
'#0068c9', # Pastel Blue
'#83c9ff', # Pastel Pink
'#ffabab', # Pastel Purple
'#29b09d', # Pastel Green
'#7defa1', # Pastel Yellow
'#ff8700', # Pastel Gray
'#ffd16a', # Pastel Red
'#6d3fc0', # Pastel Rose
'#d5dae5', # Pastel Lavender
'#309bff', # Pastel Mauve
'#e9f5ff', # Pastel Beige
'#BEBADA' # Pastel Lilac
]
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=channels,
values=data1["p"],
name="t2",
hoverinfo='label+percent',
textinfo= 'label+percent',
showlegend= False,textfont=dict(size =10),
title="Distribution of Spends"
, marker=dict(colors=colors)
), 1, 1)
fig.add_trace(go.Pie(labels=channels,
values=data2["p"],
name="t2",
hoverinfo='label+percent',
textinfo= 'label+percent',
showlegend= False,
textfont=dict(size = 10),
title = "Distribution of Prospect Contributions", marker=dict(colors=colors)
), 1, 2)
# fig.update_layout(
# title="Distribution Of Spends And Prospect Contributions"
# )
fig.update_layout(
# title="Distribution Of Spends"
title={
'text': "Distribution Of Spends And Prospects",
'font': {
'size': 24,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
}
)
fig.add_annotation(
text=f"{start_date.strftime('%m-%d-%Y')} to {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=18),
# align='left'
)
return fig
def pie_spend(start_date,end_date):
colors = ['#ff2b2b', # Pastel Peach
'#0068c9', # Pastel Blue
'#83c9ff', # Pastel Pink
'#ffabab', # Pastel Purple
'#29b09d', # Pastel Green
'#7defa1', # Pastel Yellow
'#ff8700', # Pastel Gray
'#ffd16a', # Pastel Red
'#6d3fc0', # Pastel Rose
'#d5dae5', # Pastel Lavender
'#309bff', # Pastel Mauve
'#e9f5ff', # Pastel Beige
'#BEBADA' # Pastel Lilac
]
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
data = pd.DataFrame(cur_data[spend_cols].sum().transpose())
data.index = channels
data.columns = ["p"]
# Create a pie chart with custom options
fig = go.Figure(data=[go.Pie(
labels=channels,
values=data["p"],#ype(str)+'
'+data.index,
hoverinfo='label+percent',
textinfo= 'label+percent',
showlegend= False,
textfont=dict(size = 10)
, marker=dict(colors=colors)
)])
# Customize the layout
fig.update_layout(
# title="Distribution Of Spends"
title={
'text': "Distribution Of Spends",
'font': {
'size': 24,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
}
)
fig.add_annotation(
text=f"{start_date.strftime('%m-%d-%Y')} to {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=18),
# align='left'
)
# Show the figure
return fig
def pie_contributions(start_date,end_date):
colors = ['#ff2b2b', # Pastel Peach
'#0068c9', # Pastel Blue
'#83c9ff', # Pastel Pink
'#ffabab', # Pastel Purple
'#29b09d', # Pastel Green
'#7defa1', # Pastel Yellow
'#ff8700', # Pastel Gray
'#ffd16a', # Pastel Red
'#6d3fc0', # Pastel Rose
'#d5dae5', # Pastel Lavender
'#309bff', # Pastel Mauve
'#e9f5ff', # Pastel Beige
'#BEBADA' # Pastel Lilac
]
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
data = pd.DataFrame(cur_data[contribution_cols].sum().transpose())
data.index = channels
data.columns = ["p"]
# Create a pie chart with custom options
fig = go.Figure(data=[go.Pie(
labels=channels,
values=data["p"],#ype(str)+'
'+data.index,
hoverinfo='label+percent',
textinfo= 'label+percent',
textposition='auto',
showlegend= False,
textfont=dict(size = 10)
, marker=dict(colors=colors)
)])
# fig.add_annotation(showarrow=False)
# Customize the layout
fig.update_layout(
# title="Distribution Of Contributions",
title={
'text': "Distribution of Prospects",
'font': {
'size': 24,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
}
# margin=dict(t=0, b=0, l=0, r=0)
)
fig.add_annotation(
text=f"{start_date.strftime('%m-%d-%Y')} to {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=18),
# align='left'
)
# Show the figure
return fig
def waterfall2(start_date1,end_date1,start_date2,end_date2):
btn_chart = "Month on Month"
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
# start_date = datetime.strptime(start_date, "%Y-%m-%d")
# end_date = datetime.strptime(end_date, "%Y-%m-%d")
# start_date = start_date.datetime.data
# end_date = end_date.datetime.data
start_date1 = pd.to_datetime(start_date1)
end_date1 = pd.to_datetime(end_date1)
start_date2 = pd.to_datetime(start_date2)
end_date2 = pd.to_datetime(end_date2)
# if btn_chart == "Month on Month":
# start_date_prev = start_date +timedelta(weeks=-4)
# end_date_prev = start_date +timedelta(days=-1)
# else:
# start_date_prev = start_date +timedelta(weeks=-52)
# end_date_prev = start_date_prev +timedelta(weeks=4) +timedelta(days=-1)
if start_date1 < df['Date'].min() :
return "a"
cur_data = df[(df['Date'] >= start_date2) & (df['Date'] <= end_date2)]
prev_data = df[(df['Date'] >= start_date1) & (df['Date'] <= end_date1)]
# Example data for the waterfall chart
data = [
{'label': 'Previous Period', 'value': round(prev_data[contribution_cols].values.sum())},
{'label': 'Broadcast TV', 'value': round(cur_data['Broadcast TV_Prospects'].sum()-prev_data['Broadcast TV_Prospects'].sum())},
{'label': 'Cable TV', 'value': round(cur_data['Cable TV_Prospects'].sum()-prev_data['Cable TV_Prospects'].sum())},
{'label': 'Connected & OTT TV', 'value': round(cur_data['Connected & OTT TV_Prospects'].sum()-prev_data['Connected & OTT TV_Prospects'].sum())},
{'label': 'Video', 'value': round(cur_data['Video_Prospects'].sum()-prev_data['Video_Prospects'].sum())},
{'label': 'Display Prospecting', 'value': round(cur_data['Display Prospecting_Prospects'].sum()-prev_data['Display Prospecting_Prospects'].sum())},
{'label': 'Display Retargeting', 'value': round(cur_data['Display Retargeting_Prospects'].sum()-prev_data['Display Retargeting_Prospects'].sum())},
{'label': 'Social Prospecting', 'value': round(cur_data['Social Prospecting_Prospects'].sum()-prev_data['Social Prospecting_Prospects'].sum())},
{'label': 'Social Retargeting', 'value': round(cur_data['Social Retargeting_Prospects'].sum()-prev_data['Social Retargeting_Prospects'].sum())},
{'label': 'Search Brand', 'value': round(cur_data['Search Brand_Prospects'].sum()-prev_data['Search Brand_Prospects'].sum())},
{'label': 'Search Non-brand', 'value': round(cur_data['Search Non-brand_Prospects'].sum()-prev_data['Search Non-brand_Prospects'].sum())},
{'label': 'Digital Partners', 'value': round(cur_data['Digital Partners_Prospects'].sum()-prev_data['Digital Partners_Prospects'].sum())},
{'label': 'Audio', 'value': round(cur_data['Audio_Prospects'].sum()-prev_data['Audio_Prospects'].sum())},
{'label': 'Email', 'value': round(cur_data['Email_Prospects'].sum()-prev_data['Email_Prospects'].sum())},
{'label': 'Current Period', 'value': round(cur_data[contribution_cols].values.sum())}
]
# Calculate cumulative values for the waterfall chart
cumulative = [0]
for i in range(len(data)):
cumulative.append(cumulative[-1] + data[i]['value'])
# Adjusting values to start from zero for both first and last columns
cumulative[-1] = 0 # Set the last cumulative value to zero
# Extracting labels and values
labels = [item['label'] for item in data]
values = [item['value'] for item in data]
# Plotting the waterfall chart using go.Bar
bars = []
for i in range(len(data)):
color = '#4A88D9' if i == 0 or i == len(data) - 1 else '#DC5537' # Blue for first and last, gray for others
hover_text = f"{labels[i]}
Value: {abs(values[i])}"
bars.append(go.Bar(
x=[labels[i]],
y=[cumulative[i+1] - cumulative[i]],
base=[cumulative[i]],
text=[f"{abs(values[i]):,}"],
textposition='auto',
hovertemplate=hover_text,
marker=dict(color=color),
showlegend=False
))
# Creating the figure
fig = go.Figure(data=bars)
# Updating layout for black background and gray gridlines
if btn_chart == "Month on Month":
fig.update_layout(
title=f"Change In MMM Estimated Prospect Contribution"
,showlegend=False,
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
xaxis=dict(
showgrid=False,
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospects",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
# range=[18000, max(max(cumulative), max(values)) + 1000] # Setting the y-axis range from 19k to slightly above the maximum value
)
)
fig.add_annotation(
text=f"{start_date2.strftime('%m-%d-%Y')} to {end_date2.strftime('%m-%d-%Y')} vs. {start_date1.strftime('%m-%d-%Y')} To {end_date1.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
# fig.update_xaxes(
# tickmode="array",
# # categoryorder="total ascending",
# tickvals=[f"{abs(values[i])}"],
# ticktext=[f"{abs(values[i])}"],
# ticklabelposition="outside",
# tickfont=dict(color="white"),
# )
else :
fig.update_layout(
showlegend=False,
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
xaxis=dict(
showgrid=False,
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospects",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
# range=[10000, max(cumulative)+1000] # Setting the y-axis range from 19k to slightly above the maximum value
)
)
fig.add_annotation(
text=f"{start_date_prev.strftime('%m-%d-%Y')} to {end_date_prev.strftime('%m-%d-%Y')} vs. {start_date.strftime('%m-%d-%Y')} To {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
# # # # print(cur_data)
# # # # print(prev_data)
# fig.show()
return fig
def waterfall(start_date,end_date,btn_chart):
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
# start_date = datetime.strptime(start_date, "%Y-%m-%d")
# end_date = datetime.strptime(end_date, "%Y-%m-%d")
# start_date = start_date.datetime.data
# end_date = end_date.datetime.data
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
if btn_chart == "Month on Month":
start_date_prev = start_date +timedelta(weeks=-4)
end_date_prev = start_date +timedelta(days=-1)
else:
start_date_prev = start_date +timedelta(weeks=-52)
end_date_prev = start_date_prev +timedelta(weeks=4) +timedelta(days=-1)
# if start_date_prev < df['Date'].min() :
# return "a"
prev_data = df[(df['Date'] >= start_date_prev) & (df['Date'] <= end_date_prev)]
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
# Example data for the waterfall chart
data = [
{'label': 'Previous Period', 'value': round(prev_data[contribution_cols].values.sum())},
{'label': 'Broadcast TV', 'value': round(cur_data['Broadcast TV_Prospects'].sum()-prev_data['Broadcast TV_Prospects'].sum())},
{'label': 'Cable TV', 'value': round(cur_data['Cable TV_Prospects'].sum()-prev_data['Cable TV_Prospects'].sum())},
{'label': 'Connected & OTT TV', 'value': round(cur_data['Connected & OTT TV_Prospects'].sum()-prev_data['Connected & OTT TV_Prospects'].sum())},
{'label': 'Video', 'value': round(cur_data['Video_Prospects'].sum()-prev_data['Video_Prospects'].sum())},
{'label': 'Display Prospecting', 'value': round(cur_data['Display Prospecting_Prospects'].sum()-prev_data['Display Prospecting_Prospects'].sum())},
{'label': 'Display Retargeting', 'value': round(cur_data['Display Retargeting_Prospects'].sum()-prev_data['Display Retargeting_Prospects'].sum())},
{'label': 'Social Prospecting', 'value': round(cur_data['Social Prospecting_Prospects'].sum()-prev_data['Social Prospecting_Prospects'].sum())},
{'label': 'Social Retargeting', 'value': round(cur_data['Social Retargeting_Prospects'].sum()-prev_data['Social Retargeting_Prospects'].sum())},
{'label': 'Search Brand', 'value': round(cur_data['Search Brand_Prospects'].sum()-prev_data['Search Brand_Prospects'].sum())},
{'label': 'Search Non-brand', 'value': round(cur_data['Search Non-brand_Prospects'].sum()-prev_data['Search Non-brand_Prospects'].sum())},
{'label': 'Digital Partners', 'value': round(cur_data['Digital Partners_Prospects'].sum()-prev_data['Digital Partners_Prospects'].sum())},
{'label': 'Audio', 'value': round(cur_data['Audio_Prospects'].sum()-prev_data['Audio_Prospects'].sum())},
{'label': 'Email', 'value': round(cur_data['Email_Prospects'].sum()-prev_data['Email_Prospects'].sum())},
{'label': 'Current Period', 'value': round(cur_data[contribution_cols].values.sum())}
]
# Calculate cumulative values for the waterfall chart
cumulative = [0]
for i in range(len(data)):
cumulative.append(cumulative[-1] + data[i]['value'])
# Adjusting values to start from zero for both first and last columns
cumulative[-1] = 0 # Set the last cumulative value to zero
# Extracting labels and values
labels = [item['label'] for item in data]
values = [item['value'] for item in data]
# Plotting the waterfall chart using go.Bar
bars = []
for i in range(len(data)):
color = '#4A88D9' if i == 0 or i == len(data) - 1 else '#DC5537' # Blue for first and last, gray for others
hover_text = f"{labels[i]}
Value: {abs(values[i])}"
bars.append(go.Bar(
x=[labels[i]],
y=[cumulative[i+1] - cumulative[i]],
base=[cumulative[i]],
text=[f"{abs(values[i]):,}"],
textposition='auto',
hovertemplate=hover_text,
marker=dict(color=color),
showlegend=False
))
# Creating the figure
fig = go.Figure(data=bars)
# Updating layout for black background and gray gridlines
if btn_chart == "Month on Month":
fig.update_layout(
title=f"Change In MMM Estimated Prospect Contribution"
,showlegend=False,
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
xaxis=dict(
showgrid=False,
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospects",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
# range=[18000, max(max(cumulative), max(values)) + 1000] # Setting the y-axis range from 19k to slightly above the maximum value
)
)
fig.add_annotation(
text=f"{start_date_prev.strftime('%m-%d-%Y')} to {end_date_prev.strftime('%m-%d-%Y')} vs. {start_date.strftime('%m-%d-%Y')} To {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
# fig.update_xaxes(
# tickmode="array",
# # categoryorder="total ascending",
# tickvals=[f"{abs(values[i])}"],
# ticktext=[f"{abs(values[i])}"],
# ticklabelposition="outside",
# tickfont=dict(color="white"),
# )
else :
fig.update_layout(
showlegend=False,
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
xaxis=dict(
showgrid=False,
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospects",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
# range=[10000, max(cumulative)+1000] # Setting the y-axis range from 19k to slightly above the maximum value
)
)
fig.add_annotation(
text=f"{start_date_prev.strftime('%m-%d-%Y')} to {end_date_prev.strftime('%m-%d-%Y')} vs. {start_date.strftime('%m-%d-%Y')} To {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
# # # # print(cur_data)
# # # # print(prev_data)
# fig.show()
return fig
def shares_df_func(start_date,end_date):
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
start_date_prev = start_date +timedelta(weeks=-4)
end_date_prev = start_date +timedelta(days=-1)
prev_data = df[(df['Date'] >= start_date_prev) & (df['Date'] <= end_date_prev)]
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
cur_df1 = pd.DataFrame(cur_data[spend_cols].sum()).reset_index()
cur_df2 = pd.DataFrame(cur_data[metric_cols].sum()).reset_index()
cur_df3 = pd.DataFrame(cur_data[contribution_cols].sum()).reset_index()
cur_df1.columns = ["channels","cur_total_spend"]
cur_df2.columns = ["channels","cur_total_support"]
cur_df3.columns = ["channels","cur_total_contributions"]
cur_df1["channels"] = channels
cur_df2["channels"] = channels
cur_df3["channels"] = channels
cur_df1["cur_spend_share"] = (cur_df1["cur_total_spend"]/cur_df1["cur_total_spend"].sum())*100
cur_df2["cur_support_share"] = (cur_df2["cur_total_support"]/cur_df2["cur_total_support"].sum())*100
cur_df3["cur_contributions_share"] = (cur_df3["cur_total_contributions"]/cur_df3["cur_total_contributions"].sum())*100
prev_df1 = pd.DataFrame(prev_data[spend_cols].sum()).reset_index()
prev_df2 = pd.DataFrame(prev_data[metric_cols].sum()).reset_index()
prev_df3 = pd.DataFrame(prev_data[contribution_cols].sum()).reset_index()
prev_df1.columns = ["channels","prev_total_spend"]
prev_df2.columns = ["channels","prev_total_support"]
prev_df3.columns = ["channels","prev_total_contributions"]
prev_df1["channels"] = channels
prev_df2["channels"] = channels
prev_df3["channels"] = channels
prev_df1["prev_spend_share"] = (prev_df1["prev_total_spend"]/prev_df1["prev_total_spend"].sum())*100
prev_df2["prev_support_share"] = (prev_df2["prev_total_support"]/prev_df2["prev_total_support"].sum())*100
prev_df3["prev_contributions_share"] = (prev_df3["prev_total_contributions"]/prev_df3["prev_total_contributions"].sum())*100
cur_df = cur_df1.merge(cur_df2,on="channels",how = "inner")
cur_df = cur_df.merge(cur_df3,on="channels",how = "inner")
prev_df = prev_df1.merge(prev_df2,on="channels",how = "inner")
prev_df = prev_df.merge(prev_df3,on="channels",how = "inner")
shares_df = cur_df.merge(prev_df,on = "channels",how = "inner")
shares_df["Contribution Change"] = (-shares_df["prev_contributions_share"]+shares_df["cur_contributions_share"])/shares_df["prev_contributions_share"]
shares_df["Support Change"] = (-shares_df["prev_support_share"]+shares_df["cur_support_share"])/shares_df["prev_support_share"]
shares_df["Spend Change"] = (-shares_df["prev_spend_share"]+shares_df["cur_spend_share"])/shares_df["prev_spend_share"]
shares_df["Efficiency Index"] = shares_df["cur_contributions_share"]/shares_df["cur_spend_share"]
shares_df["Effectiveness Index"] = shares_df["cur_support_share"]/shares_df["cur_spend_share"]
return shares_df
def waterfall_table_func(shares_df):
### waterfall delta table
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
waterfall_delta_df = shares_df[["channels","Contribution Change","Support Change","Spend Change"]]
waterfall_delta_df = waterfall_delta_df.rename(columns = {"channels":"METRIC"})
waterfall_delta_df.index = waterfall_delta_df["METRIC"]
waterfall_delta_df = waterfall_delta_df.round(2)
return (waterfall_delta_df[["Contribution Change","Support Change","Spend Change"]].transpose())
def channel_contribution(start_date,end_date):
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
channel_df = pd.DataFrame(cur_data[contribution_cols].sum()).reset_index()
channel_df.columns = ["channels","contributions"]
channel_df["channels"] = channels
# Creating the bar chart
fig = go.Figure(data=[go.Bar(
x=channel_df['channels'],
y=round(channel_df['contributions']),
marker=dict(color='rgb(74, 136, 217)'), # Blue color for all bars
text=(channel_df['contributions']).astype(int).apply(lambda x: f"{x:,}"),
textposition='outside'
)])
# Updating layout for better visualization
fig.update_layout(
# title=f"Media Contribution",
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
title=
{
'text': "Media Contribution",
'font': {
'size': 28,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
},
xaxis=dict(
showgrid=False,
gridcolor='gray', # Setting x-axis gridline color to gray
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospect",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
)
)
fig.add_annotation(
text=f"{start_date.strftime('%m-%d-%Y')} to {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
return fig
def chanel_spends(start_date,end_date):
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
channel_df = pd.DataFrame(cur_data[spend_cols].sum()).reset_index()
channel_df.columns = ["channels","spends"]
channel_df["channels"] = channels
# Creating the bar chart
fig = go.Figure(data=[go.Bar(
x=channel_df['channels'],
y=round(channel_df['spends']),
marker=dict(color='rgb(74, 136, 217)'), # Blue color for all bars
text=channel_df['spends'].apply(numerize),
# text = (channel_df['spends']).astype(int).apply(lambda x: f"{x:,}"),
textposition='outside'
)])
# Updating layout for better visualization
fig.update_layout(
# title=f"Media Spends",
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
title=
{
'text': "Media Spends",
'font': {
'size': 28,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
},
xaxis=dict(
showgrid=False,
gridcolor='gray', # Setting x-axis gridline color to gray
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Spends ($)",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
)
)
fig.add_annotation(
text=f"{start_date.strftime('%m-%d-%Y')} to {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
return fig
def shares_table_func(shares_df):
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
### Shares tables
shares_table_df = shares_df[["channels","cur_spend_share","cur_support_share","cur_contributions_share","Efficiency Index","Effectiveness Index"]]
shares_table_df = shares_table_df.rename(columns = {"channels":"METRIC",
"cur_spend_share":"Spend Share",
"cur_support_share":"Support Share",
"cur_contributions_share":"Contribution Share"})
shares_table_df.index = shares_table_df["METRIC"]
for c in ["Spend Share","Support Share","Contribution Share"]:
shares_table_df[c] = shares_table_df[c].astype(int)
shares_table_df[c] = shares_table_df[c].astype(str)+'%'
for c in ["Efficiency Index","Effectiveness Index"]:
shares_table_df[c] = shares_table_df[c].round(2).astype(str)
shares_table_df = shares_table_df[["Spend Share","Support Share","Contribution Share","Efficiency Index","Effectiveness Index"]].transpose()
return (shares_table_df)
def eff_table_func(shares_df):
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
media_df = shares_df[['channels', 'cur_total_spend',"cur_total_support", "cur_total_contributions" ,'cur_spend_share',
'cur_support_share', 'cur_contributions_share', 'Efficiency Index', 'Effectiveness Index']]
media_df = media_df.rename(columns = {"channels":"MEDIA",
"cur_total_spend":"TOTAL SPEND",
"cur_total_support":"TOTAL SUPPORT",
"cur_total_contributions":"TOTAL CONTRIBUTION",
"cur_spend_share":"SPEND SHARE",
"cur_support_share":"SUPPORT SHARE",
"cur_contributions_share":"CONTRIBUTION SHARE",
'Efficiency Index':'EFFICIENCY INDEX',
'Effectiveness Index' :'EFFECTIVENESS INDEX'
})
media_df.index = media_df["MEDIA"]
media_df.drop(columns = ["MEDIA"],inplace = True)
for c in ["TOTAL SPEND","TOTAL SUPPORT","TOTAL CONTRIBUTION"]:
media_df[c] = media_df[c].astype(int)
for c in ["SPEND SHARE","SUPPORT SHARE","CONTRIBUTION SHARE"]:
media_df[c] = media_df[c].astype(int)
media_df[c] = media_df[c].astype(str)+'%'
for c in ['EFFICIENCY INDEX','EFFECTIVENESS INDEX']:
media_df[c] = media_df[c].round(2).astype(str)
return (media_df)
def cpp(start_date,end_date):
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
fig = go.Figure()
colors = [
'rgba(74, 136, 217, 0.8)', # Blue
'rgba(220, 85, 55, 0.8)', # Red
'rgba(67, 150, 80, 0.8)', # Green
'rgba(237, 151, 35, 0.8)', # Orange
'rgba(145, 68, 255, 0.8)', # Purple
'rgba(128, 128, 128, 0.8)', # Gray
'rgba(255, 165, 0, 0.8)', # Amber
'rgba(255, 192, 203, 0.8)', # Pink
'rgba(0, 191, 255, 0.8)', # Deep Sky Blue
'rgba(127, 255, 0, 0.8)', # Chartreuse
'rgba(255, 69, 0, 0.8)', # Red-Orange
'rgba(75, 0, 130, 0.8)', # Indigo
'rgba(240, 230, 140, 0.8)', # Khaki
'rgba(218, 112, 214, 0.8)'
]
colors = ['#ff2b2b', # Pastel Peach
'#0068c9', # Pastel Blue
'#83c9ff', # Pastel Pink
'#ffabab', # Pastel Purple
'#29b09d', # Pastel Green
'#7defa1', # Pastel Yellow
'#ff8700', # Pastel Gray
'#ffd16a', # Pastel Red
'#6d3fc0', # Pastel Rose
'#d5dae5', # Pastel Lavender
'#309bff', # Pastel Mauve
'#e9f5ff', # Pastel Beige
'#BEBADA' # Pastel Lilac
]
for i in range(0,13):
cpp_df = cur_data[['Date',spend_cols[i],contribution_cols[i]]]
cpp_df[channels[i]+"_cpp"] = cpp_df[spend_cols[i]]/cpp_df[contribution_cols[i]]
# Add each line trace
fig.add_trace(go.Scatter(x=cpp_df['Date'], y=cpp_df[channels[i]+"_cpp"], mode='lines', name=channels[i], line=dict(color=colors[i])))
# Update layout for better visualization
fig.update_layout(
# title=f"CPP Distribution"
# ,
title=
{
'text': "Cost Per Prospect Distribution",
'font': {
'size': 28,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
},
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
xaxis=dict(
showgrid=False,
gridcolor='lightgray',
griddash='dot', # Setting x-axis gridline color to gray
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="CPP",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
),
hovermode='x' # Show hover info for all lines at a single point
)
fig.add_annotation(
text=f"{start_date.strftime('%m-%d-%Y')} to {end_date.strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
return fig
def base_decomp():
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
base_decomp_df = df[['Date','Unemployment', 'Competition','Trend','Seasonality','Base_0']]
fig = go.Figure()
colors = ['#ff2b2b', # Pastel Peach
'#0068c9', # Pastel Blue
'#83c9ff', # Pastel Pink
]
# Add each line trace
fig.add_trace(go.Scatter(x=base_decomp_df['Date'], y=base_decomp_df['Base_0'], mode='lines', name='Trend and Seasonality',line=dict(color=colors[0])))
fig.add_trace(go.Scatter(x=base_decomp_df['Date'], y=base_decomp_df['Unemployment'], mode='lines', name='Unemployment',line=dict(color=colors[1])))
fig.add_trace(go.Scatter(x=base_decomp_df['Date'], y=base_decomp_df['Competition'], mode='lines', name='Competition',line=dict(color=colors[2])))
# Update layout for better visualization
fig.update_layout(
# title=f"Base Decomposition"
#
{cur_data['Date'].min().strftime('%m-%d-%Y')} to {cur_data['Date'].max().strftime('%m-%d-%Y')}"
# ,
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
title=
{
'text': "Base Decomposition",
'font': {
'size': 28,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
},
xaxis=dict(
showgrid=False,
gridcolor='gray', # Setting x-axis gridline color to gray
zeroline=True, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospect",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
),
hovermode='x' # Show hover info for all lines at a single point
)
fig.add_annotation(
text=f"{base_decomp_df['Date'].min().strftime('%m-%d-%Y')} to {(base_decomp_df['Date'].max()+timedelta(days=6)).strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
return fig
def media_decomp():
# if pd.isnull(start_date) == True :
# start_date = datetime(2024, 1, 28)
# if pd.isnull(end_date) == True :
# end_date = datetime(2024, 2, 24)
df['base'] = df[ 'Base_0']+df['Unemployment']+df['Competition']
cols = ['Date',
'base',
'Broadcast TV_Prospects',
'Cable TV_Prospects',
'Connected & OTT TV_Prospects',
'Video_Prospects',
'Display Prospecting_Prospects',
'Display Retargeting_Prospects',
'Social Prospecting_Prospects',
'Social Retargeting_Prospects',
'Search Brand_Prospects',
'Search Non-brand_Prospects',
'Digital Partners_Prospects',
'Audio_Prospects',
'Email_Prospects',
]
media_decomp_df = df[cols]
# Calculating the cumulative sum for stacking
cumulative_df = media_decomp_df.copy()
# for channel in media_decomp_df.columns[1:]:
# cumulative_df[channel] = cumulative_df[channel] + cumulative_df[channel].shift(1, fill_value=0)
media_cols = media_decomp_df.columns
for i in range(2,len(media_cols)):
# # # # print(media_cols[i])
cumulative_df[media_cols[i]] = cumulative_df[media_cols[i]] + cumulative_df[media_cols[i-1]]
# cumulative_df
# Creating the stacked area chart
fig = go.Figure()
colors =colors = [
'rgba(74, 136, 217, 0.8)', # Blue
'rgba(220, 85, 55, 0.8)', # Red
'rgba(67, 150, 80, 0.8)', # Green
'rgba(237, 151, 35, 0.8)', # Orange
'rgba(145, 68, 255, 0.8)', # Purple
'rgba(128, 128, 128, 0.8)', # Gray
'rgba(255, 165, 0, 0.8)', # Amber
'rgba(255, 192, 203, 0.8)', # Pink
'rgba(0, 191, 255, 0.8)', # Deep Sky Blue
'rgba(127, 255, 0, 0.8)', # Chartreuse
'rgba(255, 69, 0, 0.8)', # Red-Orange
'rgba(75, 0, 130, 0.8)', # Indigo
'rgba(240, 230, 140, 0.8)', # Khaki
'rgba(218, 112, 214, 0.8)'
]
for idx, channel in enumerate(media_decomp_df.columns[1:]):
fig.add_trace(go.Scatter(
x=media_decomp_df['Date'],
y=cumulative_df[channel],
fill='tonexty' if idx > 0 else 'tozeroy', # Fill to the previous curve
mode='none',
name=str.split(channel,'_')[0],
text=media_decomp_df[channel], # Adding text for each point
hoverinfo='x+y+text',
fillcolor=colors[idx] # Different color for each channel
))
# Updating layout for better visualization
fig.update_layout(
# title=f"Media Decomposition",#
{cur_data['Date'].min().strftime('%m-%d-%Y')} to {cur_data['Date'].max().strftime('%m-%d-%Y')}",
title=
{
'text': "Media Decomposition",
'font': {
'size': 28,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
},
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
xaxis=dict(
showgrid=False,
gridcolor='gray', # Setting x-axis gridline color to gray
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospect",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
)
)
fig.add_annotation(
text=f"{media_decomp_df['Date'].min().strftime('%m-%d-%Y')} to {(media_decomp_df['Date'].max()+timedelta(days=6)).strftime('%m-%d-%Y')}",
x=0,
y=1.15,
xref="x domain",
yref="y domain",
showarrow=False,
font=dict(size=16),
# align='left'
)
return fig
def mmm_model_quality():
base_df = df[['Date',"Y_hat","Y"]]
fig = go.Figure()
# Add each line trace
fig.add_trace(go.Scatter(x=base_df['Date'], y=base_df['Y_hat'], mode='lines', name='Predicted',line=dict(color='#CC5500') ))
fig.add_trace(go.Scatter(x=base_df['Date'], y=base_df['Y'], mode='lines', name='Actual (Prospect)',line=dict(color='#4B88FF')))
# Update layout for better visualization
fig.update_layout(
title={
'text': "Model Predicted v/s Actual Prospects",
'font': {
'size': 24,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
}
# title=f"Model Predicted v/s Actual Prospects"
,
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='white'), # Changing font color to white for better contrast
xaxis=dict(
showgrid=False,
gridcolor='gray', # Setting x-axis gridline color to gray
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
title="Prospects",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
),
hovermode='x' # Show hover info for all lines at a single point
)
return(fig)
def media_data():
# Path to your JSON file
json_file_path = "all_solutions_2024-05-09.json"
# Read the JSON file
with open(json_file_path, 'r') as file:
json_data = json.load(file)
# Initialize a list to store the extracted data
extracted_data = []
# Extract half_life and coeff from media_params
for params_type in ["control_params","other_params","media_params"]:
for media, params in json_data['solution_0']['solution'][params_type].items():
try:
extracted_data.append({
'category': media,# str.split(params_type,'_')[0],
'half_life': params['half_life'],
'coeff': params['coeff']
})
except:
extracted_data.append({
'category':media,# str.split(params_type,'_')[0],
'half_life': None,
'coeff': params['coeff']
})
media_df = pd.DataFrame(extracted_data)
return media_df
def elasticity_and_media(media_df):
# Create subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=("Chart 1", "Chart 2"))
fig.add_trace(
go.Bar(
x=media_df['coeff'],
y=media_df['category'],
orientation='h', # Setting the orientation to horizontal
marker_color='rgba(75, 136, 257, 1)',
text= media_df['coeff'].round(2),
textposition="outside"
),row=1, col=1
)
fig.add_trace(
go.Bar(
x=media_df[media_df['half_life'].isnull()==False]['half_life'],
y=media_df[media_df['half_life'].isnull()==False]['category'],
orientation='h', # Setting the orientation to horizontal
marker_color='rgba(75, 136, 257, 1)',
# text= media_df[media_df['half_life'].isnull()==False]['half_life'].round(2),
textposition="outside"
),row=1, col=2
)
fig.update_layout(
margin=dict(l=40, r=40, t=40, b=40), # Adjust the margins
)
return fig
def elasticity(media_df):
fig = go.Figure()
# media_df = media_df[["category","coeff"]]
fig.add_trace(go.Bar(
x=media_df['coeff'],
y=media_df['category'],
orientation='h', # Setting the orientation to horizontal
marker_color='rgba(75, 136, 257, 1)',
text= media_df['coeff'].round(2),
textposition="outside"
))
# Updating layout for better visualization
fig.update_layout(
title={
'text': "Media And Baseline Elasticity",
'font': {
'size': 24,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
}
,
# title="Media And Baseline Elasticity",
xaxis=dict(
title="Elasticity (coefficient)",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting x-axis gridline color to gray
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
showgrid=False,
gridcolor='gray', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
),
margin=dict(r=10)
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='lightgray') # Changing font color to white for better contrast
)
return fig
def half_life(media_df):
fig = go.Figure()
# media_df = media_df[["category","coeff"]]
fig.add_trace(go.Bar(
x=media_df[media_df['half_life'].isnull()==False]['half_life'],
y=media_df[media_df['half_life'].isnull()==False]['category'],
orientation='h', # Setting the orientation to horizontal
marker_color='rgba(75, 136, 257, 1)',
text= media_df[media_df['half_life'].isnull()==False]['half_life'].round(2),
textposition="outside"
))
# Updating layout for better visualization
fig.update_layout(
title={
'text': "Media Half-life",
'font': {
'size': 24,
'family': 'Arial',
'color': 'black',
# 'bold': True
}
}
,
xaxis=dict(
title="Weeks",
showgrid=True,
gridcolor='lightgray',
griddash='dot', # Setting x-axis gridline color to gray
zeroline=False, # Hiding the x-axis zero line
),
yaxis=dict(
showgrid=False,
gridcolor='gray', # Setting y-axis gridline color to gray
zeroline=False, # Hiding the y-axis zero line
),margin=dict(l=20)
# plot_bgcolor='black',
# paper_bgcolor='black',
# font=dict(color='lightgray') # Changing font color to white for better contrast
)
return fig
# media metrics table
n = 104
k = 18
def calculate_aic(y, y_hat):
n = len(y)
sse = np.sum((y - y_hat) ** 2)
aic = n * np.log(sse / n) + 2 * k
return aic
def calculate_bic(y, y_hat):
n = len(y)
sse = np.sum((y - y_hat) ** 2)
bic = n * np.log(sse / n) + k * np.log(n)
return bic
def calculate_r_squared(y, y_hat):
ss_total = np.sum((y - np.mean(y)) ** 2)
ss_residual = np.sum((y - y_hat) ** 2)
r_squared = 1 - (ss_residual / ss_total)
return r_squared
# Function to calculate Adjusted R-squared
def calculate_adjusted_r_squared(y, y_hat):
n = len(y)
r_squared = calculate_r_squared(y, y_hat)
adjusted_r_squared = 1 - ((1 - r_squared) * (n - 1) / (n - k - 1))
return adjusted_r_squared
# Function to calculate MAPE
def calculate_mape(y, y_hat):
mape = np.mean(np.abs((y - y_hat) / y)) * 100
return mape
def model_metrics_table_func():
model_metrics_df = pd.DataFrame([calculate_r_squared(df["Y"], df["Y_hat"]),
calculate_adjusted_r_squared(df["Y"], df["Y_hat"]),
calculate_mape(df["Y"], df["Y_hat"]),
calculate_aic(df["Y"], df["Y_hat"]),
calculate_bic(df["Y"], df["Y_hat"])])
model_metrics_df.index = ["R-squared","Adjusted R-squared","MAPE","AIC","BIC"]
model_metrics_df = model_metrics_df.transpose()
# model_metrics_df.index = model_metrics_df["R-squared"]
# model_metrics_df = model_metrics_df.drop(columns=["R-squared"])
model_metrics_df2 = pd.DataFrame(model_metrics_df.values,columns=["R-squared","Adjusted R-squared","MAPE","AIC","BIC"] )
# model_metrics_df2 = model_metrics_df2.round(2)
model_metrics_df2["R-squared"] = model_metrics_df2["R-squared"].apply(lambda x: "{:.2%}".format(x))
model_metrics_df2["Adjusted R-squared"] = model_metrics_df2["Adjusted R-squared"].apply(lambda x: "{:.2%}".format(x))
model_metrics_df2["MAPE"] = (model_metrics_df2["MAPE"]/100).apply(lambda x: "{:.2%}".format(x))
model_metrics_df2["AIC"] = model_metrics_df2["AIC"].round(0)
model_metrics_df2["BIC"] = model_metrics_df2["BIC"].round(0)
model_metrics_df2.index = [" "]
# model_metrics_df2 = model_metrics_df2.reset_index(drop = True)
return model_metrics_df2
def get_month_name(month_number):
months = ["January", "February", "March", "April", "May", "June",
"July", "August", "September", "October", "November", "December"]
if 1 <= month_number <= 12:
return months[month_number - 1]
else:
return "Invalid month number"
def scenario_spend_forecasting(delta_df,start_date,end_date):
key_df = pd.DataFrame()
key_df["Channel_name"] = ["Email",
"DisplayRetargeting",
"\xa0Video",
"BroadcastTV",
"SocialRetargeting",
"Connected&OTTTV",
"SearchBrand",
"Audio",
"SocialProspecting",
"CableTV",
"DisplayProspecting",
"SearchNon-brand",
"DigitalPartners"]
key_df["Channels"] = [
"EMAIL",
"DISPLAY RETARGETING",
"VIDEO",
"BROADCAST TV",
"SOCIAL RETARGETING",
"CONNECTED & OTT TV",
"SEARCH BRAND",
"AUDIO",
"SOCIAL PROSPECTING",
"CABLE TV",
"DISPLAY PROSPECTING",
"SEARCH NON-BRAND",
"DIGITAL PARTNERS"
]
start_date = pd.to_datetime(start_date)
end_date = pd.to_datetime(end_date)
cur_data = df[(df['Date'] >= start_date) & (df['Date'] <= end_date)]
cur_data["Month"] = cur_data["Date"].dt.month
# cur_data["Year"] = cur_data["Date"].dt.year
cur_data["Month year"] = cur_data["Month"].apply(get_month_name) + ' ' +(cur_data["Date"].dt.year+1).astype(str)
grp_cols = ['tv_broadcast_spend',
'tv_cable_spend',
'stream_video_spend',
'olv_spend',
'disp_prospect_spend',
'disp_retarget_spend',
'social_prospect_spend',
'social_retarget_spend',
'search_brand_spend',
'search_nonbrand_spend',
'cm_spend',
'audio_spend',
'email_spend',
"Month",
"Month year"]
data2 = cur_data[grp_cols].groupby("Month year").sum()
data2.columns = [
'BROADCAST TV',
'CABLE TV',
'CONNECTED & OTT TV',
'VIDEO',
'DISPLAY PROSPECTING',
'DISPLAY RETARGETING',
'SOCIAL PROSPECTING',
'SOCIAL RETARGETING',
'SEARCH BRAND',
'SEARCH NON-BRAND',
'DIGITAL PARTNERS',
'AUDIO',
'EMAIL',
"Month"]
data2 = data2.sort_values("Month")
data2.drop(columns = ["Month"], inplace = True)
key_df = pd.DataFrame()
key_df["Channel_name"] = ["Email","DisplayRetargeting","\xa0Video","BroadcastTV","SocialRetargeting","Connected&OTTTV","SearchBrand","Audio","SocialProspecting","CableTV","DisplayProspecting","SearchNon-brand","DigitalPartners"]
key_df["Channels"] = ["EMAIL","DISPLAY RETARGETING","VIDEO","BROADCAST TV","SOCIAL RETARGETING","CONNECTED & OTT TV","SEARCH BRAND","AUDIO","SOCIAL PROSPECTING","CABLE TV","DISPLAY PROSPECTING","SEARCH NON-BRAND","DIGITAL PARTNERS"]
delta_df = delta_df.merge(key_df,on = "Channel_name",how = "inner")
# # print(delta_df)
data3 = data2.copy()
for channel in delta_df["Channels"]:
# # print(channel)
delta_percent = delta_df[delta_df["Channels"]==channel]["Delta_percent"].iloc[0]
# # print(delta_percent)
data3[channel] = data3[channel]*(1+delta_percent/100)
# # print(data2)
# # print(data3)
###### output dataframes
output_df2 = data3.copy()
#### percent change dataframe
delta_df2 = pd.DataFrame(data = delta_df["Delta_percent"].values,index = delta_df["Channels"])
# # print(delta_df2)
output_df1 = (pd.DataFrame(data2.sum()).transpose()).append(pd.DataFrame(data3.sum()).transpose()).append(delta_df2.transpose())
output_df1.index = ["Last Year Spends", "Forecasted Spends","Spends Change"]
# # print(output_df1)
#
# # print (data3)
# data3 = data2.append(key_df)
# # print (data2)
# cur_data = cur_data[spend_cols]
# cur_data.columns = channels
# data1 = pd.DataFrame(cur_data[channels].sum().transpose()).reset_index()
# data1.columns = ["Channels","last_year_spends"]
# df_modified = delta_df.merge(key_df,on = "Channel_name",how = "inner")
# df_modified2 = df_modified.merge(data1,on = "Channels",how ="outer")
# # df_modified2["Forecasted Spends"] =( df_modified2["last_year_spends"]*(1+df_modified2["Delta_percent"]/100)).astype(int)
# df_modified2["Forecasted Spends"] =( df_modified2["last_year_spends"]*(1+df_modified2["Delta_percent"]/100)).apply(lambda x: "${:,.0f}".format(x))
# df_modified2.index = df_modified2["Channels"]
# df_modified2["Spend Change"] = (df_modified2["Delta_percent"]/100).apply(lambda x: "{:.0%}".format(x))
# # df_modified2["Forecasted Spends"] = df_modified2["Forecasted Spends"].astype(int)
# df_modified2["Last Year Spends"] = df_modified2["last_year_spends"].apply(lambda x: "${:,.0f}".format(x))
# df_modified3 = df_modified2[["Last Year Spends","Forecasted Spends","Spend Change"]].transpose()
# # df_modified2["forecasted_spends"] =
# # # df_modified = delta_percent
# # # df_modified["Optimised Spends"] = df_modified["Current Spends"]*
# df_modified3 = df_modified3[['BROADCAST TV', 'CABLE TV',
# 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING',
# 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING',
# 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO',
# 'EMAIL']]
return output_df1,output_df2
def scenario_spend_forecasting2(delta_df,start_date,end_date):
key_df = pd.DataFrame()
key_df["Channel_name"] = ["Email",
"DisplayRetargeting",
"\xa0Video",
"BroadcastTV",
"SocialRetargeting",
"Connected&OTTTV",
"SearchBrand",
"Audio",
"SocialProspecting",
"CableTV",
"DisplayProspecting",
"SearchNon-brand",
"DigitalPartners"]
key_df["Channels"] = [
"EMAIL",
"DISPLAY RETARGETING",
"VIDEO",
"BROADCAST TV",
"SOCIAL RETARGETING",
"CONNECTED & OTT TV",
"SEARCH BRAND",
"AUDIO",
"SOCIAL PROSPECTING",
"CABLE TV",
"DISPLAY PROSPECTING",
"SEARCH NON-BRAND",
"DIGITAL PARTNERS"
]
# import math
# start_date = pd.to_datetime(start_date)
# end_date = pd.to_datetime(end_date)
# cur_data = df[(df['Date'] >= start_date) & (df['Date'] < end_date)]
# cur_data = cur_data[spend_cols2]
# cur_data.columns = channels2
# cur_data["Date2"] = cur_data["Date"]+ pd.Timedelta(days=6)
# cur_data["Month"] = cur_data["Date"].dt.month
# # cur_data["Date"] = delta_df["Date"]
# # cur_data["Date_diff"] = (cur_data["Date"]-start_date).dt.days
# # cur_data["Date_diff_months"] =(np.ceil(cur_data["Date_diff"] / 30))
# data2 = cur_data.groupby("Month").agg({
# 'BROADCAST TV':"sum",
# 'CABLE TV':"sum",
# 'CONNECTED & OTT TV':"sum",
# 'VIDEO':"sum",
# 'DISPLAY PROSPECTING':"sum",
# 'DISPLAY RETARGETING':"sum",
# 'SOCIAL PROSPECTING':"sum",
# 'SOCIAL RETARGETING':"sum",
# 'SEARCH BRAND':"sum",
# 'SEARCH NON-BRAND':"sum",
# 'DIGITAL PARTNERS':"sum",
# 'AUDIO':"sum",
# 'EMAIL':"sum"
# }).reset_index()
# def get_month_name(month_number):
# months = ["January", "February", "March", "April", "May", "June",
# "July", "August", "September", "October", "November", "December"]
# if 1 <= month_number <= 12:
# return months[month_number - 1]
# else:
# return "Invalid month number"
# data2["Month year"] = data2["Month"].apply(get_month_name) + ' ' +(data2["Date"].dt.year+1).astype(str)
# # # # print(data2.columns)
# data2 = data2[['Month year' ,'BROADCAST TV', 'CABLE TV',
# 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING',
# 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING',
# 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO',
# 'EMAIL']]
# data2.columns = ['Month ','BROADCAST TV', 'CABLE TV',
# 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING',
# 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING',
# 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO',
# 'EMAIL']
# data2.set_index('Month ', inplace=True)
# for c in ['BROADCAST TV', 'CABLE TV',
# 'CONNECTED & OTT TV', 'VIDEO', 'DISPLAY PROSPECTING',
# 'DISPLAY RETARGETING', 'SOCIAL PROSPECTING', 'SOCIAL RETARGETING',
# 'SEARCH BRAND', 'SEARCH NON-BRAND', 'DIGITAL PARTNERS', 'AUDIO',
# 'EMAIL']:
# data2[c] = data2[c].apply(lambda x: "${:,.0f}".format(x))
return key_df