Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import numpy as np | |
from datetime import datetime | |
# Page configuration: browser-tab title/icon, wide layout, sidebar open on load.
PAGE_SETTINGS = {
    "page_title": "GPT-4o mini Pricing Calculator",
    "page_icon": "🤖",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)
# Custom styling: one <style> block injected into the page. It defines the
# container classes used by the raw-HTML wrappers further down
# (title-container, metric-container, sub-header, footer) and restyles tabs.
CUSTOM_CSS = """
<style>
    .main {
        background-color: #f9f9f9;
        padding: 1rem;
    }
    .title-container {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 10px;
        margin-bottom: 1rem;
    }
    .metric-container {
        background-color: white;
        padding: 1rem;
        border-radius: 10px;
        box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        margin-bottom: 1rem;
    }
    .sub-header {
        font-weight: bold;
        color: #3366CC;
        margin-bottom: 0.5rem;
    }
    .footer {
        text-align: center;
        margin-top: 2rem;
        font-size: 0.8rem;
        color: #666;
    }
    .stTabs [data-baseweb="tab-list"] {
        gap: 24px;
    }
    .stTabs [data-baseweb="tab"] {
        height: 50px;
        white-space: pre-wrap;
        background-color: #f9f9f9;
        border-radius: 4px 4px 0px 0px;
        padding: 10px;
    }
    .stTabs [aria-selected="true"] {
        background-color: #3366CC;
        color: white;
    }
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
# Title section: heading and tagline emitted between the opening and closing
# tags of the "title-container" div.
st.markdown('<div class="title-container">', unsafe_allow_html=True)
st.title("GPT-4o mini Pricing Calculator")
st.markdown("Interactive cost analysis for text and audio processing using GPT-4o mini")
st.markdown('</div>', unsafe_allow_html=True)

# Sidebar navigation: the selected label drives the page dispatch below.
PAGE_NAMES = [
    "Dashboard",
    "Text Analysis",
    "Audio Analysis",
    "Comparative Analysis",
    "Cost Calculator",
    "Documentation",
]
st.sidebar.title("Navigation")
page = st.sidebar.radio("Select Page", PAGE_NAMES)
# GPT-4o mini pricing constants
TEXT_INPUT_PRICE = 0.60  # $ per 1M tokens
TEXT_OUTPUT_PRICE = 2.40  # $ per 1M tokens
TEXT_CACHED_PRICE = 0.30  # $ per 1M tokens
AUDIO_INPUT_PRICE = 10.00  # $ per 1M tokens
AUDIO_OUTPUT_PRICE = 20.00  # $ per 1M tokens
AUDIO_CACHED_PRICE = 0.30  # $ per 1M tokens


# Helper functions for calculations
def _token_costs(input_tokens, output_tokens, input_price, cached_price, output_price, cached_pct):
    """Return ``(input_cost, output_cost)`` in dollars for the given token volumes.

    ``cached_pct`` percent of the input tokens are billed at ``cached_price``
    and the remainder at ``input_price``; all prices are dollars per 1M tokens.
    """
    cached_input_tokens = input_tokens * (cached_pct / 100)
    standard_input_tokens = input_tokens - cached_input_tokens
    input_cost = (standard_input_tokens * input_price / 1000000) + (cached_input_tokens * cached_price / 1000000)
    output_cost = output_tokens * output_price / 1000000
    return input_cost, output_cost


def calculate_text_costs(users, msgs_per_user, input_tokens, output_tokens, cached_pct=0):
    """Estimate text-model token volume and cost.

    Args:
        users: Number of users.
        msgs_per_user: Messages sent per user.
        input_tokens: Input tokens per message.
        output_tokens: Output tokens per message.
        cached_pct: Percentage (0-100) of input tokens billed at the cached rate.

    Returns:
        dict with keys ``input_tokens``, ``output_tokens``, ``input_cost``,
        ``output_cost``, ``total_cost`` and ``cost_per_message`` (0 when there
        are no messages).
    """
    total_messages = users * msgs_per_user
    total_input_tokens = total_messages * input_tokens
    total_output_tokens = total_messages * output_tokens

    input_cost, output_cost = _token_costs(
        total_input_tokens, total_output_tokens,
        TEXT_INPUT_PRICE, TEXT_CACHED_PRICE, TEXT_OUTPUT_PRICE, cached_pct,
    )
    total_cost = input_cost + output_cost

    return {
        'input_tokens': total_input_tokens,
        'output_tokens': total_output_tokens,
        'input_cost': input_cost,
        'output_cost': output_cost,
        'total_cost': total_cost,
        'cost_per_message': total_cost / total_messages if total_messages > 0 else 0
    }


def calculate_audio_costs(users, audio_minutes, tokens_per_sec=600, output_ratio=0.05, cached_pct=0):
    """Estimate audio-model token volume and cost.

    Args:
        users: Number of users.
        audio_minutes: Minutes of audio per user.
        tokens_per_sec: Input tokens generated per second of audio.
        output_ratio: Output tokens as a fraction of input tokens.
        cached_pct: Percentage (0-100) of input tokens billed at the cached rate.

    Returns:
        dict with keys ``input_tokens``, ``output_tokens``, ``input_cost``,
        ``output_cost``, ``total_cost`` and ``cost_per_minute`` (0 when no
        audio is processed).
    """
    seconds = audio_minutes * 60
    total_input_tokens = users * seconds * tokens_per_sec
    total_output_tokens = total_input_tokens * output_ratio

    input_cost, output_cost = _token_costs(
        total_input_tokens, total_output_tokens,
        AUDIO_INPUT_PRICE, AUDIO_CACHED_PRICE, AUDIO_OUTPUT_PRICE, cached_pct,
    )
    total_cost = input_cost + output_cost

    # total_cost covers every user, so the per-minute figure must be divided
    # by the minutes processed across all users; dividing by the per-user
    # minutes alone would inflate the result by the user count.
    total_minutes = users * audio_minutes

    return {
        'input_tokens': total_input_tokens,
        'output_tokens': total_output_tokens,
        'input_cost': input_cost,
        'output_cost': output_cost,
        'total_cost': total_cost,
        'cost_per_minute': total_cost / total_minutes if total_minutes > 0 else 0
    }
# Dashboard page
if page == "Dashboard":
    # Key metrics overview: three cards laid out side by side.
    st.header("GPT-4o mini Pricing Overview")
    text_card, audio_card, averages_card = st.columns(3)

    # The text and audio cards share one layout; only heading and prices differ.
    price_cards = (
        (text_card, "Text Processing", TEXT_INPUT_PRICE, TEXT_OUTPUT_PRICE, TEXT_CACHED_PRICE),
        (audio_card, "Audio Processing", AUDIO_INPUT_PRICE, AUDIO_OUTPUT_PRICE, AUDIO_CACHED_PRICE),
    )
    for card, heading, input_price, output_price, cached_price in price_cards:
        with card:
            st.markdown('<div class="metric-container">', unsafe_allow_html=True)
            st.markdown(f'<p class="sub-header">{heading}</p>', unsafe_allow_html=True)
            st.metric("Input Cost", f"${input_price:.2f}/1M tokens")
            st.metric("Output Cost", f"${output_price:.2f}/1M tokens")
            st.metric("Cached Input", f"${cached_price:.2f}/1M tokens")
            st.markdown('</div>', unsafe_allow_html=True)

    with averages_card:
        st.markdown('<div class="metric-container">', unsafe_allow_html=True)
        st.markdown('<p class="sub-header">Average Costs</p>', unsafe_allow_html=True)
        # Example workload: one user, 100 messages (15 in / 20 out tokens)
        # and one user with 10 minutes of audio.
        per_message = calculate_text_costs(1, 100, 15, 20)['cost_per_message']
        per_minute = calculate_audio_costs(1, 10)['cost_per_minute']
        st.metric("Avg Text Cost/Message", f"${per_message:.6f}")
        st.metric("Avg Audio Cost/Minute", f"${per_minute:.4f}")
        st.metric("Audio/Text Cost Ratio", f"{per_minute / (per_message * 60):.1f}x")
        st.markdown('</div>', unsafe_allow_html=True)

    # Quick comparison chart: list prices grouped by model type.
    st.subheader("Cost Comparison: Text vs. Audio")
    price_rows = [
        ('Text', 'Input', TEXT_INPUT_PRICE),
        ('Text', 'Output', TEXT_OUTPUT_PRICE),
        ('Audio', 'Input', AUDIO_INPUT_PRICE),
        ('Audio', 'Output', AUDIO_OUTPUT_PRICE),
    ]
    comparison_data = pd.DataFrame(
        price_rows,
        columns=['Model Type', 'Cost Component', 'Cost per 1M Tokens'],
    )
    price_chart = px.bar(
        comparison_data,
        x='Model Type',
        y='Cost per 1M Tokens',
        color='Cost Component',
        barmode='group',
        title="Cost Comparison per 1M Tokens",
        color_discrete_sequence=["#3366CC", "#FF9900"],
    )
    price_chart.update_layout(yaxis_title="Cost ($)")
    st.plotly_chart(price_chart, use_container_width=True)

    # Usage scenarios: static illustrative table (figures are hard-coded).
    st.subheader("Common Usage Scenarios")
    scenario_rows = [
        ('Customer Support Chat', 'Text', 10.50, 'Basic'),
        ('Document Analysis', 'Text', 25.75, 'Basic'),
        ('Meeting Transcription', 'Audio', 185.00, 'Premium'),
        ('Podcast Analysis', 'Audio', 740.00, 'Enterprise'),
        ('Phone Call Analysis', 'Audio', 370.00, 'Premium'),
    ]
    scenarios = pd.DataFrame(
        scenario_rows,
        columns=['Scenario', 'Type', 'Avg Monthly Cost', 'Suitable Plan'],
    )
    st.dataframe(scenarios, use_container_width=True)
# Text Model Analysis | |
elif page == "Text Analysis": | |
st.header("GPT-4o mini Text Model Analysis") | |
st.info(f""" | |
**Text Model Pricing**: | |
- Input: ${TEXT_INPUT_PRICE:.2f} per 1M tokens | |
- Output: ${TEXT_OUTPUT_PRICE:.2f} per 1M tokens | |
- Cached Input: ${TEXT_CACHED_PRICE:.2f} per 1M tokens | |
""") | |
# Parameters section with input widgets | |
st.subheader("Usage Parameters") | |
col1, col2 = st.columns(2) | |
with col1: | |
users = st.number_input("Number of Users", min_value=100, value=5000, step=100) | |
free_pct = st.slider("% Free Tier Users", min_value=0, max_value=100, value=80) | |
basic_pct = st.slider("% Basic Tier Users (\$12.99)", min_value=0, max_value=100, value=15) | |
pro_pct = st.slider("% Pro Tier Users (\$24.99)", min_value=0, max_value=100, value=5) | |
with col2: | |
msgs_per_user_free = st.number_input("Free Tier Messages/Month", min_value=10, value=100, step=10) | |
msgs_per_user_basic = st.number_input("Basic Tier Messages/Month", min_value=10, value=300, step=10) | |
msgs_per_user_pro = st.number_input("Pro Tier Messages/Month", min_value=10, value=500, step=10) | |
input_tokens = st.slider("Input Tokens per Message", min_value=5, max_value=100, value=15) | |
output_tokens = st.slider("Output Tokens per Message", min_value=5, max_value=100, value=20) | |
cached_pct = st.slider("% Cached Input Tokens", min_value=0, max_value=100, value=0) | |
# Calculate user distribution | |
total_pct = free_pct + basic_pct + pro_pct | |
if total_pct != 100: | |
st.warning(f"Tier percentages sum to {total_pct}%. Please adjust to equal 100%.") | |
free_users = int(users * free_pct / 100) | |
basic_users = int(users * basic_pct / 100) | |
pro_users = int(users * pro_pct / 100) | |
# Token cost calculations | |
free_costs = calculate_text_costs(free_users, msgs_per_user_free, input_tokens, output_tokens, cached_pct) | |
basic_costs = calculate_text_costs(basic_users, msgs_per_user_basic, input_tokens, output_tokens, cached_pct) | |
pro_costs = calculate_text_costs(pro_users, msgs_per_user_pro, input_tokens, output_tokens, cached_pct) | |
# Calculate revenue | |
free_revenue = 0 | |
basic_revenue = basic_users * 12.99 | |
pro_revenue = pro_users * 24.99 | |
total_revenue = free_revenue + basic_revenue + pro_revenue | |
total_cost = free_costs['total_cost'] + basic_costs['total_cost'] + pro_costs['total_cost'] | |
# Display metrics | |
st.subheader("Cost Analysis") | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
st.metric("Total Monthly Cost", f"${total_cost:.2f}") | |
st.metric("Total Monthly Revenue", f"${total_revenue:.2f}") | |
with col2: | |
profit = total_revenue - total_cost | |
margin = (profit / total_revenue * 100) if total_revenue > 0 else 0 | |
st.metric("Monthly Profit", f"${profit:.2f}") | |
st.metric("Profit Margin", f"{margin:.1f}%") | |
with col3: | |
avg_cost_per_user = total_cost / users if users > 0 else 0 | |
st.metric("Avg. Cost per User", f"${avg_cost_per_user:.4f}") | |
st.metric("Total Messages/Month", f"{free_users * msgs_per_user_free + basic_users * msgs_per_user_basic + pro_users * msgs_per_user_pro:,}") | |
# Create visualizations | |
st.subheader("Cost Distribution") | |
# Cost breakdown by tier | |
tier_costs = pd.DataFrame({ | |
'Tier': ['Free', 'Basic', 'Pro'], | |
'Cost': [free_costs['total_cost'], basic_costs['total_cost'], pro_costs['total_cost']], | |
'Users': [free_users, basic_users, pro_users] | |
}) | |
col1, col2 = st.columns(2) | |
with col1: | |
fig = px.pie(tier_costs, values='Cost', names='Tier', title="Cost Distribution by Tier", | |
color_discrete_sequence=px.colors.qualitative.Plotly) | |
st.plotly_chart(fig, use_container_width=True) | |
with col2: | |
# Create revenue vs cost comparison | |
comparison_data = pd.DataFrame({ | |
'Tier': ['Free', 'Basic', 'Pro'], | |
'Revenue': [free_revenue, basic_revenue, pro_revenue], | |
'Cost': [free_costs['total_cost'], basic_costs['total_cost'], pro_costs['total_cost']] | |
}) | |
fig = px.bar(comparison_data, x='Tier', y=['Revenue', 'Cost'], barmode='group', | |
title="Revenue vs Cost by Tier", | |
color_discrete_sequence=["#3366CC", "#FF9900"]) | |
st.plotly_chart(fig, use_container_width=True) | |
# Token usage breakdown | |
st.subheader("Token Usage Analysis") | |
token_data = pd.DataFrame({ | |
'Tier': ['Free', 'Basic', 'Pro'], | |
'Input Tokens (M)': [free_costs['input_tokens']/1000000, basic_costs['input_tokens']/1000000, pro_costs['input_tokens']/1000000], | |
'Output Tokens (M)': [free_costs['output_tokens']/1000000, basic_costs['output_tokens']/1000000, pro_costs['output_tokens']/1000000] | |
}) | |
fig = px.bar(token_data, x='Tier', y=['Input Tokens (M)', 'Output Tokens (M)'], barmode='group', | |
title="Monthly Token Usage by Tier (Millions)", | |
color_discrete_sequence=["#4CAF50", "#2196F3"]) | |
st.plotly_chart(fig, use_container_width=True) | |
# Break-even analysis | |
st.subheader("Break-even Analysis") | |
# Calculate fixed costs (assumed) | |
fixed_costs = 2000 | |
# Calculate contribution margin per user type | |
cm_basic = 12.99 - (basic_costs['total_cost'] / basic_users if basic_users > 0 else 0) | |
cm_pro = 24.99 - (pro_costs['total_cost'] / pro_users if pro_users > 0 else 0) | |
# Calculate break-even point | |
total_cm = (cm_basic * basic_users) + (cm_pro * pro_users) | |
break_even_users = int(fixed_costs / (total_cm / (basic_users + pro_users))) if basic_users + pro_users > 0 else 0 | |
col1, col2 = st.columns(2) | |
with col1: | |
st.metric("Fixed Monthly Costs", f"${fixed_costs:.2f}") | |
st.metric("Contribution Margin (Basic)", f"${cm_basic:.2f}/user") | |
st.metric("Contribution Margin (Pro)", f"${cm_pro:.2f}/user") | |
with col2: | |
st.metric("Break-even Point", f"{break_even_users:,} paid users") | |
be_conversion = break_even_users / (users * (basic_pct + pro_pct) / 100) if users * (basic_pct + pro_pct) / 100 > 0 else 0 | |
st.metric("Required Conversion Rate", f"{be_conversion:.1%}") | |
# Audio Model Analysis | |
elif page == "Audio Analysis": | |
st.header("GPT-4o mini Audio Model Analysis") | |
st.info(f""" | |
**Audio Model Pricing**: | |
- Input: ${AUDIO_INPUT_PRICE:.2f} per 1M tokens | |
- Output: ${AUDIO_OUTPUT_PRICE:.2f} per 1M tokens | |
- Cached Input: ${AUDIO_CACHED_PRICE:.2f} per 1M tokens | |
""") | |
# Audio model parameters | |
st.subheader("Audio Processing Parameters") | |
col1, col2 = st.columns(2) | |
with col1: | |
audio_minutes = st.number_input("Average Minutes of Audio/Month/User", min_value=1, value=10, step=1) | |
tokens_per_sec = st.number_input("Audio Tokens per Second", min_value=100, value=600, step=10) | |
users = st.number_input("Number of Users", min_value=10, value=1000, step=10) | |
with col2: | |
output_tokens_ratio = st.slider("Output:Input Token Ratio", min_value=0.01, max_value=0.20, value=0.05, step=0.01) | |
cached_ratio = st.slider("% Input Tokens Cached", min_value=0, max_value=100, value=20) | |
pricing_tier = st.selectbox("Pricing Model", ["B2C App (\$12.99/month)", | |
"B2B Service (\$299/month)", | |
"Enterprise (\$2500/month)"]) | |
# Calculate costs | |
audio_costs = calculate_audio_costs(users, audio_minutes, tokens_per_sec, output_tokens_ratio, cached_ratio) | |
# Pricing model revenue | |
if pricing_tier == "B2C App (\$12.99/month)": | |
price_per_user = 12.99 | |
elif pricing_tier == "B2B Service (\$299/month)": | |
price_per_user = 299 | |
else: # Enterprise | |
price_per_user = 2500 | |
revenue = users * price_per_user | |
cost_per_user = audio_costs['total_cost'] / users if users > 0 else 0 | |
profit = revenue - audio_costs['total_cost'] | |
margin = (profit / revenue) * 100 if revenue > 0 else 0 | |
# Display metrics and charts | |
st.subheader("Cost Metrics") | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
st.metric("Cost per Minute", f"${(audio_costs['total_cost']/audio_minutes/users):.4f}") | |
st.metric("Total Monthly Cost", f"${audio_costs['total_cost']:.2f}") | |
with col2: | |
st.metric("Monthly Revenue", f"${revenue:.2f}") | |
st.metric("Monthly Profit", f"${profit:.2f}") | |
with col3: | |
st.metric("Profit Margin", f"{margin:.1f}%") | |
st.metric("Cost per User", f"${cost_per_user:.2f}") | |
# Visualization - Cost breakdown | |
st.subheader("Cost Breakdown") | |
# Calculate components | |
standard_input_cost = audio_costs['input_tokens'] * (1 - cached_ratio/100) * AUDIO_INPUT_PRICE / 1000000 | |
cached_input_cost = audio_costs['input_tokens'] * (cached_ratio/100) * AUDIO_CACHED_PRICE / 1000000 | |
output_cost = audio_costs['output_cost'] | |
cost_components = pd.DataFrame({ | |
'Component': ['Standard Input Cost', 'Cached Input Cost', 'Output Cost'], | |
'Cost': [standard_input_cost, cached_input_cost, output_cost] | |
}) | |
col1, col2 = st.columns(2) | |
with col1: | |
fig = px.pie(cost_components, values='Cost', names='Component', title="Audio Processing Cost Distribution", | |
color_discrete_sequence=px.colors.qualitative.Pastel) | |
st.plotly_chart(fig, use_container_width=True) | |
with col2: | |
fig = px.bar(cost_components, x='Component', y='Cost', title="Cost Component Comparison", | |
color_discrete_sequence=["#4CAF50", "#2196F3", "#FF9800"]) | |
st.plotly_chart(fig, use_container_width=True) | |
# Caching impact analysis | |
st.subheader("Impact of Caching on Costs") | |
cache_options = [0, 20, 40, 60, 80, 100] | |
cache_costs = [] | |
for cache_pct in cache_options: | |
cache_result = calculate_audio_costs(users, audio_minutes, tokens_per_sec, output_tokens_ratio, cache_pct) | |
cache_costs.append(cache_result['total_cost']) | |
cache_data = pd.DataFrame({ | |
'Cache Percentage': cache_options, | |
'Total Cost': cache_costs, | |
'Savings': [audio_costs['total_cost'] - cost for cost in cache_costs], | |
'Savings Percentage': [(audio_costs['total_cost'] - cost) / audio_costs['total_cost'] * 100 if audio_costs['total_cost'] > 0 else 0 for cost in cache_costs] | |
}) | |
fig = px.line(cache_data, x='Cache Percentage', y='Total Cost', markers=True, | |
title="Effect of Caching on Total Cost", | |
labels={'Cache Percentage': 'Cached Input Tokens (%)', 'Total Cost': 'Total Cost ($)'}, | |
color_discrete_sequence=["#FF5722"]) | |
st.plotly_chart(fig, use_container_width=True) | |
# Optimization recommendations | |
if margin < 50: | |
st.warning("Warning: Low profit margin detected. Consider optimization strategies below.") | |
with st.expander("📈 Cost Optimization Strategies"): | |
st.markdown(""" | |
1. **Increase Caching**: Boost cached input ratio to reduce costs by up to 97% | |
2. **Hybrid Processing**: Use specialized audio services for initial transcription | |
3. **Input Token Optimization**: Filter silence and implement smart chunking | |
4. **Tiered Processing**: Apply different processing depths based on user needs | |
""") | |
# Calculate hybrid model savings | |
hybrid_cost = (audio_costs['input_tokens'] * 0.006 / 1000000) + (audio_costs['output_tokens'] * TEXT_OUTPUT_PRICE / 1000000) | |
hybrid_savings = audio_costs['total_cost'] - hybrid_cost | |
hybrid_savings_pct = (hybrid_savings / audio_costs['total_cost']) * 100 if audio_costs['total_cost'] > 0 else 0 | |
st.info(f""" | |
**Hybrid Model Potential Savings**: ${hybrid_savings:.2f} ({hybrid_savings_pct:.1f}%) | |
By using specialized transcription services (like Whisper) at \$0.006/min and processing the resulting text with GPT-4o mini text pricing. | |
""") | |
# Comparative Analysis | |
elif page == "Comparative Analysis": | |
st.header("Text vs. Audio Comparative Analysis") | |
# Cost comparison by use case | |
st.subheader("Cost Analysis by Use Case") | |
use_cases = pd.DataFrame({ | |
'Use Case': ['Customer Service', 'Content Creation', 'Data Analysis', 'Meeting Transcription'], | |
'Text Cost ($)': [0.05, 0.12, 0.08, 0.15], | |
'Audio Cost ($)': [1.85, 4.20, 2.10, 11.10], | |
'Cost Ratio': [37, 35, 26, 74], | |
'Recommended Model': ['Text', 'Text', 'Text', 'Hybrid'] | |
}) | |
st.dataframe(use_cases, use_container_width=True) | |
# Cost scaling visualization | |
st.subheader("Cost Scaling with User Count") | |
# Toggle for linear/log scale | |
scale_type = st.radio("Scale Type", ["Linear", "Logarithmic"], horizontal=True) | |
# Generate data for comparison | |
users_range = [100, 500, 1000, 5000, 10000, 50000, 100000] | |
text_costs = [users * 0.0001 * 300 for users in users_range] # 300 msgs avg | |
audio_costs = [users * 0.37 * 10 for users in users_range] # 10 minutes avg | |
scaling_data = pd.DataFrame({ | |
'Users': users_range, | |
'Text Processing Cost': text_costs, | |
'Audio Processing Cost': audio_costs | |
}) | |
# Create the chart | |
fig = px.line(scaling_data, x='Users', y=['Text Processing Cost', 'Audio Processing Cost'], | |
markers=True, title="Cost Scaling by User Count", | |
color_discrete_sequence=["#3366CC", "#FF9900"]) | |
if scale_type == "Logarithmic": | |
fig.update_layout(yaxis_type="log") | |
st.plotly_chart(fig, use_container_width=True) | |
# Break-even analysis | |
st.subheader("Break-even Analysis") | |
col1, col2 = st.columns(2) | |
with col1: | |
monthly_subscription = st.slider("Monthly Subscription ($)", | |
min_value=5.0, max_value=50.0, value=12.99, step=0.99) | |
text_usage = st.slider("Avg. Messages per User", | |
min_value=50, max_value=1000, value=300, step=50) | |
with col2: | |
audio_mins = st.slider("Avg. Audio Minutes per User", | |
min_value=1, max_value=60, value=10, step=1) | |
fixed_costs = st.number_input("Monthly Fixed Costs ($)", | |
min_value=0, value=2000, step=100) | |
# Calculate break-even points | |
text_cost_per_user = calculate_text_costs(1, text_usage, 15, 20)['total_cost'] | |
audio_cost_per_user = calculate_audio_costs(1, audio_mins)['total_cost'] | |
text_contribution = monthly_subscription - text_cost_per_user | |
audio_contribution = monthly_subscription - audio_cost_per_user | |
text_break_even = fixed_costs / text_contribution if text_contribution > 0 else float('inf') | |
audio_break_even = fixed_costs / audio_contribution if audio_contribution > 0 else float('inf') | |
# Display break-even metrics | |
col1, col2 = st.columns(2) | |
with col1: | |
st.metric("Text Break-even Users", f"{int(text_break_even)}") | |
st.metric("Text Margin per User", | |
f"${text_contribution:.2f} ({text_contribution/monthly_subscription*100:.1f}%)") | |
with col2: | |
st.metric("Audio Break-even Users", f"{int(audio_break_even)}") | |
st.metric("Audio Margin per User", | |
f"${audio_contribution:.2f} ({audio_contribution/monthly_subscription*100:.1f}%)") | |
# Create a combined visualization | |
st.subheader("Profit Analysis") | |
user_counts = list(range(0, 10001, 500)) | |
text_profits = [(monthly_subscription - text_cost_per_user) * users - fixed_costs for users in user_counts] | |
audio_profits = [(monthly_subscription - audio_cost_per_user) * users - fixed_costs for users in user_counts] | |
profit_data = pd.DataFrame({ | |
'Users': user_counts, | |
'Text Profit': text_profits, | |
'Audio Profit': audio_profits | |
}) | |
fig = px.line(profit_data, x='Users', y=['Text Profit', 'Audio Profit'], | |
title="Profit by User Count", | |
labels={'value': 'Profit ($)', 'Users': 'Number of Users'}, | |
color_discrete_sequence=["#3366CC", "#FF9900"]) | |
fig.add_hline(y=0, line_dash="dash", line_color="red") | |
st.plotly_chart(fig, use_container_width=True) | |
# Business model recommendations | |
st.subheader("Business Model Recommendations") | |
if audio_cost_per_user > monthly_subscription: | |
st.warning(f""" | |
⚠️ Audio processing costs (${audio_cost_per_user:.2f}/user) exceed subscription price (${monthly_subscription:.2f}). | |
Consider increasing subscription price or implementing usage limits for audio features. | |
""") | |
recommended_model = "Text-Only" if text_contribution > audio_contribution else "Hybrid" | |
st.success(f""" | |
✅ Recommended Business Model: **{recommended_model}** | |
Based on your inputs, a {'text-focused approach' if recommended_model == 'Text-Only' else 'hybrid approach with limited audio processing'} | |
would maximize profitability while maintaining competitive pricing. | |
""") | |
# Cost Calculator | |
elif page == "Cost Calculator": | |
st.header("Interactive Cost Calculator") | |
# Choose model type | |
model_type = st.selectbox("Select Model Type", ["Text", "Audio", "Hybrid"]) | |
if model_type == "Text":
    st.subheader("GPT-4o mini Text Model Calculator")

    # Input parameters: usage on the left, pricing/user mix on the right.
    usage_col, pricing_col = st.columns(2)
    with usage_col:
        total_users = st.number_input("Total Users", min_value=1, value=1000, step=100)
        msgs_per_user = st.number_input("Monthly Messages per User", min_value=1, value=300, step=10)
        input_tokens = st.number_input("Avg. Input Tokens per Message", min_value=1, value=15, step=1)
        output_tokens = st.number_input("Avg. Output Tokens per Message", min_value=1, value=20, step=1)
    with pricing_col:
        subscription_price = st.number_input("Monthly Subscription Price ($)",
                                             min_value=0.0, value=12.99, step=0.99)
        free_tier_ratio = st.slider("Free:Paid User Ratio",
                                    min_value=0.0, max_value=20.0, value=9.0, step=0.1)
        cached_pct = st.slider("% Cached Input",
                               min_value=0, max_value=100, value=0, step=5)

    # Split users by the free:paid ratio (ratio r gives r / (r + 1) free users).
    free_share = free_tier_ratio / (free_tier_ratio + 1)
    free_users = int(total_users * free_share)
    paid_users = total_users - free_users

    # Free tier usage is capped at a fixed monthly message limit.
    free_msgs_limit = 100  # Free tier message limit
    free_msgs_per_user = min(msgs_per_user, free_msgs_limit)
    free_total_msgs = free_users * free_msgs_per_user

    # Token costs per tier
    free_costs = calculate_text_costs(free_users, free_msgs_per_user, input_tokens, output_tokens, cached_pct)
    paid_costs = calculate_text_costs(paid_users, msgs_per_user, input_tokens, output_tokens, cached_pct)
    total_cost = free_costs['total_cost'] + paid_costs['total_cost']

    # Revenue comes from paid users only.
    revenue = paid_users * subscription_price
    profit = revenue - total_cost
    if revenue > 0:
        margin = (profit / revenue) * 100
    else:
        margin = 0

    # Display results: one headline metric per column.
    st.subheader("Results")
    headline_metrics = [
        ("Total Cost", f"${total_cost:.2f}"),
        ("Revenue", f"${revenue:.2f}"),
        ("Monthly Profit", f"${profit:.2f}"),
        ("Profit Margin", f"{margin:.1f}%"),
    ]
    for column, (label, value) in zip(st.columns(4), headline_metrics):
        with column:
            st.metric(label, value)

    # Detailed breakdown
    with st.expander("See Detailed Cost Breakdown"):
        st.markdown(f"""
        ### User Distribution
        - Total Users: {total_users:,}
        - Free Tier Users: {free_users:,} ({free_users/total_users*100:.1f}%)
        - Paid Tier Users: {paid_users:,} ({paid_users/total_users*100:.1f}%)
        ### Token Usage
        - Total Input Tokens: {free_costs['input_tokens'] + paid_costs['input_tokens']:,.0f}
        - Total Output Tokens: {free_costs['output_tokens'] + paid_costs['output_tokens']:,.0f}
        ### Token Cost Breakdown
        - Input Cost: ${free_costs['input_cost'] + paid_costs['input_cost']:.2f}
        - Output Cost: ${free_costs['output_cost'] + paid_costs['output_cost']:.2f}
        ### Per User Economics
        - Cost per Paid User: ${total_cost/paid_users:.4f} (if all costs allocated to paid users)
        - Revenue per Paid User: ${subscription_price:.2f}
        - Profit per Paid User: ${(revenue-total_cost)/paid_users:.2f}
        """)

    # Visualization: stacked cost bar (free + paid) next to the revenue bar.
    st.subheader("Cost vs Revenue")
    bar_specs = [
        ('Free Tier Cost', 'Cost', free_costs['total_cost'], '#FF9900'),
        ('Paid Tier Cost', 'Cost', paid_costs['total_cost'], '#FF5733'),
        ('Revenue', 'Revenue', revenue, '#3366CC'),
    ]
    fig = go.Figure()
    for trace_name, category, amount, colour in bar_specs:
        fig.add_trace(go.Bar(
            name=trace_name,
            x=[category],
            y=[amount],
            marker_color=colour
        ))
    fig.update_layout(barmode='stack', title="Cost vs Revenue Breakdown")
    st.plotly_chart(fig, use_container_width=True)
elif model_type == "Audio": | |
st.subheader("GPT-4o mini Audio Model Calculator") | |
# Input parameters | |
col1, col2 = st.columns(2) | |
with col1: | |
users = st.number_input("Number of Users", min_value=1, value=100, step=10) | |
audio_minutes = st.number_input("Minutes of Audio per User per Month", min_value=1, value=10, step=1) | |
tokens_per_second = st.number_input("Audio Tokens per Second", min_value=100, value=600, step=10) | |
cached_pct = st.slider("% Cached Input", min_value=0, max_value=100, value=20, step=5) | |
with col2: | |
output_ratio = st.slider("Output:Input Token Ratio", min_value=0.01, max_value=0.2, value=0.05, step=0.01) | |
subscription = st.number_input("Monthly Subscription ($)", min_value=0.0, value=29.99, step=0.99) | |
silence_reduction = st.slider("Silence Reduction %", min_value=0, max_value=50, value=20, step=5) | |
# Apply silence reduction to effective minutes | |
effective_minutes = audio_minutes * (1 - silence_reduction/100) | |
# Calculate costs | |
costs = calculate_audio_costs(users, effective_minutes, tokens_per_second, output_ratio, cached_pct) | |
# Calculate revenue and profit | |
revenue = users * subscription | |
profit = revenue - costs['total_cost'] | |
margin = (profit / revenue) * 100 if revenue > 0 else 0 | |
# Display results | |
st.subheader("Results") | |
col1, col2 = st.columns(2) | |
with col1: | |
st.metric("Cost per Audio Minute", f"${costs['cost_per_minute']:.4f}") | |
st.metric("Total Monthly Cost", f"${costs['total_cost']:.2f}") | |
st.metric("Cost per User", f"${costs['total_cost']/users:.2f}") | |
with col2: | |
st.metric("Monthly Revenue", f"${revenue:.2f}") | |
st.metric("Monthly Profit", f"${profit:.2f}") | |
st.metric("Profit Margin", f"{margin:.1f}%") | |