danielrosehill's picture
updated
5a25ed5
raw
history blame
3.49 kB
import streamlit as st
import pandas as pd
import plotly.express as px
# Page config: browser-tab title and a full-width layout.
st.set_page_config(page_title="Max Output Tokens Analysis", layout="wide")

# Custom CSS: caps multiselect widgets at 800px wide and gives the direct
# <div> children of `.main` 1rem of horizontal padding.
_CUSTOM_CSS = """
<style>
.stMultiSelect {
max-width: 800px;
}
.main > div {
padding-left: 1rem;
padding-right: 1rem;
}
</style>
"""
# unsafe_allow_html is needed so the <style> tag is emitted as markup
# rather than rendered as text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Load data
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the per-model token limits CSV and parse its launch dates.

    Returns:
        The contents of ``data/max-tokens-by-model.csv`` with the
        ``launch_date`` column converted by ``pd.to_datetime``.
    """
    csv_path = 'data/max-tokens-by-model.csv'
    models = pd.read_csv(csv_path)
    # Convert to datetimes so later code can plot on a time axis and use
    # the ``.dt`` accessor.
    models['launch_date'] = pd.to_datetime(models['launch_date'])
    return models
# Fetch the dataset through the cached loader.
df = load_data()

# Title
st.title("LLM Max Output Tokens Analysis")

# Company selection in a more compact layout: the filter is placed in the
# wider (2:1) left column; col2 is not used anywhere in the visible code.
col1, col2 = st.columns([2, 1])
with col1:
    # Drop missing company names before sorting: NaN cannot be ordered
    # against strings, so sorted() would raise TypeError on a CSV with a
    # blank company cell.
    companies = sorted(df['company'].dropna().unique())
    selected_companies = st.multiselect(
        "Select companies to display:",
        options=companies,
        default=companies,  # start with every company selected
        key='company_filter',
    )

# Filter data based on selection. Rows whose company is missing never match,
# because missing values are not offered as options.
filtered_df = df[df['company'].isin(selected_companies)]
# Create the evolution chart.
# Plotly draws a line through the points in row order, with no automatic
# reordering, so the rows are sorted chronologically first; otherwise an
# unsorted CSV makes a company's line double back on itself.
# category_orders pins the company order to its order of appearance in
# filtered_df, so sorting does not change legend order or colour assignment.
fig = px.line(
    filtered_df.sort_values('launch_date'),
    x='launch_date',
    y='max_output_tokens',
    color='company',  # one line per company
    hover_data=['model_name', 'max_output_tokens'],
    title='Evolution of Max Output Tokens by Company',
    labels={
        'launch_date': 'Launch Date',
        'max_output_tokens': 'Max Output Tokens',
        'company': 'Company',
    },
    category_orders={'company': filtered_df['company'].unique().tolist()},
    markers=True,
)
# Improved chart layout
fig.update_layout(
    # Canvas: fixed height, tight margins, white backgrounds.
    height=500,
    margin={"l": 40, "r": 40, "t": 60, "b": 40},
    plot_bgcolor='white',
    paper_bgcolor='white',
    # One combined hover box for all traces at the hovered x position.
    hovermode='x unified',
    # Axes
    xaxis_title="Launch Date",
    yaxis={
        "title": "Max Output Tokens",
        "type": "log",
        "tickformat": ",",  # thousands separators on tick labels
        "dtick": 0.30102999566,  # log10(2) for better log scale ticks
    },
    # Horizontal legend anchored by its bottom-right corner just above the
    # plot area (y=1.02), aligned to the right edge (x=1).
    showlegend=True,
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
)
# Improved grid and traces: both axes share the same thin light-gray grid.
_grid_style = {"showgrid": True, "gridcolor": 'lightgray', "gridwidth": 0.5}
fig.update_xaxes(**_grid_style)
fig.update_yaxes(**_grid_style)

# Line width and marker size applied to every trace.
fig.update_traces(line_width=2, marker_size=8)

# Display the chart, stretched to the width of its container.
st.plotly_chart(fig, use_container_width=True)
# Display the data table
st.subheader("Max Output Tokens by Model")


def _build_display_table(frame):
    """Return the table shown under the chart.

    Keeps the four displayed columns, orders rows from the largest to the
    smallest ``max_output_tokens``, renders dates as ``YYYY-MM-DD`` and token
    counts with thousands separators, then applies the display headers.
    """
    shown_columns = ['model_name', 'company', 'max_output_tokens', 'launch_date']
    # Sort while the token column is still numeric; it becomes text below.
    ordered = frame[shown_columns].sort_values('max_output_tokens', ascending=False)
    formatted = ordered.assign(
        launch_date=ordered['launch_date'].dt.strftime('%Y-%m-%d'),
        max_output_tokens=ordered['max_output_tokens'].apply(lambda v: f"{v:,}"),
    )
    headers = {
        'model_name': 'Model Name',
        'company': 'Company',
        'max_output_tokens': 'Max Output Tokens',
        'launch_date': 'Launch Date',
    }
    return formatted.rename(columns=headers)


# Prepare the data with better formatting
display_df = _build_display_table(filtered_df)

# Display the styled table without the index column.
st.dataframe(display_df, use_container_width=True, hide_index=True)
# Attribution with better spacing: a line break, then a footer separated
# from the content above by a top border.
_ATTRIBUTION_HTML = "".join([
    "<div style='border-top: 1px solid #ccc; padding-top: 1rem; color: #666;'>",
    "By: <a href='https://danielrosehill.com' style='color: #666;'>Daniel Rosehill</a> | ",
    "Data sourced from public sources on February 8, 2025",
    "</div>",
])
st.markdown("<br>", unsafe_allow_html=True)
st.markdown(_ATTRIBUTION_HTML, unsafe_allow_html=True)