File size: 3,493 Bytes
3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e 5a25ed5 3ab2a6e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import streamlit as st
import pandas as pd
import plotly.express as px
# Page config: browser tab title and wide layout. This is the first Streamlit
# call made by the script.
PAGE_SETTINGS = {
    "page_title": "Max Output Tokens Analysis",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

# Custom CSS: caps the multiselect widget at 800px and adds 1rem of
# horizontal padding to the main container.
CUSTOM_CSS = """
<style>
.stMultiSelect {
max-width: 800px;
}
.main > div {
padding-left: 1rem;
padding-right: 1rem;
}
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
# Load data
@st.cache_data
def load_data():
    """Read the per-model token CSV and parse its launch dates.

    Returns:
        The contents of ``data/max-tokens-by-model.csv`` as a DataFrame whose
        ``launch_date`` column has been converted with ``pd.to_datetime``.
    """
    raw = pd.read_csv('data/max-tokens-by-model.csv')
    # assign() overwrites the existing column in place, so column order is kept.
    return raw.assign(launch_date=pd.to_datetime(raw['launch_date']))


df = load_data()
# Title
st.title("LLM Max Output Tokens Analysis")

# Company selection in a more compact layout: the picker is rendered in the
# wider (2:1) column; nothing is placed in the second column here.
col1, col2 = st.columns([2, 1])
companies = sorted(df['company'].unique())
selected_companies = col1.multiselect(
    "Select companies to display:",
    options=companies,
    default=companies,  # every company is pre-selected
    key='company_filter',
)

# Filter data based on selection
company_mask = df['company'].isin(selected_companies)
filtered_df = df[company_mask]
# Create the evolution chart.
#
# px.line joins points in DataFrame row order, so the rows are sorted by
# launch date first; without this, a CSV that is not already chronological
# would produce lines that double back on themselves.
#
# Plotly Express orders legend entries (and assigns colours) by the order in
# which each category is first encountered. That order is captured from the
# unsorted frame and pinned via `category_orders`, so sorting the rows does not
# reshuffle the legend or the colours.
company_order = list(filtered_df['company'].unique())
chart_df = filtered_df.sort_values('launch_date', kind='stable')

fig = px.line(
    chart_df,
    x='launch_date',
    y='max_output_tokens',
    color='company',  # one line per company
    hover_data=['model_name', 'max_output_tokens'],
    title='Evolution of Max Output Tokens by Company',
    labels={
        'launch_date': 'Launch Date',
        'max_output_tokens': 'Max Output Tokens',
        'company': 'Company',
    },
    category_orders={'company': company_order},
    markers=True,
)
# Improved chart layout
LOG2_TICK_STEP = 0.30102999566  # log10(2) for better log scale ticks

# Horizontal legend anchored by its bottom-right corner at (x=1, y=1.02).
legend_style = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
chart_margins = dict(l=40, r=40, t=60, b=40)

fig.update_layout(
    hovermode='x unified',
    xaxis_title="Launch Date",
    yaxis_title="Max Output Tokens",
    yaxis_type="log",
    height=500,
    showlegend=True,
    legend=legend_style,
    margin=chart_margins,
    yaxis=dict(tickformat=",", dtick=LOG2_TICK_STEP),
    plot_bgcolor='white',
    paper_bgcolor='white',
)

# Improved grid and traces: both axes share the same thin light-gray grid.
grid_style = dict(gridcolor='lightgray', gridwidth=0.5, showgrid=True)
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)
fig.update_traces(line=dict(width=2), marker=dict(size=8))

# Display the chart
st.plotly_chart(fig, use_container_width=True)
# Display the data table
st.subheader("Max Output Tokens by Model")


def _with_thousands_separator(value):
    """Return *value* formatted with comma thousands separators."""
    return f"{value:,}"


# Prepare the data with better formatting
table_columns = ['model_name', 'company', 'max_output_tokens', 'launch_date']
column_titles = {
    'model_name': 'Model Name',
    'company': 'Company',
    'max_output_tokens': 'Max Output Tokens',
    'launch_date': 'Launch Date',
}

# Sort while the token counts are still their original values; they are only
# turned into display strings afterwards.
ranked = filtered_df[table_columns].sort_values(
    'max_output_tokens', ascending=False
)
formatted = ranked.assign(
    launch_date=ranked['launch_date'].dt.strftime('%Y-%m-%d'),
    max_output_tokens=ranked['max_output_tokens'].apply(
        _with_thousands_separator
    ),
)
display_df = formatted.rename(columns=column_titles)

# Display the styled table
st.dataframe(display_df, use_container_width=True, hide_index=True)
# Attribution with better spacing: a line break, then a footer separated from
# the content above by a top border.
attribution_html = (
    "<div style='border-top: 1px solid #ccc; padding-top: 1rem; color: #666;'>"
    "By: <a href='https://danielrosehill.com' style='color: #666;'>Daniel Rosehill</a> | "
    "Data sourced from public sources on February 8, 2025"
    "</div>"
)
st.markdown("<br>", unsafe_allow_html=True)
st.markdown(attribution_html, unsafe_allow_html=True)
|