# Streamlit dashboard: charts the max output tokens of LLMs over time, per
# company, and lists the same data in a sortable table.

import streamlit as st
import pandas as pd
import plotly.express as px

# Page config
st.set_page_config(
    page_title="Max Output Tokens Analysis",
    layout="wide"
)

# Custom CSS
# NOTE(review): this string holds only whitespace, so no styling is injected;
# any <style> markup that was meant to live here appears to be missing --
# confirm against the project history.
st.markdown(""" """, unsafe_allow_html=True)


# Load data
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the model CSV and parse its ``launch_date`` column.

    Decorated with ``st.cache_data`` so the file is not re-read on every
    script rerun.

    Returns:
        The CSV contents as a DataFrame, with ``launch_date`` converted by
        ``pd.to_datetime``.
    """
    df = pd.read_csv('data/max-tokens-by-model.csv')
    df['launch_date'] = pd.to_datetime(df['launch_date'])
    return df


df = load_data()

# Title
st.title("LLM Max Output Tokens Analysis")

# Company selection in a more compact layout: the selector takes the wider
# left column; the right column is intentionally left empty.
col1, _ = st.columns([2, 1])
with col1:
    companies = sorted(df['company'].unique())
    selected_companies = st.multiselect(
        "Select companies to display:",
        options=companies,
        default=companies,
        key='company_filter'
    )

# Filter data based on selection
filtered_df = df[df['company'].isin(selected_companies)]

# Plotly connects line points in row order, so plot a chronologically sorted
# copy; otherwise an unsorted CSV would draw lines that zig-zag back in time.
# The pre-sort company order is passed explicitly so that sorting does not
# change the legend order or the colour assigned to each company.
company_order = filtered_df['company'].unique().tolist()
chart_df = filtered_df.sort_values('launch_date', kind='stable')

# Create the evolution chart
fig = px.line(
    chart_df,
    x='launch_date',
    y='max_output_tokens',
    color='company',
    hover_data=['model_name', 'max_output_tokens'],
    title='Evolution of Max Output Tokens by Company',
    labels={
        'launch_date': 'Launch Date',
        'max_output_tokens': 'Max Output Tokens',
        'company': 'Company'
    },
    category_orders={'company': company_order},
    markers=True
)

# Improved chart layout
fig.update_layout(
    hovermode='x unified',
    xaxis_title="Launch Date",
    yaxis_title="Max Output Tokens",
    yaxis_type="log",
    height=500,
    showlegend=True,
    # Horizontal legend placed just above the plot area, right-aligned
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=40, r=40, t=60, b=40),
    yaxis=dict(
        tickformat=",",
        # On a log axis dtick is expressed in log10 units, so log10(2)
        # places a tick at every doubling of the token count.
        dtick=0.30102999566,
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
)

# Improved grid and traces
fig.update_xaxes(
    gridcolor='lightgray',
    gridwidth=0.5,
    showgrid=True
)
fig.update_yaxes(
    gridcolor='lightgray',
    gridwidth=0.5,
    showgrid=True
)
fig.update_traces(
    line=dict(width=2),
    marker=dict(size=8)
)

# Display the chart
st.plotly_chart(fig, use_container_width=True)

# Display the data table
st.subheader("Max Output Tokens by Model")

# Prepare the data with better formatting. Sorting happens before the token
# counts are turned into strings, so the order is numeric, not lexical.
display_df = (
    filtered_df[['model_name', 'company', 'max_output_tokens', 'launch_date']]
    .sort_values('max_output_tokens', ascending=False)
    .assign(
        launch_date=lambda x: x['launch_date'].dt.strftime('%Y-%m-%d'),
        max_output_tokens=lambda x: x['max_output_tokens'].apply(lambda v: f"{v:,}")
    )
    .rename(columns={
        'model_name': 'Model Name',
        'company': 'Company',
        'max_output_tokens': 'Max Output Tokens',
        'launch_date': 'Launch Date'
    })
)

# Display the styled table
st.dataframe(
    display_df,
    use_container_width=True,
    hide_index=True
)

# Attribution with better spacing
# NOTE(review): the strings below contain only line breaks around the text;
# any HTML wrapper that was meant to surround them appears to be missing --
# confirm against the project history. The line breaks are written as "\n"
# escapes because a raw line break inside a "..." literal does not parse.
st.markdown("\n", unsafe_allow_html=True)
st.markdown(
    "\n\n"
    "By: Daniel Rosehill | "
    "Data sourced from public sources on February 8, 2025"
    "\n\n",
    unsafe_allow_html=True
)