Spaces:

danielrosehill
/

Max-Output-Tokens-Analysis-0225

Sleeping

File size: 3,493 Bytes

import streamlit as st
import pandas as pd
import plotly.express as px

# Page config: browser-tab title and full-width ("wide") layout
_page_options = {
    "page_title": "Max Output Tokens Analysis",
    "layout": "wide",
}
st.set_page_config(**_page_options)

# Custom CSS: caps the multiselect widget at 800px wide and gives the main
# container 1rem of horizontal padding on each side.
_custom_css = """
    <style>
    .stMultiSelect {
        max-width: 800px;
    }
    .main > div {
        padding-left: 1rem;
        padding-right: 1rem;
    }
    </style>
"""
st.markdown(_custom_css, unsafe_allow_html=True)

# Load data
@st.cache_data
def load_data():
    """Read the model dataset and return it with parsed launch dates.

    Loads ``data/max-tokens-by-model.csv`` (path relative to the working
    directory) and converts its ``launch_date`` column with
    ``pd.to_datetime``. The result is memoised by ``st.cache_data``.

    Returns:
        pandas.DataFrame: the CSV contents with ``launch_date`` as datetimes.
    """
    raw = pd.read_csv('data/max-tokens-by-model.csv')
    # assign() replaces the existing column in place, so column order is kept.
    return raw.assign(launch_date=pd.to_datetime(raw['launch_date']))

# Fetch the dataset (served from the Streamlit cache after the first run)
df = load_data()

# Page heading
st.title("LLM Max Output Tokens Analysis")

# Company picker: a 2:1 column split keeps the widget in the wider left
# column; the right column is intentionally left empty.
col1, col2 = st.columns([2, 1])
companies = sorted(df['company'].unique())
selected_companies = col1.multiselect(
    "Select companies to display:",
    options=companies,
    default=companies,  # everything is selected on first load
    key='company_filter',
)

# Keep only the rows that belong to the chosen companies
company_mask = df['company'].isin(selected_companies)
filtered_df = df[company_mask]

# Create the evolution chart.
# Plotly joins the points of each trace in row order, so the rows are put in
# chronological order first; with an unsorted CSV the lines would otherwise
# double back on themselves. A stable sort keeps same-day rows in file order.
chart_df = filtered_df.sort_values('launch_date', kind='stable')

# Sorting changes which company is encountered first, and Plotly Express
# derives legend order and colour assignment from first appearance. Pin the
# order to the unsorted data so legend and colours stay as they were.
company_order = filtered_df['company'].dropna().unique().tolist()

fig = px.line(
    chart_df,
    x='launch_date',
    y='max_output_tokens',
    color='company',  # one line per company
    hover_data=['model_name', 'max_output_tokens'],
    title='Evolution of Max Output Tokens by Company',
    labels={
        'launch_date': 'Launch Date',
        'max_output_tokens': 'Max Output Tokens',
        'company': 'Company'
    },
    category_orders={'company': company_order},
    markers=True,
)

# Improved chart layout
fig.update_layout(
    hovermode='x unified',
    xaxis_title="Launch Date",
    yaxis_title="Max Output Tokens",
    yaxis_type="log",
    height=500,
    showlegend=True,
    # Horizontal legend anchored just above the top-right of the plot area
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    margin=dict(l=40, r=40, t=60, b=40),
    yaxis=dict(
        tickformat=",",
        dtick=0.30102999566,  # log10(2): one tick per doubling on the log axis
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
)

# Light-gray grid lines on both axes
fig.update_xaxes(
    gridcolor='lightgray',
    gridwidth=0.5,
    showgrid=True
)
fig.update_yaxes(
    gridcolor='lightgray',
    gridwidth=0.5,
    showgrid=True
)

# Uniform line width and marker size for every company trace
fig.update_traces(
    line=dict(width=2),
    marker=dict(size=8)
)

# Display the chart
st.plotly_chart(fig, use_container_width=True)

# Display the data table
st.subheader("Max Output Tokens by Model")

# Prepare the table: largest output limits first, dates as ISO strings
# (ISO dates still sort correctly as text).
#
# max_output_tokens is deliberately left numeric. Turning it into a
# "4,096"-style string makes the interactive column sort compare text
# ("8,192" would rank above "128,000") and renders float/NaN values as
# "4,096.0"/"nan".
# NOTE(review): this relies on Streamlit's default rendering of numeric
# columns for the thousands separators - confirm on the deployed version.
display_df = (
    filtered_df[['model_name', 'company', 'max_output_tokens', 'launch_date']]
    .sort_values('max_output_tokens', ascending=False)
    .assign(
        launch_date=lambda x: x['launch_date'].dt.strftime('%Y-%m-%d'),
    )
    .rename(columns={
        'model_name': 'Model Name',
        'company': 'Company',
        'max_output_tokens': 'Max Output Tokens',
        'launch_date': 'Launch Date'
    })
)

# Display the table full-width without the pandas index column
st.dataframe(
    display_df,
    use_container_width=True,
    hide_index=True
)

# Attribution footer: a blank line for spacing, then a rule-topped credit line
st.markdown("<br>", unsafe_allow_html=True)

_footer_html = "".join([
    "<div style='border-top: 1px solid #ccc; padding-top: 1rem; color: #666;'>",
    "By: <a href='https://danielrosehill.com' style='color: #666;'>Daniel Rosehill</a> | ",
    "Data sourced from public sources on February 8, 2025",
    "</div>",
])
st.markdown(_footer_html, unsafe_allow_html=True)