File size: 9,555 Bytes
7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 7205d15 96cb552 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
import polars as pl
import api_scraper
mlb_scrape = api_scraper.MLB_Scrape()
from stuff_model import *
from shiny import App, reactive, ui, render
from shiny.ui import h2, tags
from api_scraper import MLB_Scrape
import datetime
from stuff_model import feature_engineering as fe
from stuff_model import stuff_apply
from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
theme.tabulator_site()
scraper = MLB_Scrape()
df_year_old_group = pl.read_parquet('pitch_data_agg_2024.parquet')
pitcher_old_dict = dict(zip(df_year_old_group['pitcher_id'],df_year_old_group['pitcher_name']))
app_ui = ui.page_fluid(
ui.card(
ui.card_header("2025 Spring Training Pitch Data App"),
ui.row(
ui.column(4,
ui.markdown("""This app generates a table which shows the 2025 Spring Training data.
* Differences are calculated based on 2024 regular season data
* If 2024 data does not exist for pitcher, 2023 Data is used
* If no difference exists, the pitch is labelled as a new pitch"""),
ui.input_action_button(
"refresh",
"Refresh Data",
class_="btn-primary",
width="100%"
)
),
ui.column(3,
ui.div(
"By: ",
ui.tags.a(
"@TJStats",
href="https://x.com/TJStats",
target="_blank"
)
),
ui.tags.p("Data: MLB"),
ui.tags.p(
ui.tags.a(
"Support me on Patreon for more baseball content",
href="https://www.patreon.com/TJ_Stats",
target="_blank"
)
)
)
),
ui.navset_tab(
ui.nav("All Pitches",
output_tabulator("table_all")
),
)
)
)
def server(input, output, session):
@output
@render_tabulator
@reactive.event(input.refresh)
def table_all():
import polars as pl
df_spring = pl.read_parquet(f"hf://datasets/TJStatsApps/mlb_data/data/mlb_pitch_data_2025_spring.parquet")
date = datetime.datetime.now().date()
date_str = date.strftime('%Y-%m-%d')
# Initialize the scraper
game_list_input = (scraper.get_schedule(year_input=[int(date_str[0:4])], sport_id=[1], game_type=['S'])
.filter(pl.col('date') == date)['game_id'])
data = scraper.get_data(game_list_input)
df = scraper.get_data_df(data)
df_spring = pl.concat([df_spring, df]).sort('game_date', descending=True)
# df_year_old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb,df_aaa,df_a,df_afl])))
# df_year_2old = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_mlb_2023])))
df_spring_stuff = stuff_apply.stuff_apply(fe.feature_engineering(pl.concat([df_spring])))
import polars as pl
# Compute total pitches for each pitcher
df_pitcher_totals = df_spring_stuff.group_by("pitcher_id").agg(
pl.col("start_speed").count().alias("pitcher_total")
)
df_spring_group = df_spring_stuff.group_by(['pitcher_id', 'pitcher_name', 'pitch_type']).agg([
pl.col('start_speed').count().alias('count'),
pl.col('start_speed').mean().alias('start_speed'),
pl.col('ivb').mean().alias('ivb'),
pl.col('hb').mean().alias('hb'),
pl.col('release_pos_z').mean().alias('release_pos_z'),
pl.col('release_pos_x').mean().alias('release_pos_x'),
pl.col('extension').mean().alias('extension'),
pl.col('tj_stuff_plus').mean().alias('tj_stuff_plus'),
(pl.col('start_speed').filter(pl.col('batter_hand')=='L').count()).alias('rhh_count'),
(pl.col('start_speed').filter(pl.col('batter_hand')=='R').count()).alias('lhh_count')
])
# Join total pitches per pitcher to the grouped DataFrame on pitcher_id
df_spring_group = df_spring_group.join(df_pitcher_totals, on="pitcher_id", how="left")
# Now calculate the pitch percent for each pitcher/pitch_type combination
df_spring_group = df_spring_group.with_columns(
(pl.col("count") / pl.col("pitcher_total")).alias("pitch_percent")
)
# Optionally, if you want the percentage of left/right-handed batters within the group:
df_spring_group = df_spring_group.with_columns([
(pl.col("rhh_count") / pl.col("pitcher_total")).alias("rhh_percent"),
(pl.col("lhh_count") / pl.col("pitcher_total")).alias("lhh_percent")
])
df_merge = df_spring_group.join(df_year_old_group,on=['pitcher_id','pitcher_name','pitch_type'],how='left',suffix='_old')
df_merge = df_merge.with_columns(
pl.col('pitcher_id').is_in(df_year_old_group['pitcher_id']).alias('exists_in_old')
)
df_merge = df_merge.with_columns(
pl.when(pl.col('start_speed_old').is_null() & pl.col('exists_in_old'))
.then(pl.lit("TRUE"))
.otherwise(pl.lit(None))
.alias("new_pitch")
)
import polars as pl
# Define the columns to subtract
cols_to_subtract = [
("start_speed", "start_speed_old"),
("ivb", "ivb_old"),
("hb", "hb_old"),
("release_pos_z", "release_pos_z_old"),
("release_pos_x", "release_pos_x_old"),
("extension", "extension_old"),
("tj_stuff_plus", "tj_stuff_plus_old")
]
df_merge = df_merge.with_columns([
# Step 1: Create _diff columns with the default value (e.g., 80) if old is null
pl.when(pl.col(old).is_null())
.then(pl.lit(10000)) # If old is null, assign 80 as the default
.otherwise(pl.col(new) - pl.col(old)) # Otherwise subtract old from new
.alias(new + "_diff")
for new, old in cols_to_subtract
])
# Step 2: Format the columns with (value (+diff)) - exclude brackets if diff is 80
df_merge = df_merge.with_columns([
pl.when(pl.col(new + "_diff").eq(10000)) # If diff is 80, no need to include brackets
.then(pl.col(new).round(1).cast(pl.Utf8)+'\n\t') # Just return the new value as string
.otherwise(
pl.col(new).round(1).cast(pl.Utf8) +
"\n(" +
pl.col(new + "_diff").round(1)
.map_elements(lambda x: f"{x:+.1f}") +
")"
).alias(new + "_formatted")
for new, _ in cols_to_subtract
])
percent_cols = ['pitch_percent', 'rhh_percent', 'lhh_percent']
df_merge = df_merge.with_columns([
(pl.col(col) * 100) # Convert to percentage
.round(1) # Round to 1 decimal
.map_elements(lambda x: f"{x:.1f}%") # Format as string with '%'
.alias(col + "_formatted")
for col in percent_cols
]).sort(['pitcher_id','count'],descending=True)
columns = [
{ "title": "Pitcher Name", "field": "pitcher_name", "width": 250, "headerFilter":"input" ,"frozen":True,},
{ "title": "Team", "field": "pitcher_team", "width": 100, "headerFilter":"input" ,"frozen":True,},
{ "title": "Pitch Type", "field": "pitch_type", "width": 125, "headerFilter":"input" ,"frozen":True,},
{ "title": "New Pitch?", "field": "new_pitch", "width": 125, "headerFilter":"input" ,"frozen":False,},
{ "title": "Pitches", "field": "count", "width": 100 , "headerFilter":"input"},
{ "title": "Pitch%", "field": "pitch_percent_formatted", "width": 100, "headerFilter":"input"},
{ "title": "RHH%", "field": "rhh_percent_formatted", "width": 100, "headerFilter":"input"},
{ "title": "LHH%", "field": "lhh_percent_formatted", "width": 100, "headerFilter":"input"},
{ "title": "Velocity", "field": "start_speed_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
{ "title": "iVB", "field": "ivb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
{ "title": "HB", "field": "hb_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
{ "title": "RelH", "field": "release_pos_z_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
{ "title": "RelS", "field": "release_pos_x_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" },
{ "title": "Extension", "field": "extension_formatted", "width": 125, "headerFilter":"input", "formatter":"textarea" },
{ "title": "tjStuff+", "field": "tj_stuff_plus_formatted", "width": 100, "headerFilter":"input", "formatter":"textarea" }
]
df_plot = df_merge.to_pandas()
team_dict = dict(zip(df_spring['pitcher_id'],df_spring['pitcher_team']))
df_plot['pitcher_team'] = df_plot['pitcher_id'].map(team_dict)
return Tabulator(
df_plot,
table_options=TableOptions(
height=750,
columns=columns,
)
)
app = App(app_ui, server)
|